Blame - jni/libpcre/sources/132html - jami-client-android

blob: ccfbfd91f3ef6c13a6ee16e181bd69fc9c233bd1 [file] [log] [blame]

Tristan Matthews	0461646	2013-11-14 16:09:34 -0500	[diff] [blame]	1	#! /usr/bin/perl -w
				2
				3	# Script to turn PCRE man pages into HTML
				4
				5
				# Subroutine to handle font changes and other escapes
				#
				# Takes one string of man page text and returns the HTML form of it:
				#   - "<" and ">" are turned into character entities (this must happen
				#     before any markup is inserted, so the tags added below survive);
				#   - \fI...\fR or \fI...\fP becomes <i>...</i>;
				#   - \fB...\fR or \fB...\fP becomes <b>...</b>;
				#   - \e (the man page escape for a backslash) becomes a single backslash;
				#   - "(c)" directly after "Copyright " becomes the copyright entity.
				# The argument itself is not modified.
				sub do_line {
				    my ($s) = @_;

				    $s =~ s/</&lt;/g;                        # Deal with < and >
				    $s =~ s/>/&gt;/g;
				    $s =~ s{\\fI(.*?)\\f[RP]}{<i>$1</i>}g;   # italic font change
				    $s =~ s{\\fB(.*?)\\f[RP]}{<b>$1</b>}g;   # bold font change
				    $s =~ s{\\e}{\\}g;                       # escaped backslash
				    $s =~ s/(?<=Copyright )\(c\)/&copy;/g;
				    return $s;
				}
				19
				# Subroutine to ensure not in a paragraph
				#
				# If a paragraph is open ($inpara is true), writes its closing markup to
				# TEMP: first "</PRE>" when a literal section is open ($inpre), then "</P>".
				# In every case $inpara, $inpre and $wrotetext are reset to 0 afterwards.
				sub end_para {
				    if ($inpara) {
				        print TEMP "</PRE>\n" if $inpre;
				        print TEMP "</P>\n";
				    }
				    $inpara = $inpre = 0;
				    $wrotetext = 0;
				}
				31
				# Subroutine to start a new paragraph
				#
				# Closes whatever paragraph is currently open by calling end_para(), then
				# writes "<P>" to TEMP and records in $inpara that a paragraph is open.
				sub new_para {
				    end_para();
				    print TEMP "<P>\n";
				    $inpara = 1;
				}
				39
				40
				# Main program
				#
				# Reads a man page from STDIN and writes its HTML form to STDOUT. The
				# first argument that does not start with "-" is used as the page name in
				# the title and heading; the -toc option requests a table of contents.
				#
				# The converted body is collected in a temporary file (filehandle TEMP,
				# which do the paragraph subroutines also write to) so that the table of
				# contents entries, which are printed to STDOUT as section headings are
				# met, come out ahead of the body.

				use strict;
				use warnings;

				our $innf = 0;          # true between .nf and .fi
				our $inpara = 0;        # true while a <P> is open on TEMP
				our $inpre = 0;         # true while a literal section is open on TEMP
				our $wrotetext = 0;     # true once text has been written; end_para resets it
				our $toc = 0;           # true if -toc was given
				our $ref = 1;           # number used for the next TOC/SEC anchor pair

				while (@ARGV && $ARGV[0] =~ /^-/) {
				    $toc = 1 if $ARGV[0] eq "-toc";
				    shift @ARGV;
				}

				# With no page name the title is simply left empty, as before, but
				# without an "uninitialized value" warning.
				my $page = defined $ARGV[0] ? $ARGV[0] : "";

				# Initial output to STDOUT

				print "$_\n" for (
				    "<html>",
				    "<head>",
				    "<title>$page specification</title>",
				    "</head>",
				    '<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">',
				    "<h1>$page man page</h1>",
				    "<p>",
				    'Return to the <a href="index.html">PCRE index page</a>.',
				    "</p>",
				    "<p>",
				    "This page is part of the PCRE HTML documentation. It was generated automatically",
				    "from the original man page. If there is any nonsense in it, please consult the",
				    "man page, in case the conversion went wrong.",
				    "<br>",
				);

				print "<ul>\n" if ($toc);

				# An anonymous temporary file is used instead of a predictable name in
				# /tmp: nobody else can pre-create or replace it, and it disappears on
				# its own when the handle is closed.

				open(TEMP, "+>", undef) || die "Can't open temporary file for output: $!\n";

				LINE: while (<STDIN>) {

				    # Handle lines beginning with a dot

				    if (/^\./) {

				        # Some of the PCRE man pages used to contain instances of .br. However,
				        # they should have all been removed because they cause trouble in some
				        # (other) automated systems that translate man pages to HTML. Complain if
				        # we find .br or .in (another macro that is deprecated).

				        if (/^\.br/ || /^\.in/) {
				            print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
				            print STDERR "*** $_\n";
				            die "*** Processing abandoned\n";
				        }

				        # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.

				        elsif (/^\.nf/) {
				            $innf = 1;
				        }

				        elsif (/^\.fi/) {
				            $innf = 0;
				        }

				        # Handling .sp is subtle. If it is inside a literal section, do nothing if
				        # the next line is a non literal text line; similarly, if not inside a
				        # literal section, do nothing if a literal follows. The point being that
				        # the <pre> and </pre> that delimit literal sections will do the spacing.
				        # Always skip if no previous output.

				        elsif (/^\.sp/) {
				            if ($wrotetext) {
				                $_ = <STDIN>;
				                last LINE unless defined $_;    # .sp was the last line
				                if ($inpre) {
				                    print TEMP "\n" if (/^[\s.]/);
				                }
				                else {
				                    print TEMP "<br>\n<br>\n" if (!/^[\s.]/);
				                }
				                redo LINE;    # Now process the lookahead line we just read
				            }
				        }

				        elsif (/^\.TP/ || /^\.PP/ || /^\.P/) {
				            new_para();
				        }

				        elsif (/^\.SH\s*("?)(.*)\1/) {
				            my $heading = $2;

				            # Ignore the NAME section, together with the line that follows it
				            if ($heading =~ /^NAME\b/) {
				                <STDIN>;
				                next LINE;
				            }

				            end_para();
				            my $title = do_line($heading);
				            if ($toc) {
				                # The title is passed as an argument rather than placed in the
				                # format, so a "%" in a heading cannot be taken as a conversion.
				                printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
				                    $ref, $ref, $title);
				                printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
				                    $ref, $title);
				                $ref++;
				            }
				            else {
				                print TEMP "<br><b>\n$title\n</b><br>\n";
				            }
				        }

				        elsif (/^\.SS\s*("?)(.*)\1/) {
				            my $heading = $2;
				            end_para();
				            my $title = do_line($heading);
				            print TEMP "<br><b>\n$title\n</b><br>\n";
				        }

				        elsif (/^\.B\s*(.*)/) {
				            my $text = $1;
				            new_para() if (!$inpara);
				            $_ = do_line($text);
				            s/"(.*?)"/$1/g;    # drop the quotes around quoted arguments
				            print TEMP "<b>$_</b>\n";
				            $wrotetext = 1;
				        }

				        elsif (/^\.I\s*(.*)/) {
				            my $text = $1;
				            new_para() if (!$inpara);
				            $_ = do_line($text);
				            s/"(.*?)"/$1/g;    # drop the quotes around quoted arguments
				            print TEMP "<i>$_</i>\n";
				            $wrotetext = 1;
				        }

				        # A comment that starts "HREF" takes the next line as a name that
				        # is turned into a hyperlink, using the text given, which might be
				        # in a special font. If it ends in () or (digits) or punctuation, they
				        # aren't part of the link.

				        elsif (/^\.\\"\s*HREF/) {
				            $_ = <STDIN>;
				            last LINE unless defined $_;
				            chomp;
				            $_ = do_line($_);
				            $_ =~ s/\s+$//;
				            if (m{^(?:<.>)?([^<(]+)(?:\(\))?(?:</.>)?(?:\(\d+\))?[.,;:]?$}) {
				                print TEMP "<a href=\"$1.html\">$_</a>\n";
				            }
				            else {
				                # No usable link name: emit the text on its own rather than
				                # a link built from a stale capture.
				                print TEMP "$_\n";
				            }
				        }

				        # A comment that starts "HTML" inserts literal HTML

				        elsif (/^\.\\"\s*HTML\s*(.*)/) {
				            print TEMP $1;
				        }

				        # A comment that starts < inserts that HTML at the end of the
				        # next input line - so as not to get a newline between them.

				        elsif (/^\.\\"\s*(<.>)/) {
				            my $markup = $1;
				            $_ = <STDIN>;
				            last LINE unless defined $_;
				            chomp;
				            $_ = do_line($_);
				            $_ =~ s/\s+$//;
				            print TEMP "$_$markup\n";
				        }

				        # A comment that starts JOIN joins the next two lines together, with one
				        # space between them. Then that line is processed. This is used in some
				        # displays where two lines are needed for the "man" version. JOINSH works
				        # the same, except that it assumes this is a shell command, so removes
				        # continuation backslashes.

				        elsif (/^\.\\"\s*JOIN(SH)?/) {
				            my $is_shell = defined($1);
				            my $one = <STDIN>;
				            my $two = <STDIN>;
				            last LINE unless defined $one;
				            $two = "" unless defined $two;
				            $one =~ s/\s*\\e\s*$// if $is_shell;
				            chomp($one);
				            $two =~ s/^\s+//;
				            $_ = "$one $two";
				            redo LINE;    # Process the joined lines
				        }

				        # .EX/.EE are used in the pcredemo page to bracket the entire program,
				        # which is unmodified except for turning backslash into "\e".

				        elsif (/^\.EX\s*$/) {
				            print TEMP "<PRE>\n";
				            while (<STDIN>) {
				                last if /^\.EE\s*$/;
				                s/\\e/\\/g;
				                s/&/&amp;/g;    # must come first, before entities are added
				                s/</&lt;/g;
				                s/>/&gt;/g;
				                print TEMP;
				            }
				            print TEMP "</PRE>\n";    # close the section opened above
				        }

				        # Ignore anything not recognized

				        next LINE;
				    }

				    # Line does not begin with a dot. Replace blank lines with new paragraphs

				    if (/^\s*$/) {
				        end_para() if ($wrotetext);
				        next LINE;
				    }

				    # Convert fonts changes and output an ordinary line. Ensure that indented
				    # lines are marked as literal.

				    $_ = do_line($_);
				    new_para() if (!$inpara);

				    if (/^\s/) {
				        if (!$inpre) {
				            print TEMP "<pre>\n";
				            $inpre = 1;
				        }
				    }
				    elsif ($inpre) {
				        print TEMP "</pre>\n";
				        $inpre = 0;
				    }

				    # Add <br> to the end of a non-literal line if we are within .nf/.fi

				    $_ .= "<br>\n" if (!$inpre && $innf);

				    print TEMP;
				    $wrotetext = 1;
				}

				# The TOC, if present, will have been written - terminate it

				print "</ul>\n" if ($toc);

				# Copy the remainder to the standard output. The temporary file was
				# opened for reading and writing, so rewinding it is enough.

				seek(TEMP, 0, 0) || die "Can't rewind temporary file: $!\n";

				print while (<TEMP>);

				print "$_\n" for (
				    "<p>",
				    'Return to the <a href="index.html">PCRE index page</a>.',
				    "</p>",
				);

				close(TEMP);
				312
				313	# End