#!/usr/local/bin/perl
# convert mail addresses and anchor-like constructs into URLs
# Getopt::Std is the core successor of the Perl 4 "getopts.pl" library,
# which no longer ships with modern perl. Its getopts() fills the same
# $opt_* package globals, so the rest of the script is unaffected.
use Getopt::Std;

# Parse the command line:
#   -b, -p   boolean switches, stored in $opt_b / $opt_p
#   -h, -t   switches taking an argument, stored in $opt_h / $opt_t
# Anything left in @ARGV afterwards is treated as input file names by the
# <> read further down.
getopts('bh:pt:');

# At least one of -t / -h has to be given, and -p and -b exclude each other.
if ((!$opt_t && !$opt_h) || ($opt_p && $opt_b)) {
    # The usage text goes to STDERR and the exit status is non-zero, so that
    # "htmlify ... > out.html" neither writes the message into the output
    # file nor reports success.
    print STDERR "usage:
htmlify -h \"your header\" -t \"your title\" [ -p | -b ] <file>
        -p: add <PRE> and </PRE> tags
        -b: append a <BR> tag to each line
";
    exit 1;
}

# Text for the <TITLE> element and the <H3> heading of the generated page.
$title  = $opt_t;
$header = $opt_h;


# Emit the opening part of the HTML document: the head with the page title,
# the start of the body and the heading line. $title and $header are
# interpolated exactly as given on the command line.
print <<"END_OF_PREAMBLE";
<HTML>
<HEAD>
<TITLE>$title</TITLE>
</HEAD>
<BODY>
<H3>$header</H3>
END_OF_PREAMBLE

# With -p the whole input is wrapped in a preformatted section.
if ($opt_p) {
    print "<PRE>\n";
}

# Read the complete input (the files named in @ARGV, or STDIN) into one
# string. $/ is only undefined inside the do-block, so the record separator
# is back to normal afterwards.
my $text = do { local $/; <> };
$text = '' unless defined $text;    # empty input

# Turn mail addresses and URLs into anchors.
$text = _linkify($text);

# Append <BR> to each line if requested.
$text =~ s#\n#<BR>\n#g if ($opt_b);

# Write out the result and close the document.
# NOTE(review): the input is emitted without HTML-escaping "&", "<" and ">";
# text containing those characters ends up as raw markup in the page.
print $text;

print "</PRE>" if $opt_p;
print "</BODY> </HTML>\n";

# _linkify(TEXT) returns a copy of TEXT in which
#   * http://, ftp://, gopher:// and wais:// URLs,
#   * literal mailto: and news: URLs, and
#   * bare mail addresses (anything of the form non-blank@non-blank)
# are wrapped in <A HREF="...">...</A>. Bare addresses get a "mailto:"
# prefix in the HREF; everything else is used as the HREF verbatim.
#
# All forms are handled by ONE substitution pass. Running one pass per
# form re-scans the anchors produced by the previous pass, so a URL that
# contains an "@" (ftp://user@host/file) was first wrapped as a mail
# address and then wrapped again inside its own HREF attribute.
#
# Known limitation: addresses are delimited by "\b", and "\b" does not see
# "a@x,b@y" (comma without a following blank) as two words, so such a list
# becomes a single mailto: anchor. Netscape 1.0N copes with that kind of
# URL, Mosaic 2.x does not; lists written with ", " are split correctly.
# Because of "\b", trailing punctuation such as "." or ">" stays outside
# the anchor.
sub _linkify {
    my ($input) = @_;

    $input =~ s{
        \b
        (?:
            # capture 1: something that already is a URL
            ( (?: (?:http|ftp|gopher|wais):// | (?:mailto|news): ) \S+ )
          |
            # capture 2: a bare mail address
            ( \S+ \@ \S+ )
        )
        \b
    }{
        defined($1) ? qq(<A HREF="$1">$1</A>)
                    : qq(<A HREF="mailto:$2">$2</A>)
    }gex;

    return $input;
}

