#!/usr/bin/perl -w
# Fallback for shells that do not honour the #! line: a Bourne-style shell
# executes the next statement and re-runs this script under perl, whereas
# perl itself reads it as "eval '...' if 0;" and therefore never runs it.
eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0;

# Revision-control identification string; printed by the -v option.
$version = '$Id: bib2xhtml.pl,v 1.1 2006/10/11 22:56:31 tgkolda Exp $';

#
# Convert from bibtex to XHTML.
#
# (C) Copyright 1995, 1996 David Hull.
# (David Hull / hull@cs.uiuc.edu / http://www.uiuc.edu/ph/www/dlhull)
#
# (C) Copyright 2002-2004 Diomidis Spinellis
# http://www.spinellis.gr
#
# This program is free software.  You can redistribute it and/or modify
# it under the terms of the GNU General Public License.  See the
# files README and COPYING for details.
#
# This source code contains UTF-8 characters.  You might want to use
# an appropriate editor, if you want to view/modify the LaTeX to Unicode
# substitution commands.
#

use Getopt::Std;
use Math::BigFloat;
require 'ctime.pl';

# Label styles: one numeric code per style, plus a lookup from the style
# name (as written in a "label-style:" definition of a .bbl file) to
# that code.
$LABEL_PLAIN    = 1;
$LABEL_NUMBERED = 2;
$LABEL_DEFAULT  = 3;

%label_styles = (
    'plain'    => $LABEL_PLAIN,
    'numbered' => $LABEL_NUMBERED,
    'default'  => $LABEL_DEFAULT,
);

# Opening and closing list tags (without the angle brackets), indexed by
# label style code.
@list_start[$LABEL_PLAIN, $LABEL_NUMBERED, $LABEL_DEFAULT] =
    ("ul", 'dl compact="1"', "dl");
@list_end[$LABEL_PLAIN, $LABEL_NUMBERED, $LABEL_DEFAULT] =
    ("/ul", "/dl", "/dl");

# Temporary files to delete when the program finishes.
@tmpfiles = ();

# usage()
#   Print a summary of the command-line syntax on standard error and
#   terminate the program with exit status 1.
sub usage {
    # Show the program under its base name, without any leading path.
    ($program = $0) =~ s+^.*/++;

    my $help = <<_EOF_;
usage: $program [-a] [-b bibtex-options] [-c] [-d delim] [-h heading] [-i]
                [-k] [-m macro file] [-r] [-s style] [-t] [-u] [-v]
		sourcefile [htmlfile]
    -a  Write abstract to htmlfile.
    -b bibtex-options
	Options to pass to bibtex.
    -c	Sort chronologically, by year and then by author.
    -d delim
	Specify bibliography delimiter.
    -h heading
	String to use instead of default title when creating a new htmlfile.
	If updating an existing htmlfile, this option is ignored.
    -i Use included citations
    -k  In labeled styles append to the label of each entry its bibtex key.
    -m macro file
	Specify an additional macro file.
    -r	Sort in reverse chronological order.
    -s style
	Control style of bibliography:
	(empty, plain, alpha, named, unsort, or unsortlist).
    -t  Write timestamp to htmlfile.
    -u  Output a Unicode-coded document.
    -v  Report the version number.
_EOF_

    print STDERR $help;
    exit(1);
}

# openCommand(path)
#   Return a two-element list: the two-argument open() specification
#   needed to read the (perhaps compressed) file at path, and a word
#   describing the compression ("Compressed", "Gzipped", or "" for none).
#   The decision is based on the file name suffix only.
sub openCommand {
    my ($path) = @_;

    # .Z: decompress with uncompress.
    return ("uncompress -c $path |", "Compressed") if ($path =~ m/\.Z$/);

    # .gz or .z: decompress with gzip.
    return ("gzip -d -c $path |", "Gzipped") if ($path =~ m/\.g?z$/);

    # Anything else is read directly.
    return ("<$path", "");
}

# Kinds of paper link that doPaperLinks looks for in each entry (as
# "<!-- KIND: url -->" comments).  For a local file of a kind other than
# DOI, doPaperLinks calls the sub named KIND . "PageCount".
@paperTypes = ("PostScript", "PDF", "DVI", "DOI");

# PostScriptPageCount(cmd)
#   Return the page count of a PostScript document, taken from the
#   first "%%Pages: N" header comment that precedes "%%EndComments".
#   cmd is a complete two-argument open() specification ("<file" or
#   "command |"), as returned by openCommand.
#   Returns undef if the document cannot be opened (after printing a
#   warning), if no such comment is found, or if the count is not
#   greater than zero.
sub PostScriptPageCount {
    my ($cmd) = @_;
    my $pageCount;

    #print "in PostScriptPageCount $cmd\n";

    # The two-argument form of open is intentional: $cmd carries its own
    # mode ("<" prefix or "|" suffix).
    my $fh;
    if (!open($fh, $cmd)) {
	# The warning and the return must be separate statements: written
	# as "warn ..., return undef" the return would be evaluated as one
	# of warn's arguments and the warning would never be printed.
	warn "error opening $cmd: $!\n";
	return undef;
    }

    # The caller may have changed the input record separator (the main
    # program reads in paragraph mode); we need single lines here.
    local $/ = "\n";

    while (defined(my $line = <$fh>)) {
	last if ($line =~ m/^%%EndComments/);
	if ($line =~ m/^%%Pages:\s*(\d+)/) {
	    $pageCount = $1 if ($1 > 0);
	    last;
	}
    }
    close($fh);

    return $pageCount;
}

# PDFPageCount(cmd)
#   Page counts of PDF files are not determined: the argument is ignored
#   and the result is always undef.
sub PDFPageCount {
    return undef;
}

# DVIPageCount(cmd)
#   Return the page count of a DVI document by running it through
#   "dviselect :" and looking for the "Wrote N pages" message that
#   dviselect prints on its standard error.
#   cmd is a two-argument open() specification as returned by
#   openCommand: either "<file" or "decompressor file |".
#   Returns undef if no such message is seen or if the child process
#   cannot be created.
sub DVIPageCount {
    my ($cmd) = @_;
    my $pageCount;

    #print "in DVIPageCount $cmd\n";

    if ($cmd =~ m/^</) {
	# Simple file: becomes dviselect's standard input.
	$cmd = "dviselect : $cmd >/dev/null";
    } else {
	# Compressed file: $cmd already ends in "|", so append the consumer.
	$cmd .= "dviselect : >/dev/null";
    }

    # Fork a child whose standard output we read.  A forking open returns
    # undef on failure, the child's pid in the parent, and 0 in the child;
    # undef must not be mistaken for the child.
    my $pid = open(DVISELECT, "-|");
    if (!defined $pid) {
	warn "error forking for dviselect: $!\n";
	return undef;
    }
    if ($pid == 0) {
	# Child: look at dviselect's stderr by sending it down the pipe.
	open(STDERR, ">&STDOUT");
	{ exec $cmd; }
	# exec returns only on failure.  Leave at once, without flushing
	# the output buffers inherited from the parent, instead of
	# carrying on with the rest of the program as a second copy.
	require POSIX;
	POSIX::_exit(127);
    }

    # The caller may have changed the input record separator; we need
    # single lines here.
    local $/ = "\n";

    while (defined(my $line = <DVISELECT>)) {
	if ($line =~ m/[Ww]rote (\d+) pages/) {
	    $pageCount = $1;
	    last;
	}
    }
    close(DVISELECT);

    return $pageCount;
}

# doPaperLinks()
#   Rewrite the paper markers in $_ as links.  For each kind in
#   @paperTypes, a comment of the form "<!-- KIND: url -->" is replaced
#   with "(<a href="url">KIND</a>)".
#   - If url does not start with a scheme ("xxx:/") and KIND is not DOI,
#     it is taken to be a file name and searched for in @filedir; the
#     link then also shows the compression type, the page count (when
#     the KIND . "PageCount" routine yields one) and the size in MB.
#     A directory with an entry in %dirmap is replaced by that entry in
#     the link.  A file that is not found is reported on standard error
#     and its marker is left untouched.
#   - A DOI is turned into an http://dx.doi.org/ link labelled "doi:...".
sub doPaperLinks {
    my ($url, $ppaper, $cstr, $pstr, $sstr);

papertype:
    foreach my $paper (@paperTypes) {

	# Link label, and the size, page-count and compression decorations.
	($ppaper, $sstr, $pstr, $cstr) = ($paper, "", "", "");

	next papertype
	    unless (($url) = m/\<\!\-\- $paper:[\s\n]+(\S+)[\s\n]+\-\-\>/);

	if ($paper ne 'DOI' && $url !~ m/^[^\:\/]+\:\//) {
	    # $url looks like a file (it doesn't begin with http://,
	    # ftp://, etc.), so get more info.
	    my $file = $url;
	    my $path;
	    foreach my $dir (@filedir) {
		$path = join('/', $dir, $file);
		next unless ( -f $path);
		$url = (defined $dirmap{$dir})
		    ? join('/', $dirmap{$dir}, $file)
		    : $path;
		last;
	    }

	    # This test also primes the stat cache used by "-s _" below.
	    if (! -f $path) {
		print STDERR "couldn't find $file\n";
		next papertype;
	    }

	    my $opencmd;
	    ($opencmd, $cstr) = &openCommand($path);

	    # Get size, in MB rounded to one decimal place.
	    my $size = -s _;
	    $size = int( ($size * 10 / 1024 / 1024) + .5 )/10;
	    $sstr = ", $size MB";

	    # Get page count from the routine named after the paper kind.
	    my $pageCountRoutine = $paper . "PageCount";
	    my $pageCount = &$pageCountRoutine($opencmd);
	    $pstr = ", $pageCount pages" if (defined $pageCount);

	    # Get compression type.
	    $cstr = "$cstr " if ($cstr ne "");
	} elsif ($paper eq 'DOI' &&
		 (($url =~ m/^doi:(.*)/i) ||
		  ($url =~ m/^http:\/\/[\w.]+\/(.*)/i) ||
		  ($url =~ m/^(.*)$/))) {
	    # Convert the DOI URL into an HTTP link
	    $url = "http://dx.doi.org/$1";
	    $ppaper = "doi:$1";
	}

	s/\<\!\-\- $paper:[\s\n]+\S+[\s\n]+\-\-\>/(<a href=\"$url\">${cstr}$ppaper<\/a>$pstr$sstr)/;
    }
}

# html_encode(string)
#   Return a copy of string in which the characters &, <, > and " are
#   replaced with their character entities.
sub html_encode {
    my ($text) = @_;

    my %entity_for = (
	'&' => '&amp;',
	'<' => '&lt;',
	'>' => '&gt;',
	'"' => '&quot;',
    );

    # A single pass, so the ampersands of the inserted entities are not
    # themselves encoded.
    $text =~ s/([&<>"])/$entity_for{$1}/g;

    return $text;
}

# html_ent()
#   Convert the LaTeX accent, special-character, dash and Greek-letter
#   commands in $_ into HTML character entities, in place.
#   Braced arguments are expected in the form produced by the main loop,
#   where every brace is preceded by "\001" and a pair number, so that
#   \'{e} appears as \'\001N{e\001N}.
sub html_ent {
	# Accents.
	s/\\i\b/i/g;					# dotless i.
	s/\\\'(\001\d+)\{([AEIOUaeiou])\1\}/&$2acute;/g;	# acute accent \'{x}
	s/\\\'([AEIOUaeiou])/&$1acute;/g;			# acute accent \'x
	s/\\\`(\001\d+)\{([AEIOUaeiou])\1\}/&$2grave;/g;	# grave accent \`{x}
	s/\\\`([AEIOUaeiou])/&$1grave;/g;			# grave accent \`x
	s/\\\"(\001\d+)\{([AEIOUaeiouy])\1\}/&$2uml;/g;	# umlaut \"{x}
	s/\\\"([AEIOUaeiouy])/&$1uml;/g;			# umlaut \"x
	s/\\\~(\001\d+)\{([ANOano])\1\}/&$2tilde;/g;	# tilde \~{x}
	s/\\\~([ANOano])/&$1tilde;/g;			# tilde \~x
	s/\\\^(\001\d+)\{([AEIOUaeiou])\1\}/&$2circ;/g;	# circumflex \^{x}
	# The character class below used to lack its opening bracket, so
	# the unbraced form was never converted.
	s/\\\^([AEIOUaeiou])/&$1circ;/g;			# circumflex \^x
	s/\\c(\001\d+)\{([Cc])\1\}/&$2cedil;/g;		# cedilla \c{x}
	# The following accents have no HTML equivalent.
	# (This list is still not complete.)
	s/\\u(\001\d+)\{(.)\1\}/$2/g;			# breve accent \u{x}
	s/\\v(\001\d+)\{(.)\1\}/$2/g;			# hacek accent \v{x}
	s/\\([lL])\b/$1/g;					# slashed l
	s/\\\=(\001\d+)\{(.)\1\}/$2/g;			# macron \={x}
	s/\\\=(.)/$1/g;					# macron accent \=x
	s/\\\.(\001\d+)\{(.)\1\}/$2/g;			# dot \.{x}
	s/\\\.(.)/$1/g;					# dot accent \.x

	# Other special characters.
	s/\\([Oo])\b\s*/&$1slash;/g;	# \[Oo] -> &[Oo]slash;
	s/\\AA\b\s*/&Aring;/g;		# \AA -> &Aring;
	s/\\aa\b\s*/&aring;/g;		# \aa -> &aring;
	s/\\AE\b\s*/&AElig;/g;		# \AE -> &AElig;
	s/\\ae\b\s*/&aelig;/g;		# \ae -> &aelig;
	s/\\OE\b\s*/&OElig;/g;		# \OE -> &OElig; (as in utf_ent)
	s/\\oe\b\s*/&oelig;/g;		# \oe -> &oelig; (as in utf_ent)
	s/\\ss\b\s*/&szlig;/g;		# \ss -> &szlig;
	s/\\S\b\s*/&sect;/g;		# \S -> &sect;
	s/\\P\b\s*/&para;/g;		# \P -> &para;
	s/\\pounds\b\s*/&pound;/g;	# \pounds -> &pound;
	s/\?\`/&iquest;/g;		# ?` -> &iquest;
	s/\!\`/&iexcl;/g;		# !` -> &iexcl;

	# Dashes.
	# Try to be careful to not change the dashes in HTML comments
	# (<!-- comment -->) to &ndash;s.
	s/\-\-\-/&mdash;/g;		# --- -> &mdash;
	s/([^\!])\-\-([^\>])/$1&ndash;$2/g;	# -- -> &ndash;
	#s/\-\-\-/\227/g;		# --- -> &mdash;
	#s/([^\!])\-\-([^\>])/$1\226$2/g;	# -- -> &ndash;

	# Upper and lower case greek
	s/\\([aA]lpha)\b/&$1;/g;
	s/\\([bB]eta)\b/&$1;/g;
	s/\\([gG]amma)\b/&$1;/g;
	s/\\([dD]elta)\b/&$1;/g;
	s/\\varepsilon\b/&epsilon;/g;
	s/\\([eE]psilon)\b/&$1;/g;
	s/\\([zZ]eta)\b/&$1;/g;
	s/\\([eE]ta)\b/&$1;/g;
	s/\\([tT]heta)\b/&$1;/g;
	s/\\vartheta\b/&theta;/g;
	s/\\([iI]ota)\b/&$1;/g;
	s/\\([kK]appa)\b/&$1;/g;
	s/\\([lL]ambda)\b/&$1;/g;
	s/\\([mM]u)\b/&$1;/g;
	s/\\([nN]u)\b/&$1;/g;
	s/\\([xX]i)\b/&$1;/g;
	s/\\([oO]micron)\b/&$1;/g;
	s/\\([pP]i)\b/&$1;/g;
	s/\\varpi\b/&pi;/g;
	s/\\([rR]ho)\b/&$1;/g;
	s/\\varrho\b/&rho;/g;
	s/\\([sS]igma)\b/&$1;/g;
	s/\\varsigma\b/&sigmaf;/g;
	s/\\([tT]au)\b/&$1;/g;
	s/\\([uU]psilon)\b/&$1;/g;
	s/\\([pP]hi)\b/&$1;/g;
	s/\\varphi\b/&phi;/g;
	s/\\([cC]hi)\b/&$1;/g;
	s/\\([pP]si)\b/&$1;/g;
	s/\\([oO]mega)\b/&$1;/g;
}

# utf_ent()
#   Convert the LaTeX accent, special-character, dash and Greek-letter
#   commands in $_ into the corresponding UTF-8 characters, in place.
#   Braced arguments are expected in the form produced by the main loop,
#   where every brace is preceded by "\001" and a pair number, so that
#   \'{e} appears as \'\001N{e\001N}.
#
#   The accents are table-driven: each letter/character pair is written
#   once and serves both the braced and the unbraced form.  This removes
#   the copy-and-paste slips of the former one-line-per-case version
#   (\'U gave U-grave; \~o, \^j, \c{R} and \c{r} were not converted).
sub utf_ent {
	# Accents.
	s/\\i\b/ı/g;					# dotless i.

	# Each row: accent command, whether the unbraced form (\'x) is
	# accepted as well as the braced one (\'{x}), and the letters the
	# accent applies to.  Letter commands (\c \H \u \v) are braced-only:
	# unbraced they would collide with macros such as \chi or \upsilon.
	my @accent_table = (
	    # acute accent
	    [ q{\'}, 1, {
		'A' => 'Á', 'C' => 'Ć', 'E' => 'É', 'I' => 'Í',
		'L' => 'Ĺ', 'N' => 'Ń', 'O' => 'Ó', 'R' => 'Ŕ',
		'S' => 'Ś', 'U' => 'Ú', 'Z' => 'Ź',
		'a' => 'á', 'c' => 'ć', 'e' => 'é', 'i' => 'í',
		'l' => 'ĺ', 'n' => 'ń', 'o' => 'ó', 'r' => 'ŕ',
		's' => 'ś', 'u' => 'ú', 'z' => 'ź',
	    } ],
	    # grave accent
	    [ q{\`}, 1, {
		'A' => 'À', 'E' => 'È', 'I' => 'Ì', 'O' => 'Ò', 'U' => 'Ù',
		'a' => 'à', 'e' => 'è', 'i' => 'ì', 'o' => 'ò', 'u' => 'ù',
	    } ],
	    # umlaut
	    [ q{\"}, 1, {
		'A' => 'Ä', 'E' => 'Ë', 'I' => 'Ï', 'O' => 'Ö',
		'U' => 'Ü', 'Y' => 'Ÿ',
		'a' => 'ä', 'e' => 'ë', 'i' => 'ï', 'o' => 'ö',
		'u' => 'ü', 'y' => 'ÿ',
	    } ],
	    # tilde
	    [ q{\~}, 1, {
		'A' => 'Ã', 'N' => 'Ñ', 'O' => 'Õ',
		'a' => 'ã', 'n' => 'ñ', 'o' => 'õ',
	    } ],
	    # circumflex
	    [ q{\^}, 1, {
		'A' => 'Â', 'E' => 'Ê', 'G' => 'Ĝ', 'H' => 'Ĥ', 'I' => 'Î',
		'J' => 'Ĵ', 'O' => 'Ô', 'U' => 'Û', 'W' => 'Ŵ', 'Y' => 'Ŷ',
		'a' => 'â', 'e' => 'ê', 'g' => 'ĝ', 'h' => 'ĥ', 'i' => 'î',
		'j' => 'ĵ', 'o' => 'ô', 'u' => 'û', 'w' => 'ŵ', 'y' => 'ŷ',
	    } ],
	    # cedilla \c{x}
	    [ q{\c}, 0, {
		'C' => 'Ç', 'c' => 'ç', 'K' => 'Ķ', 'k' => 'ķ',
		'L' => 'Ļ', 'l' => 'ļ', 'N' => 'Ņ', 'n' => 'ņ',
		'R' => 'Ŗ', 'r' => 'ŗ',
	    } ],
	    # double acute accent \H{x}
	    [ q{\H}, 0, {
		'O' => 'Ő', 'U' => 'Ű', 'o' => 'ő', 'u' => 'ű',
	    } ],
	    # breve accent \u{x}
	    [ q{\u}, 0, {
		'A' => 'Ă', 'E' => 'Ĕ', 'G' => 'Ğ', 'I' => 'Ĭ',
		'O' => 'Ŏ', 'U' => 'Ŭ',
		'a' => 'ă', 'e' => 'ĕ', 'g' => 'ğ', 'i' => 'ĭ',
		'o' => 'ŏ', 'u' => 'ŭ',
	    } ],
	    # hacek/caron accent \v{x}
	    [ q{\v}, 0, {
		'C' => 'Č', 'D' => 'Ď', 'E' => 'Ě', 'L' => 'Ľ',
		'N' => 'Ň', 'Z' => 'Ž',
		'c' => 'č', 'd' => 'ď', 'e' => 'ě', 'l' => 'ľ',
		'n' => 'ň', 'z' => 'ž',
	    } ],
	    # macron
	    [ q{\=}, 1, {
		'A' => 'Ā', 'E' => 'Ē', 'O' => 'Ō', 'U' => 'Ū',
		'a' => 'ā', 'e' => 'ē', 'o' => 'ō', 'u' => 'ū',
	    } ],
	    # dot
	    [ q{\.}, 1, {
		'G' => 'Ġ', 'L' => 'Ŀ', 'Z' => 'Ż',
		'g' => 'ġ', 'l' => 'ŀ', 'z' => 'ż',
	    } ],
	);

	foreach my $accent (@accent_table) {
	    my ($command, $bare_ok, $char_for) = @$accent;
	    # All keys are single ASCII letters, so they can be placed in a
	    # character class as they are.
	    my $letters = join('', sort keys %$char_for);

	    s/\Q$command\E(\001\d+)\{([$letters])\1\}/$char_for->{$2}/g;	# \'{x}
	    s/\Q$command\E([$letters])/$char_for->{$1}/g if ($bare_ok);	# \'x
	}

	# slashed l
	s/\\l\b/ł/g;
	s/\\L\b/Ł/g;

	# Other special characters.
	s/\\O\b\s*/Ø/g;
	s/\\o\b\s*/ø/g;
	s/\\AA\b\s*/Å/g;
	s/\\aa\b\s*/å/g;
	s/\\AE\b\s*/Æ/g;
	s/\\ae\b\s*/æ/g;
	s/\\OE\b\s*/Œ/g;
	s/\\oe\b\s*/œ/g;
	s/\\ss\b\s*/ß/g;
	s/\\S\b\s*/§/g;
	s/\\P\b\s*/¶/g;
	s/\\pounds\b\s*/£/g;
	s/\?\`/¿/g;
	s/\!\`/¡/g;

	# en and em dashes
	# Try to be careful to not change the dashes in HTML comments
	# (<!-- comment -->) to en dashes.
	s/\-\-\-/—/g;			# --- -> &#x2014
	s/([^\!])\-\-([^\>])/$1–$2/g;	# -- -> &#x2013

	# Greek letters, upper and lower case, including the \var forms.
	my %greek = (
	    'Alpha' => 'Α', 'Beta' => 'Β', 'Gamma' => 'Γ', 'Delta' => 'Δ',
	    'Epsilon' => 'Ε', 'Zeta' => 'Ζ', 'Eta' => 'Η', 'Theta' => 'Θ',
	    'Iota' => 'Ι', 'Kappa' => 'Κ', 'Lambda' => 'Λ', 'Mu' => 'Μ',
	    'Nu' => 'Ν', 'Xi' => 'Ξ', 'Omicron' => 'Ο', 'Pi' => 'Π',
	    'Rho' => 'Ρ', 'Sigma' => 'Σ', 'Tau' => 'Τ', 'Upsilon' => 'Υ',
	    'Phi' => 'Φ', 'Chi' => 'Χ', 'Psi' => 'Ψ', 'Omega' => 'Ω',

	    'alpha' => 'α', 'beta' => 'β', 'gamma' => 'γ', 'delta' => 'δ',
	    'varepsilon' => 'ε', 'epsilon' => 'ε', 'zeta' => 'ζ',
	    'eta' => 'η', 'theta' => 'θ', 'vartheta' => 'θ', 'iota' => 'ι',
	    'kappa' => 'κ', 'lambda' => 'λ', 'mu' => 'μ', 'nu' => 'ν',
	    'xi' => 'ξ', 'omicron' => 'ο', 'pi' => 'π', 'varpi' => 'π',
	    'rho' => 'ρ', 'varrho' => 'ρ', 'sigma' => 'σ',
	    'varsigma' => 'ς', 'tau' => 'τ', 'upsilon' => 'υ',
	    'phi' => 'φ', 'varphi' => 'φ', 'chi' => 'χ', 'psi' => 'ψ',
	    'omega' => 'ω',
	);
	# The trailing \b makes the order of the alternatives irrelevant:
	# a name only matches when it is the whole macro name.
	my $greek_names = join('|', sort keys %greek);
	s/\\($greek_names)\b/$greek{$1}/g;
}


# Mention every option variable that getopts() may set, so that perl -w
# does not report any of them as an "identifier used only once".
($opt_a, $opt_b, $opt_c, $opt_D, $opt_d, $opt_h, $opt_m, $opt_r) = ();
($opt_i, $opt_k, $opt_s, $opt_t, $opt_v, $opt_u) = ();

$macrofile = '';

# Remember the complete invocation, HTML-encoded, before getopts()
# removes the options from @ARGV.
$command_line = &html_encode(join(' ', $0, @ARGV));

&usage unless getopts("ab:cd:D:h:ikm:rs:tuv");

if (defined($opt_v)) {
	print "$version\n";
	exit 0;
}

# Exactly one source file, optionally followed by one HTML file.
&usage unless (@ARGV == 1 || @ARGV == 2);

# The source is either a bibliography (.bib) or a LaTeX auxiliary (.aux)
# file; its name without the extension is the default delimiter.
if ($ARGV[0] =~ m/\.bib$/) {
    ($bibfile = $ARGV[0]) =~ s/\.bib$//;
    $delimiter = $bibfile;
} elsif ($ARGV[0] =~ m/\.aux$/) {
    if ($opt_i) {
        print STDERR "source file must be a bibliography (.bib) file when run with the -i switch \n";
        &usage;
    }
    ($citefile = $ARGV[0]) =~ s/\.aux$//;
    $delimiter = $citefile;
} else {
    print STDERR "Unknown file extension on $ARGV[0]\n";
    &usage;
}

$htmlfile = $ARGV[1] if (@ARGV == 2);

$delimiter = $opt_d if (defined($opt_d));
#TGK
if (defined($opt_h)) {
    $title = $opt_h;
} else {
    $title = "Bibliography generated from $ARGV[0]";
}
# The trailing comma lets the macro file be prepended to the .bib name.
$macrofile = "$opt_m," if (defined($opt_m));

# Choose the BibTeX style (.bst) file and the label style from the -s
# option; the default style is 'empty'.
$opt_s = 'empty' if (! defined $opt_s);
{
    # Built-in styles: name => [ .bst base name, label style ].
    my %builtin_style = (
	'empty'      => [ "html-n", $LABEL_PLAIN ],
	'plain'      => [ "html-n", $LABEL_NUMBERED ],
	'alpha'      => [ "html-a", $LABEL_DEFAULT ],
	'named'      => [ "html-n", $LABEL_DEFAULT ],
	'unsort'     => [ "html-u", $LABEL_NUMBERED ],
	'unsortlist' => [ "html-u", $LABEL_PLAIN ],
    );

    if (exists $builtin_style{$opt_s}) {
	($bstfile, $label_style) = @{$builtin_style{$opt_s}};
    } elsif ($opt_s =~ s/\.bst$//) {
	# A user-supplied style file, given with its .bst extension.
	# label-style will be defined in .bst file.
	$bstfile = $opt_s;
    } else {
	# Report the offending style name (this used to print $_, which
	# holds nothing meaningful at this point).
	print STDERR "Unknown style: $opt_s\n";
	&usage;
    }
}

if ($bstfile eq "html-u" && ($opt_r || $opt_c)) {
    print STDERR "Unsorted styles do not support a sort specification\n";
    exit(1);
}

# Variants of a style file are named by appending one letter per option:
# c (chronological), r (reverse chronological), a (with abstract).
$bstfile .= "c" if (defined ($opt_c));
$bstfile .= "r" if (defined ($opt_r));
$bstfile .= "a" if (defined ($opt_a));


# PostScript and PDF files are assumed to be in same directory
# as the target HTML file (or in the current directory when the HTML
# file name has no directory part or is not given).
push @filedir,
    ((defined($htmlfile) && ($htmlfile =~ m+(^.*)/+)) ? $1 : ".");

# -D adds a comma-separated list of further directories to search.  An
# element of the form "dir@url" makes links to files found in dir use
# url in place of dir.
if (defined $opt_D) {
    foreach my $spec (split(/\,/, $opt_D)) {
	my ($dir, $url) = ($spec, $spec);
	$url = $1 if ($dir =~ s/\@(.*)$//);
	push @filedir, $dir;
	$dirmap{$dir} = $url;
    }
}

# Files created from here on are private until the final chmod.
umask(077);

# The new document is written to a temporary file next to the target
# (renamed over it at the very end), or to standard output when no HTML
# file was named.
open(HTMLFILE, (defined($htmlfile) ? ">$htmlfile$$" : ">&STDOUT"))
    || die "error opening "
	. (defined($htmlfile) ? "$htmlfile$$" : "standard output")
	. ": $!\n";

if (defined($htmlfile) && open(OHTMLFILE, "$htmlfile")) {
    # Updating an existing file: keep its permission bits.
    $mode = (stat OHTMLFILE)[2] & 0xfff;
    $updating = 1;
} else {
    $mode = 0644;
    $updating = 0;

    # An existing HTML file does not exist, so output some boilerplate.
    if ($opt_u) {
        $enc = 'UTF-8';
    } else {
        $enc = 'US-ASCII';
    }
    # The second part is single-quoted (q{}) so that the revision string
    # with its dollar signs is printed literally.
    print HTMLFILE
qq{<?xml version="1.0" encoding="$enc"?>
<!DOCTYPE html
	PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
	"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>$title</title>
<meta http-equiv="Content-type" content="text/html; charset=$enc" />} . q{
<meta name="Generator" content="$Id: bib2xhtml.pl,v 1.1 2006/10/11 22:56:31 tgkolda Exp $" />
</head>
<body bgcolor="white">
};
}

$beginstring = "<!-- BEGIN CITATIONS $delimiter -->";
$endstring = "<!-- END CITATIONS $delimiter -->";

@citations = ();

# With -i, the citations to process are listed in the existing HTML file
# between the two marker lines above.  The file is copied to the output
# up to and including the end marker, and the \command{...} part of each
# line in between is collected for the .aux file.
if ($opt_i && $updating) {
 loop:
  while (<OHTMLFILE>) {
    print HTMLFILE;
    # \Q...\E: the delimiter is literal text, not a pattern.
    last loop if m/^\Q$beginstring\E$/;
  }
 loop:
  while (<OHTMLFILE>) {
    print HTMLFILE;
    last loop if m/^\Q$endstring\E$/;
    push(@citations, $2) if m/^([^\\]*)?(.+\})(.*)?$/;
  }
  # Tell bibtex where the entries are.  (This used to emit a misspelt
  # "\bibata" command and to leave out the -m macro file that the
  # non -i case passes along.)
  push(@citations, "\\bibdata{$macrofile$bibfile}");
}

# Create an .aux file for bibtex to read.

$auxfile = "bib$$";
push(@tmpfiles, "$auxfile.aux");

open(AUXFILE, ">$auxfile" . ".aux")
    || die "error opening $auxfile.aux: $!\n";

print AUXFILE "\\relax\n\\bibstyle{$bstfile}\n";

if (defined($citefile)) {
    # Source is a LaTeX .aux file: copy its \citation and \bibdata lines.
    $citefile .= ".aux";
    open(CITEFILE, "<$citefile") || die "error opening $citefile: $!\n";
    while (<CITEFILE>) {
	# The brace is escaped: an unescaped literal "{" in a pattern is
	# deprecated and is not accepted by all perl versions.
	print AUXFILE $_ if (m/^\\(citation|bibdata)\{/);
    }
    close(CITEFILE);
} elsif (@citations) {
    # -i: use the citations collected from the existing HTML file.
    foreach $citation (@citations) {
	print AUXFILE "$citation\n";
    }
} else {
    # Source is a .bib file: cite every entry in it.
    print AUXFILE "\\citation{*}\n\\bibdata{$macrofile$bibfile}\n";
}

close(AUXFILE);


# run bibtex, redirecting bibtex's output from STDOUT to STDERR.

push(@tmpfiles, "$auxfile.blg");
push(@tmpfiles, "$auxfile.bbl");

#Flush HTMLFILE to avoid duplicate buffer writes after the fork
select(HTMLFILE);
$| = 1; $| = 0;
select(STDOUT);

#TGK - force use of local html-ua.bst file.  Options given with -b are
# kept and passed after it (they used to be silently discarded).
$opt_b = (defined($opt_b) && $opt_b ne "")
    ? "-include-directory=. $opt_b"
    : "-include-directory=.";

# We attempt to fork in order to redirect bibtex's stdout to stderr.
# This is needed when bib2xhtml is generating its output on the
# standard output.
# The shell redirection syntax used in the system() alternative
# is by no means portable.
eval {
    my $pid = fork;
    # fork returns undef when no child could be created; treat that
    # like a missing fork and fall back to system() below, rather than
    # mistaking this process for the child.
    die "fork failed: $!\n" unless (defined $pid);

    if ($pid == 0) {
	# Child.
	open(STDOUT, ">&STDERR");
	# Handle leakage in Win32 prevents the final rename()
	close(HTMLFILE);
	close(OHTMLFILE);
	{ exec('bibtex', split(/\s+/, $opt_b), $auxfile); }
	# exec returns only on failure.  The child must stop here; it
	# would otherwise carry on as a second copy of this program.
	print STDERR "error running bibtex: $!\n";
	exit(1);
    }
    wait;
};
# fork is not implemented on some non-Unix platforms.
if ($@) {
    # The fork failed (perhaps not implemented on this system).
    system("bibtex $opt_b $auxfile 1>&2");
}

$beginstring = "<!-- BEGIN BIBLIOGRAPHY $delimiter -->";
$endstring = "<!-- END BIBLIOGRAPHY $delimiter -->";

# When updating, copy the existing file up to the begin marker and skip
# the old bibliography, which ends at the end marker.
if ($updating) {
loop:
    while (<OHTMLFILE>) {
	# \Q...\E: the delimiter is literal text, not a pattern.
	last loop if m/^\Q$beginstring\E$/;
	print HTMLFILE;
    }
loop:
    while (<OHTMLFILE>) {
	last loop if m/^\Q$endstring\E$/;
    }
}

print HTMLFILE "$beginstring\n";
print HTMLFILE <<EOF;
<!--
    DO NOT MODIFY THIS BIBLIOGRAPHY BY HAND!  IT IS MAINTAINED AUTOMATICALLY!
    YOUR CHANGES WILL BE LOST THE NEXT TIME IT IS UPDATED!
-->
<!-- Generated by: $command_line -->
EOF
# Now we make two passes over the .bbl file.  In the first pass, we
# just collect the {cite, label} pairs, which we will use later for
# crossrefs.

$t = $auxfile . ".bbl";

# Paragraph mode: from here on, each read returns one blank-line
# separated entry.
$/ = "";

# Make a first pass through the .bbl file, collecting citation/label pairs.
open(BBLFILE, "<$t") || die "error opening $t: $!\n";
$nentry = 0;
loop:
while (<BBLFILE>) {
    # Check for definitions at start of .bbl file.
    if (($nentry == 0) && (m/^#/)) {
	if ((m/#\s*label-style:\s*(\S+)/) && (! defined $label_style)) {
	    $label_style = $label_styles{$1};
	    if (! defined $label_style) {
		# Name the style that was not recognised.
		print STDERR "label style unknown: $1\n";
		next loop;
	    }
	}
	next loop;
    }
    $nentry++;
    ($bcite, $blabel) = m+<dt><a name=\"([^\"]*)\">\[([^\]]*)\]</a></dt><dd>+;
    # An entry without the expected anchor has no key to record.
    next loop if (! defined $bcite);
    # $label_style is still undefined here when neither -s nor the .bbl
    # file determined it.
    $blabel = "$nentry"
	if (defined($label_style) && $label_style == $LABEL_NUMBERED);
    $bibcite{$bcite} = $blabel;
}
close(BBLFILE);

$label_style = $LABEL_DEFAULT if (! defined $label_style);
$list_start = $list_start[$label_style];
$list_end = $list_end[$label_style];

if (defined($opt_t)) {
    #TGK: Adding link to bibtex file
    # There is no .bib file to link to when the source was an .aux file.
    print HTMLFILE "<a href=\"". $bibfile .".bib\">BibTeX file</a> "
	if (defined($bibfile));
    print HTMLFILE "$nentry references, last updated " . &ctime(time) . "<p />\n";
}

#print HTMLFILE "<$list_start>\n\n";
print HTMLFILE "\n\n";

#foreach $key (sort (keys(%bibcite))) {
#    print "$key : $bibcite{$key}\n";
#}

# domath(text)
#   Return text, the contents of an inline math formula, with TeX
#   superscripts and subscripts converted to <sup> and <sub> elements
#   and ^\circ converted to the degree sign.  Braces are expected to
#   carry the "\001N" pair ids assigned by the main loop below.
sub domath {
    my ($text) = @_;
    # The character references end in a semicolon, as XHTML requires
    # (it used to be missing).
    $text =~ s/\^(\001\d+)\{\\circ\1\}/\&\#176;/g;	# ^{\circ}->degree
    $text =~ s/\^\\circ/\&\#176;/g;			# ^\circ->degree
    $text =~ s/\^(\001\d+)\{(.*)\1\}/<sup>$2<\/sup>/g;	# ^{x}
    $text =~ s/\^(\w)/<sup>$1<\/sup>/g;			# ^x
    $text =~ s/\_(\001\d+)\{(.*)\1\}/<sub>$2<\/sub>/g;	# _{x}
    $text =~ s/\_(\w)/<sub>$1<\/sub>/g;			# _x
    return $text;
}

# Second pass through the .bbl file: convert each entry from LaTeX to
# XHTML and write it to the output.
open(BBLFILE, "<$t") || die "error opening $t: $!\n";
$nentry = 0;
loop:
while (<BBLFILE>) {
    # Skip definitions at start of .bbl file.
    next loop if (($nentry == 0) && (m/^#/));

    $nentry++;

    # Protect \{, \}, and \$, and then assign matching {} pairs a unique ID.
    # Every brace becomes "\001N{" or "\001N}", where N is the same for
    # an opening brace and its closing partner.
    s/\\\{/\002/g;
    s/\\\}/\003/g;
    s/\\\$/\004/g;
    {
	local ($c, $l, $z) = (0, 0, ());
	s/([\{\}])/join("","\001",($1 eq "\{" ? $z[$l++]=$c++ : $z[--$l]),$1)/ge;
    }

    # bibtex sometimes breaks long lines by inserting "%\n".  We remove
    # that because it might accidentally break the line in the middle
    # of a URL.  We don't need to deal with TeX comments in general
    # because bibtex seems to munge them up anyway, so there shouldn't
    # be any in the bibliography file.
    s/\%\n//g;

    # bibtex's add.period$ knows how to avoid adding extra periods
    # when a block already ends in a period.  bib2xhtml's modifications
    # of bibtex's style files break that.  We fix it here.
    s/(\.(<\/cite>|<\/a>|\')+)\./$1/g;

    # Adjust beginning of entry based on bibliography style.
    if ($label_style == $LABEL_PLAIN) {

#TGK modifications

#	s:<dt>(<a name=\"[^\"]*\">)\[[^\]]*\](</a>)</dt><dd>:<li>$1$2:;
#	s:</dd>:</li>:;

        if ($opt_k)
        {
            #      $1          $2      $3
	    s:<dt>(<a name=\")([^\"]*)(\">)\[[^\]]*\](</a>)</dt><dd>:<hr><font color=\"white\">$2</font><br>$1$2$3:;
	    s:</dd>:</li>:;
        }
        else
        {
	   s:<dt>(<a name=\"[^\"]*\">)\[[^\]]*\](</a>)</dt><dd>:<hr><br>$1$2:;
	   s:</dd>:</li>:;
        }


	# Attempt to fix up empty <a name=...></a> tag, which some browsers
	# don't handle properly (even though it *is* legal HTML).
	# First try to combine a <a name=...></a> with a following <a ...>.
	s:(<a name=\"[^\"]*\")></a><a\b:$1:
	# If that doesn't work, try to swallow following word.
	or s:(<a name=\"[^\"]*\">)</a>([\w]+):$1$2<\/a>:;
    } elsif ($label_style == $LABEL_NUMBERED) {
	s:(<dt><a name=\"[^\"]*\">\[)[^\]]*(\]</a></dt><dd>):$1$nentry$2:;
    }

    # Append the key name, if asked so
    if ($opt_k && ($label_style == $LABEL_NUMBERED || $label_style == $LABEL_DEFAULT)) {
	# $1              $2      $3     $4      $5
	s:(<dt><a name=\")([^\"]*)(\">\[)([^\]]*)(\]</a></dt><dd>):$1$2$3$4 --- $2$5:;
    }

    # Attempt to fix up crossrefs, using the labels collected in %bibcite.
    while (m/(\\(cite(label)?)(\001\d+)\{([^\001]+)\4\})/) {
	$old = $1;
	$cmd = $2;
	$doxref = defined($3);
	$bcite = $5;
	if (! defined $bibcite{$bcite}) {
	    $blabel = " [" . $bcite . "]";
	} elsif ($doxref) {
	    $blabel = " <a href=\"#$bcite\">[" . $bibcite{$bcite} . "]<\/a>";
	} else {
	    $blabel = " [" . $bibcite{$bcite} . "]";
	}
	# Quote the matched text so that it can be used as a pattern.
	$old =~ s/(\W)/\\$1/g;
	s/\s*$old/$blabel/g;
    }
    # In some styles crossrefs become something like
    # "In Doe and Roe [Doe and Roe, 1995]."  Change this to
    # "In [Doe and Roe, 1995]." to remove the redundancy.
    s/In (<a href=\"[^\"]*\">)([^\[]+) \[(\2)/In $1\[$2/;

    # Handle the latex2html commands \htmladdnormallink{text}{url}
    # and \htmladdnormallinkfoot{text}{url}, and \href{url}{text}.
    s/\\htmladdnormallink(foot)?(\001\d+)\{([^\001]+)\2\}(\001\d+)\{([^\001]+)\4\}/<a href="$5">$3<\/a>/g;
    s/\\href(\001\d+)\{([^\001]*)\1\}(\001\d+)\{([^\001]*)\3\}/<a href="$2">$4<\/a>/g;

    s/\\&/&amp;/g;			# \& -> &amp;

    if ($opt_u) {
        utf_ent();
    } else {
    	html_ent();
    }

    # Handle \char123 -> &#123; (with ' for octal and " for hexadecimal).
    while (m/\\char([\'\"]?[0-9a-fA-F]+)/) {
	$o = $r = $1;
	if ($r =~ s/^\'//) {
	    $r = oct($r);
	} elsif ($r =~ s/^\"//) {
	    $r = hex($r);
	}
	s/\\char$o\s*/&#$r;/g;
    }

    # NOTE(review): this pattern expects "}" directly after the single
    # character, but at this point the closing brace carries a "\001N"
    # id, so it does not look as if this can match; {\etalchar{x}}
    # appears to be handled by the {\Xyz{Abc}} rule further down -- confirm.
    s/{\\etalchar\001(\d+)\{(.)}\001\1\}/$2/g;	# {\etalchar{x}} -> x

    s/\\par\b/<p \/>/g;

    s/\\url(\001\d+)\{([^\001]*)\1\}/<a href="$2">$2<\/a>/g; #\url{text} -> <a href="text">text</a>

    # There's no way to easily handle \rm and \textrm because
    # HTML has no tag to convert back to plain text.  Since it's very
    # difficult to do the right thing, we do the wrong thing, and just
    # remove them.
    s/(\001\d+)\{\\rm\s+([^\001]*)\1\}/$2/g;		# {\rm text} -> text
    s/\\textrm(\001\d+)\{([^\001]*)\1\}/$2/g;		# \textrm{text} -> text

    # This doesn't create correct HTML, because HTML doesn't allow nested
    # character style tags.  Oh well.
    s/(\001\d+)\{\\em\s+([^\001]*)\1\}/<em>$2<\/em>/g; # {\em text} -> <EM>text</EM>
    s/(\001\d+)\{\\it\s+([^\001]*)\1\}/<i>$2<\/i>/g;   # {\it text} -> <I>text</I>
    s/(\001\d+)\{\\bf\s+([^\001]*)\1\}/<b>$2<\/b>/g;   # {\bf text} -> <B>text</B>
    s/(\001\d+)\{\\tt\s+([^\001]*)\1\}/<tt>$2<\/tt>/g; # {\tt text} -> <TT>text</TT>

    s/\\emph(\001\d+)\{([^\001]*)\1\}/<em>$2<\/em>/g;  # \emph{text} -> <EM>text</EM>
    s/\\textit(\001\d+)\{([^\001]*)\1\}/<i>$2<\/i>/g;  # \textit{text} -> <I>text</I>
    s/\\textbf(\001\d+)\{([^\001]*)\1\}/<b>$2<\/b>/g;  # \textbf{text} -> <B>text</B>
    s/\\texttt(\001\d+)\{([^\001]*)\1\}/<tt>$2<\/tt>/g;# \texttt{text} -> <TT>text</TT>

    s/\\mathrm(\001\d+)\{([^\001]*)\1\}/$2/g;		# \mathrm{text} -> text
    s/\\mathnormal(\001\d+)\{([^\001]*)\1\}/$2/g;	# \mathnormal{text} -> text
    s/\\mathsf(\001\d+)\{([^\001]*)\1\}/$2/g;		# \mathsf{text} -> text
    s/\\mathbf(\001\d+)\{([^\001]*)\1\}/<b>$2<\/b>/g;	# \mathbf{text} -> <B>text</B>
    s/\\mathcal(\001\d+)\{([^\001]*)\1\}/<i>$2<\/i>/g;# \mathcal{text} -> <I>text</I>
    s/\\mathit(\001\d+)\{([^\001]*)\1\}/<i>$2<\/i>/g;	# \mathit{text} -> <I>text</I>
    s/\\mathtt(\001\d+)\{([^\001]*)\1\}/<tt>$2<\/tt>/g;# \mathtt{text} -> <TT>text</TT>

    # {\boldmath $mathstuff$} -> <B>mathstuff</B>
#    s/(\001\d+)\{\s*\\boldmath ?([^A-Za-z\{\}][^\{\}]*)\}/<b>$1<\/b>/g;

    # Handle superscripts and subscripts in inline math mode.
    s/(\$([^\$]+)\$)/&domath($2)/ge;			# $ ... $
    s/(\\\((([^\\]|\\[^\(\)])+)\\\))/&domath($2)/ge;	# \( ... \)

    # Remove \mbox.
    s/\\mbox(\001\d+)\{([^\001]*)\1\}/$2/g;		# \mbox{x}

    if ($opt_u) {
        s/([^\/\\])~/$1 /g;			# ~  non-breaking space - &#xa0;
        s/\\\,/ /g;				# \, thin space - &#x2009;
        s/\\ldots\b\s*/…/g;			# Horizontal ellipsis
    } else {
        s/([^\/\\])~/$1&nbsp;/g;			# ~  non-breaking space
        s/\\\,/&thinsp;/g;			# \, thin space
        s/\\ldots\b/&hellip;/g;			# Horizontal ellipsis
    }
    s/\\ / /g;					# \  (normal space)
    s/\\textasciitilde\b\s*/~/g;		# \textasciitilde -> ~

    # Non-alphabetic macros that we keep.
    s/\\([\#\&\%\~\_\^\|])/$1/g;

    # Non-alphabetic macros that we remove.
    #   (discretionary hyphen)
    #   (italic correction)
    s/\\\W//g;

    # Clean up things we don't handle.
#    s/\\//g;

    # The format {\Xyz{Abc}} is interpreted by BibTeX as a single letter
    # whose text is given by "Abc".  If we see this pattern, it is
    # likely that discarding the \Xyz will do the right thing.
    s/\001(\d+)\{\\[A-Za-z]+\001(\d+)\{([^\001]*)\001\2\}\001\1\}/$3/g;

    # Macro names may be meaningful, so keep them and don't run them together.
    s/\\([A-Za-z]+)/ $1 /g;

    # Remove an empty <a href=...></a> tag that bad cross-referencing
    # in the BibTeX file may have left us with.
    s+In <a href=\"[^\"]*\"></a>++;

    &doPaperLinks;

    # Get rid of { } ids, and put protected { } back.
    s/\001\d+[\{\}]//g;
    tr/\002\003\004/{}$/;

    print HTMLFILE $_;
}

close(BBLFILE);

#print HTMLFILE "<$list_end>\n\n$endstring\n";
print HTMLFILE "<hr>\n\n$endstring\n";

if ($updating) {
    # Copy the rest of the existing file unchanged.  Line mode is
    # restored for this: in the paragraph mode used for the .bbl file,
    # runs of blank lines would be collapsed while copying.
    local $/ = "\n";
    while (<OHTMLFILE>) {
	print HTMLFILE;
    }
    close (OHTMLFILE);
} else {
    # A new document: add the closing boilerplate.  (The entity
    # references now end in the semicolon that XHTML requires.)
    print HTMLFILE "<p>&nbsp;</p>\n";
    print HTMLFILE "<!--#include virtual=\"footer.html\"-->\n";
    print HTMLFILE "<p>&nbsp;</p>\n";
    print HTMLFILE "<p>&nbsp;</p>\n";
    print HTMLFILE "<p>&nbsp;</p>\n";
    print HTMLFILE "<p>&nbsp;</p>\n";
    print HTMLFILE "<p>&nbsp;</p>\n";
    print HTMLFILE "</body></html>\n";
}

close(HTMLFILE);

if (defined ($htmlfile)) {
    #$mode &= 0777;
    #print "setting $htmlfile$$ to $mode\n";
    #printf("mode = %lo\n", $mode);

    # Move the finished temporary file into place, with the permissions
    # of the file it replaces (or 0644 for a new one).
    chmod($mode, "$htmlfile$$");
    rename("$htmlfile$$", $htmlfile)
	|| warn "error renaming $htmlfile$$ to $htmlfile: $!\n";
}

unlink(@tmpfiles);

exit(0);
