#!/usr/bin/perl
#
# Create a labelled map file from a hand-engineered map file.
# Usage: <this script> <input-map-file> <output-map-file>
# 
#
# [20071103] (dipanjan)


use strict;
use warnings;

# Forward declaration: trim() is defined with a ($) prototype further down the
# file but is called before that definition. Declaring it here lets perl apply
# the prototype at the call site and avoids the "called too early to check
# prototype" warning.
sub trim($);

# Maps each entity name seen in the input to its sequential "Ennn" code.
my %entity = ();
my $ent_count = 0;
$entity{"E000"} = "E000";  # the unmarked entity case
my @labels = ();           # not used in this part of the script
my $tag_count = 0;

# Both file names are required: input map first, output map second.
@ARGV >= 2 or die "usage: $0 <input-map-file> <output-map-file>\n";
my ($in_path, $out_path) = @ARGV[0, 1];

# we assume that EACH LINE in the mapping file is UNIQUE wrt to the pattern
# this should have been taken care of by fix_map.pl
#
# Three-argument open: the file name is taken literally, so characters such
# as '>' or '|' in an argument cannot change the open mode.
open(my $in_fh, '<', $in_path)
  or die "$0: can't open map file '$in_path': $!\n";
my $mappings = do { local $/; <$in_fh> };   # slurp the whole file
close($in_fh);
$mappings = '' unless defined $mappings;

open(my $out_fh, '>', $out_path)
  or die "$0: can't open output map file '$out_path': $!\n";

# Records are separated by form feeds; each record is "fulltag<TAB>pattern".
foreach my $record (split /\f/, $mappings) {
  $record =~ s/^\n//;  # mapping file has a \n at start of line, for readability
  next if length($record) == 0;

  # /s lets the pattern part span several lines. In list context a failed
  # match returns the empty list, leaving both variables undefined.
  my ($fulltag, $origstr) = $record =~ /(.+?)\t(.+)/s;
  if ( not defined $origstr ) {
    # Without a tag/pattern pair there is nothing meaningful to label, so
    # skip the record instead of emitting an empty label line.
    warn "$0: skipping record without a tag<TAB>pattern pair: [$record]\n";
    next;
  }

  # trim() also removes a trailing newline, so no separate chomp is needed.
  $origstr = trim($origstr);

  # Escaped copy of the pattern; only used in the progress message below.
  my $string = $origstr;
  $string =~ s/([\+\.\?])/\\$1/g;    # need to escape RE special chars

  # set values for tag and entity ("TAG+ENTITY", or just "TAG")
  my ($tag, $ent);
  if ( $fulltag =~ /\+/ ) { ($tag, $ent) = split /\+/, $fulltag; }
  else { $tag = $fulltag; $ent = "E000"; }
  # split drops trailing empty fields ("TAG+" or "+"); treat those as empty
  # strings, which is how the undefined values were stringified before.
  $tag = '' unless defined $tag;
  $ent = '' unless defined $ent;

  # is this a new entity?
  if ( not defined $entity{$ent} ) { $entity{$ent} = sprintf "E%03d", ++$ent_count; }

  # make the label; note that we are just counting tags sequentially
  my $label = sprintf "%s_%04d+%s", $tag, ++$tag_count, $entity{$ent};
  print STDERR "$string => $fulltag --> [$label]\n";
  print {$out_fh} "$label\t$origstr\f";

  #$pattern = qr/\b$string\b/i;  # precompile search pattern
  #push @map, [$pattern,$label];
}

# Buffered write errors (e.g. disk full) only surface at close time.
close($out_fh)
  or die "$0: error writing output map file '$out_path': $!\n";

# trim(STRING)
# Returns a copy of STRING with leading and trailing whitespace removed.
# The caller's variable is not modified.
sub trim($)
{
	my ($text) = @_;
	# Alias the copy to $_ so both substitutions apply to it in turn.
	for ($text) {
		s/^\s+//;
		s/\s+$//;
	}
	return $text;
}
