#!/usr/bin/perl

# Read every input line (files named on the command line, or STDIN),
# reduce it to lowercase words and sentence-ending periods, and append
# the result to $txt with a newline after each period.
while (my $line = <>) {
    # Fold ASCII upper case to lower case.
    $line =~ tr/A-Z/a-z/;

    # Drop apostrophes so that e.g. "don't" stays one word ("dont").
    $line =~ tr/'//d;

    # Remove a period that is followed by exactly one space and then a
    # non-space character, keeping the space and the character.
    # NOTE(review): presumably this treats such periods as abbreviations
    # rather than sentence ends -- confirm against the input corpus.
    $line =~ s/\.( \S)/$1/g;

    # Colons, semicolons, question marks and exclamation marks all count
    # as sentence terminators.
    $line =~ tr/:;?!/./;

    # Every run of anything other than a-z or '.' becomes a single space.
    # This also covers hyphens, commas and the line's trailing newline, so
    # no separate passes are needed for them (a dedicated "--" pass after
    # a "-" pass could never match anyway).
    $line =~ s/[^a-z.]+/ /g;

    # Put a newline after each period so the text can later be split into
    # sentences on newlines.
    $line =~ s/\./.\n/g;

    $txt .= $line;
}

# Break the normalised text into sentences (one per newline-separated
# chunk), tidy each one, echo it to STDERR and store its words in @w as
# an array reference.
foreach my $sentence (split /\n/, $txt) {
    # Trim both ends, then squeeze internal whitespace runs to one space.
    $sentence =~ s/^\s+//;
    $sentence =~ s/\s+$//;
    $sentence =~ s/\s+/ /g;

    # The terminating period has done its job as a separator; remove it.
    $sentence =~ s/\.//g;

    print STDERR "$sentence\n";

    # split ' ' skips leading whitespace and splits on whitespace runs.
    my @words = split ' ', $sentence;
    push @w, \@words;
}

# Walk every stored sentence and, for each word occurrence, update:
#   $total        - running count of all word occurrences
#   $count{word}  - number of occurrences of that word
#   $score{word}  - sum of the word's zero-based positions in its sentences
for my $sentence (@w) {
    for my $position (0 .. $#$sentence) {
        my $word = $sentence->[$position];
        $total++;
        $count{$word}++;
        $score{$word} += $position;
    }
}

# Turn each word's summed position into its mean position by dividing by
# the word's occurrence count, and report the result on STDERR.
foreach my $word (keys %count) {
    $score{$word} /= $count{$word};
    print STDERR "score of $word is $score{$word}\n";
}

# Canvas dimensions used for both the SVG element and the word placement.
$width  = 1200;
$height = 600;

# Write the XML declaration, the opening <svg> element sized to the canvas,
# and the opening <g> group that carries the shared text attributes.
print
    qq{<?xml version="1.0" encoding="UTF-8"?>\n},
    qq{<svg width="$width" height="$height" viewBox="0 0 $width $height" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"\n},
    qq{ xml:space="preserve" version="1.1"  baseProfile="full">\n},
    qq{<g font-size="9" font-family="austin" text-anchor="middle">\n};

# listmax(LIST)
#
# Return the numerically largest value in LIST.  An empty LIST yields 0.
# The running maximum is seeded from the first element rather than from 0,
# so a list containing only negative numbers returns its true maximum.
sub listmax {
    return 0 unless @_;

    my ($max, @rest) = @_;
    for my $value (@rest) {
        $max = $value if $value > $max;
    }
    return $max;
}

# Report the total number of word occurrences, then discard every word
# whose occurrence count is below one twenty-thousandth of that total.
print STDERR "$total\n";

my $min_count = $total / 20000;
my @rare = grep { $count{$_} < $min_count } keys %count;

# Remove the rare words from both tables in one go via hash slices.
delete @count{@rare};
delete @score{@rare};

# Largest natural log of any remaining word count, and largest remaining
# mean-position score; both are used to scale the plot.
my @log_counts = map { log($_) } values %count;
$maxcount = listmax(@log_counts);
$maxscore = listmax(values %score);
print STDERR "max score: $maxscore\n";

# Emit one <text> element per remaining word, in ascending score order.
#   x grows with the word's mean position (score), scaled so the largest
#     score lands at $width;
#   y shrinks as log(count) grows, scaled so the largest log count lands
#     at 20 and a log count of 0 lands at $height + 20.
#
# The scale factors are computed once up front.  Either maximum can be 0
# (every word at position 0, or every word occurring exactly once so that
# log(count) is 0); in that case the factor is 0 instead of dividing by
# zero, which places all words at the origin of that axis.
my $x_per_score = $maxscore ? $width  / $maxscore : 0;
my $y_per_log   = $maxcount ? $height / $maxcount : 0;

for my $word (sort { $score{$a} <=> $score{$b} } keys %score) {
    my $x = int($x_per_score * $score{$word});
    my $y = $height + 20 - int($y_per_log * log($count{$word}));
    print "<text x='$x' y='$y'>$word</text>\n";
}

# Close the group and the SVG document.
print "</g></svg>";
