#! /usr/local/bin/perl -w
use strict;
# (C) 2007, Benoit Hudson
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# Create a graph of a set of proofs, from LaTeX sources. Output is in the
# graphviz 'dot' format.
# The following will generate a pdf of your proof:
# parse-proof foo.tex | dot -Tpdf -o foo.pdf
# Upon a \label in a theorem-like environment (what counts as theorem-like is
# mentioned below), stores the name of the theorem. In the subsequent proof
# environment, any \ref will create an edge to the theorem from the reference.
# You can use %\ref or \comment{\ref{...}} to avoid having the ref appear in
# the text.
# TODO: begin and end statements for all environments must be at the beginning
# of the line, or weird things may happen. This is because I'm lazy.
# TODO: I completely ignore comments. Commented-out sections of proofs will
# generate edges just as surely as anything else.
#
# Theorem-like environments are labeled below in %nickname. The name of the
# label must also match the environment: in a theorem, use \label{thm:foo}; in
# a lemma, lem:, and so on. All other labels are ignored. I assume that
# statements don't nest (i.e. we don't state a lemma while stating a theorem).
#
# In the output graph, definitions (def) are boxes, theorems (thm) are
# diamonds, everything else is the default.
# Generalize over kinds of statements. We ignore labels, refs, and
# environments not in this list.
my %environment = (
'thm' => 'theorem',
'fact' => 'fact',
'def' => 'definition',
'lem' => 'lemma'
);
my %nickname;
for my $key (keys %environment) {
$nickname{$environment{$key}} = $key;
}
# Parsing information.
my $filename;
my $theorem_name;
my $theorem_type;
my $theorem_long_name;
# The final output.
my %edges; # keyed by \ref, points to list of theorem_name
my %nodes; # keyed by theorem_name, points to the optional long name or undef.
my @states = ('NONE', 'INTHM', 'HAVE_THM', 'INPROOF');
my $state = 'NONE';
sub warning {
print STDERR ("$filename:$.: warning: ", @_, "\n");
}
sub warn_unnamed {
warning "unnamed $environment{$theorem_type}";
}
sub warn_unproved {
warning ("unproved $environment{$theorem_type} ", getName());
}
sub getName {
if (!defined $theorem_name) {
warn_unnamed();
$theorem_name = "$theorem_type:$filename:$.";
}
return $theorem_name;
}
sub makeNode {
die unless defined $theorem_type;
$nodes{getName()} = $theorem_long_name;
}
sub parse {
$filename = shift @_;
open(FILE, $filename) or do {
warn "Can't open $filename: $!\n";
return;
};
$. = 0;
while() {
chomp;
if (/^\\begin{([^}]*)}/) {
my $env = $1;
if (exists $nickname{$env}) {
$theorem_type = $nickname{$env};
if (/\[([^\]]*)\]/) {
$theorem_long_name = $1;
$theorem_long_name =~ s/\\/\\\\/;
} else {
$theorem_long_name = undef;
}
$state = 'INTHM';
} elsif ($env eq 'proof') {
unless ($state eq 'HAVE_THM') {
warning "proof without theorem";
next;
}
makeNode();
$state = 'INPROOF';
}
}
elsif (/^\\end{([^}]*)}/) {
my $env = $1;
if ($env eq 'proof') {
$theorem_type = undef;
$theorem_name = undef;
$state = 'NONE';
}
elsif ($state eq 'INTHM' && $env eq $environment{$theorem_type}) {
$state = 'HAVE_THM';
}
}
elsif (/\\label{([^}]*)}/) {
my $label = $1;
if ($state eq 'INTHM' && $label =~ /^$theorem_type/) {
$theorem_name = $label;
makeNode();
}
# TODO
warning "multiple labels" if /\\label.*\\label/;
}
elsif ($state ne 'NONE' && /\\ref{([^\}]*)}/) {
my $ref = $1;
if (exists $edges{$ref}) {
push(@{$edges{$ref}}, getName());
} else {
$edges{$ref} = [getName()];
}
warning "multiple refs" if /\\ref.*\\ref/;
}
}
}
##################
## output
# Print one node. Does not print nodes already printed.
# Adds options in [] if there are any to add. Particularly, this is how I set
# the shape of def and thm nodes, and the label in general.
my %printed_nodes;
sub printNode {
my $node = shift @_;
return if $printed_nodes{$node}++;
print " \"$node\"";
# collect a number of options to print with the node.
my @options;
# print the long name, if any, as the label to use for the node
if (defined $nodes{$node}) {
push(@options, "label=\"$nodes{$node}\"");
}
# set the shape according to type
if ($node =~ /^thm:/) {
push(@options, "shape=diamond");
} elsif ($node =~ /^def:/) {
push(@options, "shape=box");
}
# print the options
print ' [', join(' ', @options), ']' if @options;
print ";\n";
}
# print the nodes, and then the edges
sub output {
print "digraph proof {\n";
# collect all nodes, whether we saw them defined or not
my @nodes = keys %nodes;
for my $from (keys %edges) {
push(@nodes, $from);
for my $to (@{$edges{$from}}) {
push(@nodes, $to);
}
}
for my $node (sort @nodes) {
printNode($node);
}
for my $from (sort keys %edges) {
for my $to (@{$edges{$from}}) {
print " \"$from\" -> \"$to\";\n";
}
}
print "}\n";
}
###########################################################################
## Mainline!
for (@ARGV) {
parse $_;
}
output();