#!/usr/local/bin/perl

###############################################################################
# This software is being provided to you, the LICENSEE, by the Massachusetts  #
# Institute of Technology (M.I.T.) under the following license.  By           #
# obtaining, using and/or copying this software, you agree that you have      #
# read, understood, and will comply with these terms and conditions:          #
#                                                                             #
# Permission to use, copy, modify and distribute, including the right to      #
# grant others the right to distribute at any tier, this software and its     #
# documentation for any purpose and without fee or royalty is hereby granted, #
# provided that you agree to comply with the following copyright notice and   #
# statements, including the disclaimer, and that the same appear on ALL       #
# copies of the software and documentation, including modifications that you  #
# make for internal use or for distribution:                                  #
#                                                                             #
# Copyright 1991-4 by the Massachusetts Institute of Technology.  All rights  #
# reserved.                                                                   #
#                                                                             #
# THIS SOFTWARE IS PROVIDED "AS IS", AND M.I.T. MAKES NO REPRESENTATIONS OR   #
# WARRANTIES, EXPRESS OR IMPLIED.  By way of example, but not limitation,     #
# M.I.T. MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANTABILITY OR FITNESS #
# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR      #
# DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS,        #
# TRADEMARKS OR OTHER RIGHTS.                                                 #
#                                                                             #
# The name of the Massachusetts Institute of Technology or M.I.T. may NOT be  #
# used in advertising or publicity pertaining to distribution of the          #
# software.  Title to copyright in this software and any associated           #
# documentation shall at all times remain with M.I.T., and USER agrees to     #
# preserve same.                                                              #
###############################################################################

# getsent fileid
# gets sentences from ACL/DCI SGMLized WSJ
# places paragraph and sentence marks in modified SGML
#	par mark: <p.fileid.doc_no.par_no>
#	sent mark: <s.fileid.doc_no.par_no.sent_no>
# output SGML marks: 1 per line
# output Sentences: 1 per line

# Command-line parsing.
#	-v	sets $vflg
#	fileid	exactly one non-flag argument; it is embedded in every
#		<p....> and <s....> mark written by the main loop
# Any other flag, a second fileid, or a missing fileid is fatal (perr exits).
foreach $arg (@ARGV)
{	if($arg =~ /^-/)
	{	if($arg =~ /^-v/) {$vflg=1;}
		else {&perr("illegal flag: $arg");}
	}
	else
	{	if($fileid) {&perr("multiple fileid arg");}
		# take the argument being examined; the bareword index used
		# previously always selected $ARGV[0]
		$fileid=$arg;
	}
}
@ARGV=();	# nothing left for <> to open, so it reads standard input
if(!$fileid) {&perr("no fileid specified"); }

# Main pass over the input read through <>.  Each line is split into
# whitespace-separated tokens.  Tag tokens update the document / paragraph /
# sentence state and emit the numbered marks; any other token seen while a
# sentence is open is buffered in @output, which flusho prints when the next
# <p>, <s>, </s> or </p> tag arrives.
@output=();

while(<>)
{	s/\n//;
	@input = split(/\s+/);
	foreach $field (0..$#input)
	{	$word=$input[$field];

		if($word =~ /<DOCNO>/)
		{	# the document number is the token following the tag
			$docno=$input[$field+1];
			$docno =~ s/\.$//;
			# a remaining '.' only produces a warning (perr2 does not exit)
			&perr2("docno contains .: $docno") if($docno =~ /\./);
			$parno=1;
		}
		elsif($word =~ /<p>/)
		{	&flusho();
			&perr("<p>: no docno") if(!$docno);
			&perr("<p>: par on") if($paron);
			&perr("<p>: sent on") if($senton);
			print "<p.$fileid.$docno.$parno>\n";
			$sentno=1;	# sentence numbering restarts in each paragraph
			$paron=1;
		}
		elsif($word =~ /<s>/)
		{	&flusho();
			&perr("<s>: no docno") if(!$docno);
			&perr("<s>: no par on") if(!$paron);
			&perr("<s>: sent on") if($senton);
			print "<s.$fileid.$docno.$parno.$sentno>\n";
			$sentno++;
			$senton=1;
		}
		elsif($word =~ /<\/s>/)
		{	&flusho();
			&perr("</s>: no docno") if(!$docno);
			&perr("</s>: no par on") if(!$paron);
			&perr("</s>: no sent on") if(!$senton);
			print("</s>\n");
			$senton=0;
		}
		elsif($word =~ /<\/p>/)
		{	&flusho();
			&perr("</p>: no docno") if(!$docno);
			&perr("</p>: no par on") if(!$paron);
			&perr("</p>: sent on") if($senton);
			print("</p>\n");
			$parno++;
			$paron=0;
		}
		elsif($senton)
		{	# ordinary token inside an open sentence
			push(@output,$word);
		}
	}
}

# the input must not end inside a paragraph or a sentence
&perr("par on at end") if($paron);
&perr("sent on at end") if($senton);

# flusho: print the tokens buffered in @output as a single line and empty
# the buffer.  The tokens are joined with single spaces, runs of whitespace
# are collapsed to one space, and a leading or trailing space is removed.
# Nothing is printed when @output is empty.  Output goes to the currently
# selected filehandle.
sub flusho				# flush @output
{	local($line);			# work on a private scalar so the caller's $_ survives
	if($#output>=0)
	{	$line=join(" ",@output);
		$line =~ s/\s+/ /g;
		$line =~ s/^ //;
		$line =~ s/ $//;
		print "$line\n";
	}
	@output=();
}


# perr: fatal error.  Writes "getsent: <message>" and a newline to STDERR,
# then terminates the program with exit status 1.
#	$_[0]: message text
sub perr
{	local($msg)=@_;
	print STDERR "getsent: " . $msg . "\n";
	exit(1);
}

# perr2: non-fatal warning.  Writes "getsent: <message>" and a newline to
# STDERR and returns to the caller, so processing continues.
#	$_[0]: message text
sub perr2
{	local($msg)=@_;
	print STDERR "getsent: " . $msg . "\n";
}
