/*	This file is part of the software similarity tester SIM.
	Written by Dick Grune, Vrije Universiteit, Amsterdam.
	$Header: algollike.c,v 2.1 91/06/18 22:00:44 dick Exp $
*/

/*	This module implements the routines MayBeStartOfRun and CheckRun
	which are required by compare.c and which describe in some sense
	the language. This set is for ALGOL-like languages, in which it
	is meaningful and useful to isolate function bodies.
	
	It requires the user to define, preferably in Xlang.l, four token
	sets, represented as TOKEN[] and terminated by NOTOKEN:
	
	TOKEN Headers[]		tokens that may not end a chunk
	TOKEN Trailers[]	tokens that may not start a chunk
	TOKEN Openers[]		openers of parentheses that must balance
					in functions
	TOKEN Closers[]		the corresponding closers, in the same order
*/

#include	"private.h"
#include	"token.h"

/* old-style declaration of the allocator used by InitLanguage() */
extern char *malloc();

/* the four NOTOKEN-terminated token sets, defined elsewhere */
extern TOKEN Headers[];
extern TOKEN Trailers[];
extern TOKEN Openers[];
extern TOKEN Closers[];

extern char options[];			/* to recognize -f-option */

/*	Membership tables, indexed by TOKEN2int(token) and filled in by
	InitLanguage(): an entry is set to 1 if the token occurs in
	Headers[] or Trailers[] respectively.
*/
PRIVATE char headers[256];
PRIVATE char trailers[256];

/*	Nesting counters used by largest_function(): bal_count[i] is
	incremented for Openers[i] and decremented for Closers[i];
	bal_size is the number of entries in Openers[].
*/
PRIVATE int *bal_count;
PRIVATE unsigned int bal_size;

/* forward declaration; counts the tokens before the NOTOKEN terminator */
PRIVATE int STRlen();

InitLanguage()
{
	/*	Sets up the language-dependent tables of this module:
		marks every token of Headers[] and Trailers[] in the
		lookup tables headers[] and trailers[], and allocates one
		nesting counter per Opener/Closer pair in bal_count[].
		Calls fatal() if Openers[] and Closers[] differ in length
		or if the allocation fails.
	*/
	register TOKEN *cp;

	for (cp = &Headers[0]; !TOKEN_EQ(*cp, NOTOKEN); cp++) {
		headers[TOKEN2int(*cp)] = 1;
	}
	for (cp = &Trailers[0]; !TOKEN_EQ(*cp, NOTOKEN); cp++) {
		trailers[TOKEN2int(*cp)] = 1;
	}

	/*	Allocate the array bal_count.
		Its size is dependent on the size of the Openers set,
		which is extern. So we allocate through malloc().
	*/
	bal_size = STRlen(Openers);

	if (STRlen(Closers) != bal_size)
		fatal("different number of Openers and Closers");

	/*	A zero-size request may yield a null pointer even when
		memory is available, which would be misreported as
		"out of memory" for an empty Openers set; so always ask
		for room for at least one counter.
	*/
	bal_count = (int *)malloc(
		sizeof (int) * (bal_size != 0 ? bal_size : 1)
	);
	if (!bal_count) fatal("out of memory");
}

PRIVATE int
STRlen(str)
	TOKEN *str;
{
	/*	Returns the number of tokens in str that precede the
		terminating NOTOKEN.
	*/
	register int count;

	for (count = 0; !TOKEN_EQ(str[count], NOTOKEN); count++) {
		/* just counting */
	}
	return count;
}

int
MayBeStartOfRun(ch)
	TOKEN ch;
{
	/*	Returns 1 if the token ch is not marked in trailers[],
		0 if it is.
	*/
	return trailers[TOKEN2int(ch)] ? 0 : 1;
}

PRIVATE unsigned int
largest_function(str, size)
	TOKEN *str;
	unsigned int size;
{
	register unsigned int lb_pos = 0; /* latest balancing position */
	register unsigned int pos;
	register int i;
	
	for (i = 0; i < bal_size; i++) {
		bal_count[i] = 0;
	}
	
	/* see how far we get */
	for (pos = 0; pos < size; pos++) {
		register TOKEN tk;
		
		tk = str[pos];
		
		/* account for Openers */
		for (i = 0; i < bal_size; i++) {
			if (TOKEN_EQ(tk, Openers[i])) {
				bal_count[i]++;
				break;
			}
		}
		
		/* account for Closers */
		for (i = 0; i < bal_size; i++) {
			if (TOKEN_EQ(tk, Closers[i])) {
				bal_count[i]--;
				break;
			}
		}
		
		if (	/* it was a Closer */
			i < bal_size
		) {
			/* interesting things may have happened */
			
			/* if this was one Closer too many */
			if (bal_count[i] < 0) break;
			
			/* see if it all happens to balance here */
			for (i = 0; i < bal_size; i++) {
				if (	/* there is a counter-example */
					bal_count[i] != 0
				) {
					break;
				}
			}
			if (	/* no non-balance found */
				i == bal_size
			) {
				/* register balance point */
				lb_pos = pos + 1;
			}
		}
	}
	return lb_pos;			/* cut to size */
}

unsigned int
CheckRun(str, size)
	TOKEN *str;
	unsigned int size;
{
	/*	Decides how much of the run str[0..size-1] is acceptable
		in the language and returns that length, possibly zero.
		The run is only ever shortened from its end.
	*/

	if (options['f']) {
		/* -f: accept only up to the last balance point */
		return largest_function(str, size);
	}

	/* otherwise strip tokens marked in headers[] off the end */
	for (; size != 0; size--) {
		if (!headers[TOKEN2int(str[size-1])]) {
			break;
		}
	}
	return size;
}

