/*	This file is part of the software similarity tester SIM.
	Written by Dick Grune, Vrije Universiteit, Amsterdam.
	$Header: buff.c,v 2.2 92/06/30 16:16:16 dick Exp $
*/

#include	"private.h"
#include	"buff.h"
#include	"sim.h"
#include	"stream.h"
#include	"debug.h"

/*	This level implements the possibly buffered access to the lexical
	scanner. The buffer tries to save newline information, anticipating
	a second scan that is interested in this information only.
*/

/* Allocator routines, declared by hand in pre-ANSI style (no header used) */
extern char *malloc(), *realloc();
/* Wrapper around realloc(), defined at the end of this file */
PRIVATE char *good_realloc();

/* Forward declarations of PRIVATE routines that are used before they
   are defined further down in this file */
PRIVATE scrap_nl_buff();
PRIVATE init_tk_buff();
PRIVATE init_nl_buff();

/*	Set up both buffers managed by this file: the token buffer first,
	then the (optional) newline buffer.
*/
init_buff()
{
	init_tk_buff();
	init_nl_buff();
}

/********	Token buffer section	********/

#define	TK_INCR		10000		/* increment of token buffer size */

/*	The token buffer is one array of TOKENs; tk_size and tk_free count
	elements, not bytes. store_token() enlarges the array by TK_INCR
	elements whenever tk_free reaches tk_size.
*/
TOKEN *tk_buff;				/* to be filled by malloc */
PRIVATE unsigned int tk_size;		/* size of token buffer */
PRIVATE unsigned int tk_free;		/* next free position in tk_buff[] */

/*	Create the initial token buffer of TK_INCR entries. Failure to
	obtain the memory is reported through fatal(). Entry 0 is reserved,
	so the first token will be stored at index 1.
*/
PRIVATE
init_tk_buff()
{
	tk_size = TK_INCR;
	tk_buff = (TOKEN *)malloc(tk_size * sizeof (TOKEN));
	if (tk_buff == 0) {
		fatal("out of memory");
	}
	tk_free = 1;			/* position 0 stays unused */
}

/*	Append the current lex_token to the token buffer, enlarging the
	buffer by TK_INCR entries when it is full. If the enlargement fails,
	the newline buffer is given up and the enlargement is tried once
	more; a second failure is fatal.
*/
store_token()
{
	if (tk_free == tk_size) {
		/* the buffer is full: grow it, carefully */
		unsigned int grown_size = tk_size + TK_INCR;
		register TOKEN *grown_buff = 0;

		/* unsigned wrap-around shows up as a size below tk_free */
		if (grown_size < tk_free) fatal("TK_INCR causes overflow");

		grown_buff = (TOKEN *)good_realloc(
			(char *)tk_buff, sizeof (TOKEN) * grown_size
		);
		if (!grown_buff) {
			/*	No room; the newline buffer is optional, so
				release it (if it still exists) and retry.
			*/
			scrap_nl_buff();
			grown_buff = (TOKEN *)good_realloc(
				(char *)tk_buff, sizeof (TOKEN) * grown_size
			);
			if (!grown_buff) {
				/* releasing the newline buffer did not help */
				fatal("out of memory");
			}
		}
		tk_buff = grown_buff;
		tk_size = grown_size;
	}

	tk_buff[tk_free] = lex_token;
	tk_free++;
}

/*	Return the index of the next free position in the token buffer.
	Since position 0 is never used, this is one more than the number
	of tokens stored so far.
*/
unsigned int
text_length()
{
	return tk_free;
}


/********	Newline buffer section		********/

#define	NL_INCR		1000		/* increment of newline buffer size */

/*	To speed up pass 2, which is interested in fseek positions at line
	ends, the newline buffer keeps this info from pass 1. To reduce the
	size of the newline buffer, the info is kept as the differences
	between the values at consecutive line ends. This allows shorts to be
	used rather than integers (unsigned chars are probably too small).

	The recording of token count and fseek position differences at EOL is
	optional, and is switched off if
	-	it would use up memory that is required otherwise, i.e.
		for the token buffer.
	-	a difference would not fit in the fields in the struct.
	Switching off is done by freeing the buffer and setting nl_buff to 0.
	Anybody using nl_buff should therefore test for nl_buff being zero.
*/

/*	One entry per recorded end-of-line. Both fields hold differences
	with respect to the previously recorded end-of-line; store_newline()
	gives up the whole buffer when a difference does not fit in a short.
*/
struct newline {
	short nl_tk_diff;		/* token position difference */
	short nl_fs_diff;		/* fseek position difference */
};

/*	nl_buff is zero when newline recording is switched off. nl_free is
	never assigned explicitly, only incremented, so it starts at the
	zero value every file-scope object gets.
*/
PRIVATE struct newline *nl_buff;	/* to be filled by malloc */
PRIVATE unsigned int nl_size;		/* size of newline buffer */
PRIVATE unsigned int nl_free;		/* next free position in nl_buff[] */
/*	Running values used to compute the differences in pass 1 and to
	rebuild the absolute values from them in pass 2; both are reset
	by OpenText().
*/
PRIVATE unsigned int last_tk_cnt;
PRIVATE long last_fs_pos;

/*	Try to obtain the initial newline buffer of NL_INCR entries.
	The buffer is optional: when malloc fails nl_buff is simply left
	zero, which the users of nl_buff test for.
*/
PRIVATE
init_nl_buff()
{
	nl_size = NL_INCR;
	nl_buff = (struct newline *)malloc(nl_size * sizeof (struct newline));
	/* the result is deliberately left unchecked */
}

/*	Record the end-of-line that was just read in pass 1: the number of
	tokens and the change in fseek position since the previously
	recorded end-of-line are appended to the newline buffer.

	Does nothing when the newline buffer has been switched off. The
	buffer is switched off (freed, nl_buff set to 0) here when it cannot
	be enlarged or when one of the differences does not fit in a short.
*/
PRIVATE
store_newline()
{
	if (!nl_buff) return;

	if (nl_free == nl_size) {
		/* "Careful" is the word */
		struct newline *new_buff = 0;
		unsigned int new_size = nl_size + NL_INCR;

		if (new_size < nl_free)
			fatal("NL_INCR causes unsigned integer overflow");
		new_buff = (struct newline *)
				good_realloc((char *)nl_buff,
					sizeof (struct newline) * new_size);
		if (!new_buff) {
			/* the buffer is optional, so just give it up */
			scrap_nl_buff();
			return;
		}
		nl_buff = new_buff, nl_size = new_size;
	}

	/* now we are sure there is room enough */
	{
		register struct newline *nl = &nl_buff[nl_free++];
		register unsigned int tk_diff = lex_tk_cnt - last_tk_cnt;
		register long fs_diff = lex_fs_pos - last_fs_pos;

		last_tk_cnt = lex_tk_cnt;
		last_fs_pos = lex_fs_pos;

		/*	Store both differences first and test them afterwards:
			scrap_nl_buff() frees the memory nl points into, so
			nl must not be touched any more once it has been
			called.
		*/
		nl->nl_tk_diff = tk_diff;
		nl->nl_fs_diff = fs_diff;
		if (	nl->nl_tk_diff != tk_diff
		||	nl->nl_fs_diff != fs_diff
		) {
			/* a difference does not fit in its short field */
			scrap_nl_buff();
		}
	}
}

#ifdef	DB_NL_BUFF

/*	Debugging aid: print the newline buffer entries with indexes from
	start up to but not including limit. Reports instead of printing
	entries when there is no newline buffer or when start or limit
	exceeds the number of entries in use.
*/
db_pr_nl_buff(start, limit)
	unsigned int start;
	unsigned int limit;
{
	int i;

	printf(">>>> NL_BUFF <<<<\n");
	if (nl_buff == 0) {
		printf("NO NL_BUFF\n");
		return;
	}

	/* both bounds must lie within the used part of the buffer */
	if (start > nl_free) {
		printf("**** start (%u) > nl_free (%u)\n", start, nl_free);
		return;
	}
	if (limit > nl_free) {
		printf("**** limit (%u) > nl_free (%u)\n", limit, nl_free);
		return;
	}

	printf("nl_buff: %u entries:\n", nl_free);
	i = start;
	while (i < limit) {
		printf("nl_tk_diff = %d, nl_fs_diff = %d\n",
			nl_buff[i].nl_tk_diff, nl_buff[i].nl_fs_diff);
		i++;
	}
}

#endif	/* DB_NL_BUFF */

/*	Switch off newline recording: release the newline buffer and set
	nl_buff to 0. Calling this when the buffer is already gone is
	harmless.
*/
PRIVATE
scrap_nl_buff()
{
	if (!nl_buff) return;		/* nothing left to release */

	free((char *)nl_buff);
	nl_buff = 0;
}


/********	The text interface	********/

/*	Pass 2 cursor into nl_buff[]: nl_next is the next entry to hand out,
	nl_limit the index at which the current text's entries end. Both are
	loaded by OpenText() from the text's tx_nl_start and tx_nl_limit.
*/
PRIVATE unsigned int nl_next, nl_limit;	/* for pass 2 */
/*	Line counter for pass 2 when reading from nl_buff[]; set to 1 by
	OpenText() and incremented for each entry handed out.
*/
PRIVATE unsigned int nl_cnt;

/*	Prepare the text txt for reading in the given pass.

	Pass 1: remembers in txt where its entries in the newline buffer
	start (if that buffer exists) and opens the file txt->tx_fname.
	Pass 2: if the newline buffer exists, no file is opened; the
	positions are replayed from the buffer entries of txt. Otherwise the
	file is opened again.

	When the file cannot be opened, "/dev/null" is opened in its place
	so that reading can proceed, and 0 is returned.

	Returns non-zero on success and 0 on failure; an unexpected value of
	pass also yields 0.
*/
int
OpenText(pass, txt)
	int pass;
	struct text *txt;
{
	register int ok;

	last_tk_cnt = 0;
	last_fs_pos = 0L;

	switch (pass) {
	case 1:
		if (nl_buff) {
			txt->tx_nl_start = nl_free;
		}
		ok = OpenStream(txt->tx_fname);
		if (!ok) {
			OpenStream("/dev/null");
		}
		return ok;

	case 2:
		if (nl_buff) {
			/* replay from the newline buffer, no file needed */
			nl_next = txt->tx_nl_start;
			nl_limit = txt->tx_nl_limit;
			nl_cnt = 1;
			lex_nl_cnt = 1;
			lex_tk_cnt = 0;
			lex_fs_pos = 0L;
			return 1;
		}
		ok = OpenStream(txt->tx_fname);
		if (!ok) {
			OpenStream("/dev/null");
		}
		return ok;

	default:
		/*NOTREACHED*/
		break;
	}

	/* unknown pass: return a defined value rather than fall off the end */
	return 0;
}

/*	Advance to the next item of the text opened by OpenText().

	Pass 1: reads the next token with NextStrToken() and, if it is an
	EOL, records it in the newline buffer.
	Pass 2: delivers end-of-lines only. With a newline buffer, the next
	entry is turned back into absolute values in lex_nl_cnt, lex_tk_cnt
	and lex_fs_pos and lex_token is set to EOL; without one, tokens are
	read and skipped until an EOL is found.

	Returns non-zero when an item was delivered and 0 when the text is
	exhausted; an unexpected value of pass also yields 0.
*/
int
NextTextToken(pass)
	int pass;
{
	register int ok;

	switch (pass) {
	case 1:
		ok = NextStrToken();
		if (TOKEN_EQ(lex_token, EOL)) {
			store_newline();
		}
		return ok;

	case 2:
		/* get newline info from the buffer or from the file itself */
		if (nl_buff) {
			register struct newline *nl;

			if (nl_next == nl_limit) return 0;

			nl = &nl_buff[nl_next++];
			lex_nl_cnt = ++nl_cnt;
			/* add the stored differences to the running totals */
			lex_tk_cnt = (last_tk_cnt += nl->nl_tk_diff);
			lex_fs_pos = (last_fs_pos += nl->nl_fs_diff);
			lex_token = EOL;
			return 1;
		}
		else {
			while (	(ok = NextStrToken())
			&&	!TOKEN_EQ(lex_token, EOL)
			) {
				/* skip */
			}
			return ok;
		}

	default:
		/*NOTREACHED*/
		break;
	}

	/* unknown pass: return a defined value rather than fall off the end */
	return 0;
}

/*	Finish reading the text txt in the given pass. At the end of pass 1
	the end of the text's entries in the newline buffer is remembered
	in txt, provided the buffer still exists. The stream is closed in
	either pass.

	NOTE(review): CloseStream() is also called for pass 2 when OpenText()
	used the newline buffer and opened no stream; presumably CloseStream()
	tolerates that -- confirm.
*/
CloseText(pass, txt)
	int pass;
	struct text *txt;
{
	if (pass == 1 && nl_buff) {
		txt->tx_nl_limit = nl_free;
	}
	/* pass 2 needs no bookkeeping */

	CloseStream();
}


/********	A good realloc()	********/

/*	A realloc that will not destroy the first few words of the buffer
	if it fails (if there are really reallocs out there that are that
	unsympathetic).

	ptr must point to a block of at least two ints and size is the new
	size in bytes. Returns the resized block, or 0 if realloc() fails;
	in the latter case the first two words of the old block are put back
	and the old block can still be used by the caller.
*/

PRIVATE char *
good_realloc(ptr, size)
	char *ptr;
	unsigned int size;
{	/* I don't really believe this */
	int word0, word1;
	char *new_ptr;

	word0 = ((int*)ptr)[0];
	word1 = ((int*)ptr)[1];
	new_ptr = realloc(ptr, sizeof (char) * size);
	if (!new_ptr) {
		/*	Failure: repair the OLD block; there is no new block
			to write into.
		*/
		((int*)ptr)[0] = word0;
		((int*)ptr)[1] = word1;
		return 0;
	}
	((int*)new_ptr)[0] = word0;
	((int*)new_ptr)[1] = word1;
	return new_ptr;
}
