/* ---------------------------------------------------------
   | Suzy 1.0 - RBF classifier system			   |
   | Copyright 1993 Tom Grove			           |
   |						           |
   | token.cpp - Training set member function definitions  |
   ---------------------------------------------------------

Defines the member functions needed to load and parse a training set
file, report syntax errors, etc.

*/

#include "token.hpp"


// load - open `filename`, parse its header, symbol definitions, training
// facts and (when testsetsize > 0) test facts, then close the stream.
//
// Returns non-zero on success and 0 on any failure.  On success the file
// name is copied into `name`.
//
// Once open() has succeeded the stream is closed on the failure paths too;
// previously a parse error left the file open.
//
// NOTE(review): open() is not defined in this file; it is assumed to open
// the member stream `fs` - confirm.
// NOTE(review): the capacity of `name` is not visible here, so the strcpy()
// below is unchecked - confirm that callers pass short enough names.
int Tset::load(char *filename)
	{
	int ok;
	if(!open(filename))
		{
		return 0;
		}
	ok = parseheader() && parsefactdefs() && parsefacts(trainingset,tsetsize);
	if(ok && testsetsize>0)
		{
		ok = parsefacts(testset,testsetsize);
		}
	// Guarded so that a stream already closed elsewhere is not closed twice.
	if(fs.is_open()) fs.close();
	if(!ok)
		{
		return 0;
		}
	strcpy(name,filename);
	return 1;
	}


// clear - release the symbol tables and both fact tables, and reset the
// line counter to 1.
//
// The tables themselves, and each symbol name, are allocated with new[] in
// this file, so they are released with delete[].  The individual Fact
// objects are allocated with plain new and are released with plain delete.
void Tset::clear(void)
	{
	int i;
	if(tokennames!=NULL)
		{
		for(i=0;i<novalues;i++) delete [] tokennames[i];
		delete [] tokennames;
		delete [] tokenvalues;
		tokennames = NULL;
		tokenvalues = NULL;	// do not leave a dangling pointer behind
		}
	if(trainingset!=NULL)
		{
		for(i=0;i<tsetsize;i++) delete trainingset[i];
		delete [] trainingset;
		trainingset = NULL;
		tsetsize = 0;
		}
	if(testset!=NULL)
		{
		for(i=0;i<testsetsize;i++) delete testset[i];
		delete [] testset;
		testset = NULL;
		testsetsize = 0;	// kept in step with tsetsize above
		}
	lines = 1;
	}

// getnextchar - classify the next character of the member stream `fs`
// without consuming it.
//
// Returns 0 when the next character is a separator (comma, space, newline,
// or any character that compares below 10 as a plain char) or when nothing
// more can be read; returns 1 when it belongs to a token.
int Tset::getnextchar()
	{
	int next = fs.peek();
	char c;
	// peek() yields a negative value at end of input.  Test it before
	// narrowing to char: where plain char is unsigned, (char)EOF is 255 and
	// would otherwise be mistaken for a token character.
	if(next < 0) return 0;
	c = (char) next;
	if(c<10) return 0;		// codes 0-9, tab included
	switch(c) {
		case ',':			// Comma is treated as
		case ' ':			// white space
		case '\n':
			return 0;
		default:
			return 1;
		   }
	}



// gettoken - read the next token from the stream into the member buffer
// `token` and NUL-terminate it.
//
// Separators are whatever getnextchar() classifies as such.  A ':' starts a
// comment that runs to the end of the line.  `lines` is advanced once for
// every newline actually consumed, so blank lines are counted and the
// counter is not moved past a token before the caller has examined it.
//
// Returns non-zero when a token was read, 0 when the input is exhausted or
// the stream has failed.
//
// NOTE(review): getnextchar() peeks at the member stream, not at the `fs`
// parameter; the callers in this file pass the member itself.
// NOTE(review): nothing bounds `i` against the capacity of `token`, which
// is declared outside this file - an over-long token would overrun it.
int Tset::gettoken(fstream& fs)
	{
	int i = 0;
	char ch ;
	token[0] = '\0' ;
	while(!token[0])
		{
		// Skip separators, counting each newline as it is consumed.
		while(!getnextchar())
			{
			// good() rather than eof(): a stream that failed for any
			// other reason must also end the scan instead of spinning.
			if(!fs.good()) return 0 ;
			if(fs.get(ch) && ch == '\n') lines++;
			}
		// Nothing left to read even though getnextchar() did not say so.
		if(!fs.good()) return 0 ;

		// Comments are started with colons.
		if(fs.peek() == (int)':')
			{
			// Discard up to, but not including, the newline.  Also stop
			// at end of input so that a comment on an unterminated last
			// line cannot loop forever.
			do	{
				fs.get(ch);
				} while(fs.peek() != (int)'\n' && fs.good());
			}
		else
			{
			do	{
				fs.get(ch);
				token[i++] = ch ;
				} while(getnextchar() && fs.good());
			}
		}
	token[i] = '\0' ;
	return 1 ;
	}

// parseheader - get training set size, etc. Note that EXIT is a macro

int Tset::parseheader(void)
	{
	int i;
	if(gettoken(fs) == NULL || strcmp(token,"size") != 0)
		{
		EXIT("Syntax error: 'size' expected");
		}
	if(gettoken(fs) == NULL || (tsetsize = strtol(token,NULL,0)) == NULL)
		{
		EXIT("Training set size is invalid");
		}
	if(gettoken(fs) == NULL || strcmp(token,"test") != 0)
		{
		EXIT("Syntax error: 'test' expected");
		}
	if(gettoken(fs) == NULL)
		{
		EXIT("Unexpected end of file");
		}
	else
		testsetsize = strtol(token,NULL,0) ;
	if(gettoken(fs) == NULL || strcmp(token,"attributes") != 0)
		{
		EXIT("Syntax error: 'attributes' expected");
		}
	if(gettoken(fs) == NULL || (noattributes = strtol(token,NULL,0)) == NULL)
		{
		EXIT("Attribute number is invalid");
		}
	if(gettoken(fs) == NULL || strcmp(token,"classes") != 0)
		{
		EXIT("Syntax error: 'classes' expected");
		}
	if(gettoken(fs) == NULL || (noclasses = strtol(token,NULL,0)) == NULL)
		{
		EXIT("Class number is invalid");
		}
	if(gettoken(fs) == NULL || strcmp(token,"values") != 0)
		{
		EXIT("Syntax error: 'values' expected");
		}
	if(gettoken(fs) == NULL)
		{
		EXIT("Unexpected end of file");
		}
	else
		novalues = strtol(token,NULL,0);

	trainingset = new Fact *[tsetsize];
	for(i=0;i<tsetsize;i++) trainingset[i] = NULL;
	if(testsetsize>0)  { testset = new Fact *[testsetsize];
	for(i=0;i<testsetsize;i++) testset[i] = NULL;
			   }
	return !NULL;
	}


// parsefactdefs - parse and allocate memory for the symbolic constants.
//
// Reads `novalues` definitions of the form
//
//	value <name> = <number>
//
// storing a private copy of each name in tokennames[] and the number in
// tokenvalues[].  Nothing is allocated when novalues is zero.  A negative
// count is rejected so that it never reaches the array allocations.
//
// Returns non-zero on success.
// NOTE(review): EXIT is a macro defined outside this file; it presumably
// reports the message and returns failure from this function - confirm.

int Tset::parsefactdefs(void)
	{
	int i;
	if(novalues < 0)
		{
		EXIT("Value number is invalid");
		}
	if(novalues > 0)
		{
		tokenvalues = new float [novalues];
		tokennames = new  char *[novalues];
		// Null every slot first so that a partly filled table can be
		// released safely if parsing stops half way.
		for(i=0;i<novalues;i++) tokennames[i] = NULL;
		for(i=0;i<novalues;i++)
			{
			if(!gettoken(fs) || strcmp(token,"value") !=0 )
				{
				EXIT("Syntax error: 'value' expected");
				}
			if(!gettoken(fs))
				{
				EXIT("Unexpected end of file");
				}
			tokennames[i] = new char[strlen(token)+1];
			strcpy(tokennames[i],token);
			if(!gettoken(fs) || strcmp(token,"=") != 0)
				{
				EXIT("Syntax error: '=' expected");
				}
			if(!gettoken(fs))
				{
				EXIT("Unexpected end of file");
				}
			// strtod() yields a double; the table holds floats.
			tokenvalues[i] = (float) strtod(token,NULL);
			}
		}
	return 1;
	}

// parsefacts - used to parse both the training and the test sets.
//
// Fills dest[0..number-1] with newly allocated facts.  Each fact is read as
// `noattributes` attribute tokens followed by one class token:
//
//   - an attribute token starting with '+', '-' or a digit is converted
//     with strtod(); a class token starting with a digit is converted with
//     strtol();
//   - any other token is looked up in tokennames[] and replaced by the
//     matching tokenvalues[] entry (converted to int for the class).
//
// Returns non-zero once every fact has been read.
// NOTE(review): EXIT is a macro defined outside this file; it presumably
// reports the message and returns failure from this function - confirm.

int Tset::parsefacts(Fact **dest,int number)
	{
	int fact,attr,sym;
	for(fact=0;fact<number;fact++)
		{
		dest[fact] = new Fact(noattributes);

		// Attribute values.
		for(attr=0;attr<noattributes;attr++)
			{
			if(!gettoken(fs))
				{
				EXIT("Unexpected end of file");
				}
			else if(token[0]=='+' || token[0]=='-' || (token[0]>='0' && token[0]<='9'))
				{
				dest[fact]->attribs[attr] = strtod(token,NULL);
				}
			else
				{
				sym = 0;
				while(sym<novalues)
					{
					if(strcmp(token,tokennames[sym])==0)
						{
						dest[fact]->attribs[attr] = tokenvalues[sym];
						break;
						}
					sym++;
					}
				// The scan ran off the end of the table: no such symbol.
				if(sym==novalues)
					{
					EXIT("Unknown attribute symbol");
					}
				}
			}

		// Class label.
		if(!gettoken(fs))
			{
			EXIT("Unexpected end of file");
			}
		else if(token[0]>='0' && token[0]<='9')
			{
			dest[fact]->clss = (int) strtol(token,NULL,0);
			}
		else
			{
			sym = 0;
			while(sym<novalues)
				{
				if(strcmp(token,tokennames[sym])==0)
					{
					dest[fact]->clss = (int) tokenvalues[sym];
					break;
					}
				sym++;
				}
			if(sym==novalues)
				{
				EXIT("Unknown class symbol");
				}
			}
		}
	return 1;
	}


