#include  "defns.i"
#include  "extern.i"

/****************************************************************************/
/*                                                                          */
/*  InitialiseTrainingSet(R) - set up the initial training set of tuples    */
/*                             for relation R, first in TrainingSet, which  */
/*                             will subsequently be overwritten while       */
/*                             finding the definition for R, and a copy in  */
/*                             InitialTrainingSet which will be unaltered   */
/*                             while finding the definition.                */
/*                                                                          */
/*  Slot 0 of every tuple placed in TrainingSet holds the tuple's sequence  */
/*  number; for positive tuples PosMark is or-ed into that number.          */
/*                                                                          */
/*  If R has explicit negative tuples (R->Neg), positives and negatives     */
/*  are simply copied.  Otherwise the negatives are generated here:         */
/*    - when the space of all constant tuples is at most 10*MAXTUPLES it    */
/*      is enumerated with NextConstTuple and every tuple not in R->Pos is  */
/*      kept with probability SAMPLE;                                       */
/*    - otherwise tuples are drawn with RandomTuple until SampledSize       */
/*      tuples are present (duplicates among the negatives are possible).   */
/*                                                                          */
/*  Globals written: MaxVar, Variable[1..MaxVar], Pos, Tot, InitialTot,     */
/*  TrainingSet, InitialTrainingSet, Bits, OriginalCovers, NowCovers,       */
/*  BaseInfo, InitialBaseInfo, and NewSize (generated-negatives case only). */
/*                                                                          */
/*  The process is terminated with exit(0) after printing a message if the  */
/*  training set would exceed MAXTUPLES.                                    */
/*                                                                          */
/*  InitialTrainingSet is a copy of the array of tuple pointers only; the   */
/*  tuples themselves are shared with TrainingSet.                          */
/*                                                                          */
/****************************************************************************/


    InitialiseTrainingSet(R)
/*  ---------------------  */
    Relation R;
{
    int i, j, MaxSize, SampledSize, Size;
    Tuple Case, *Scan;
    double drand48();

    /* Discard old training sets if they exist */

    if ( InitialTrainingSet ) Discard(InitialTrainingSet, true);

    if ( TrainingSet )
    {
	Discard(TrainingSet, true);
	free(Bits);
	free(OriginalCovers);
	free(NowCovers);
    }

    /* Establish number of variables hence size of tuples, and set variable 
       types equal to the type of variable in that position in the relation,
       and variable depths to zero */

    MaxVar = R->Arity;
    Size = (MaxVar+1) * sizeof(Const);

    ForEach(i, 1, MaxVar)
    {
	Variable[i]->Type = R->Type[i];
	Variable[i]->TypeRef = R->TypeRef[i];
	Variable[i]->Continuous = R->TypeRef[i]->Continuous;
	Variable[i]->TupleOrder = Nil;
	Variable[i]->Depth = 0;
	Variable[i]->DetDeps = 0;
	Variable[i]->NonDetOccurs = 0;
    }

    /* Test whether negative tuples are already defined in the relation */

    if ( R->Neg )
    {
	/*  If so, simply copy positive and negative tuples  */

	Pos = Number(R->Pos);
	Tot = Number(R->Neg) + Pos;

	if(Tot>MAXTUPLES)
	{
	    printf("Training Set Size exceeds tuple limit: ");
	    printf("%d > %d - Exiting\n", Tot, MAXTUPLES);
	    printf("Rerun with larger MAXTUPLES to proceed further\n");
	    exit(0);
	}

	TrainingSet = (Tuple *) pmalloc((Tot+1) * sizeof(Tuple));

	/*  Tot is reused as the running index while the tuples are copied
	    and ends up back at the total  */

	Tot = 0;

	for ( Scan = R->Pos ; *Scan ; Scan++ )
	{
	    TrainingSet[Tot] = (Tuple) pmalloc(Size);
	    memcpy(TrainingSet[Tot], *Scan, Size);
	    TrainingSet[Tot][0] = Tot | PosMark;
	    Tot++;
	}

	for ( Scan = R->Neg ; *Scan ; Scan++ )
	{
	    TrainingSet[Tot] = (Tuple) pmalloc(Size);
	    memcpy(TrainingSet[Tot], *Scan, Size);
	    TrainingSet[Tot][0] = Tot;
	    Tot++;
	}
    }
    else 
    /* Negative tuples not already defined */

    {
	/* Find maximum training set size - the number of distinct constant
	   tuples that satisfy the type constraints.
	   NOTE(review): this product is computed in an int and is not
	   checked for overflow - confirm that arity and type sizes keep
	   it in range */

	NewSize = 1;
	ForEach(j, 1, R->Arity)
	{
	    NewSize *= R->TypeRef[j]->NValues;
	}

	MaxSize = NewSize;
	SampledSize = NewSize;
	if(SAMPLE<1.0)
	{
	    SampledSize = Number(R->Pos) 
	              + (int)(SAMPLE*(double)(NewSize-Number(R->Pos)));
	}

	if(SampledSize>MAXTUPLES)
	{
	    printf("Training Set Size will exceed tuple limit: ");
	    printf("%d > %d - Exiting\n", SampledSize, MAXTUPLES);
	    printf("Rerun with larger MAXTUPLES to proceed further\n");
	    printf("(Or use smaller sample of negative tuples).\n");
	    exit(0);
	}

	if(NewSize>MAXTUPLES)
	{
	    NewSize = MAXTUPLES;
	}

	TrainingSet = (Tuple *) pmalloc((NewSize+1) * sizeof(Tuple));

	Tot = 0;

	/*  Copy positive tuples  */

	for ( Scan = R->Pos ; *Scan ; Scan++ )
	{
	    TrainingSet[Tot] = (Tuple) pmalloc(Size);
	    memcpy(TrainingSet[Tot], *Scan, Size);
	    TrainingSet[Tot][0] = Tot | PosMark;
	    Tot++;
	}
	Pos = Tot;

	if(MaxSize<=10*MAXTUPLES)
	{
	    /* Enumerate all possible tuples and add a sample of the negative
	       tuples to the training set  - note that if SAMPLE is 1, the
	       default, all negative tuples are added to the training set */

	    Case = Nil;

	    while ( (Case = NextConstTuple(R, Case)) && (Tot<NewSize))
	    {
	        if ( ! Join(R->Pos, R->PosIndex, DefVars, Case, MaxVar, true)
		      && drand48() <= SAMPLE )
	        {
		    TrainingSet[Tot] = (Tuple) pmalloc(Size);
		    *Case = Tot;
		    memcpy(TrainingSet[Tot], Case, Size);
		    Tot++;
		}
	    }

	    /* NextConstTuple frees its working tuple only when it runs out
	       of tuples (and then returns Nil).  If the loop stopped because
	       the training set is full, the working tuple is still
	       allocated and must be released here */

	    if ( Case ) free(Case);
	}
	else
	{
	    /* Might take too long to enumerate all tuples, so generate
	       them randomly - can result in duplicate negative tuples */
	    while (Tot<SampledSize)
	    {
	        Case = RandomTuple(R);
	        if ( ! Join(R->Pos, R->PosIndex, DefVars, Case, MaxVar, true))
	        {
		    /* Ownership of the tuple passes to TrainingSet */

		    *Case = Tot;
		    TrainingSet[Tot] = Case;
		    Tot++;
		}
		else
		{
		    /* Tuple matches a positive tuple so is not a negative;
		       it was never stored, so release it rather than
		       leaking one allocation per rejected draw */

		    free(Case);
		}
	    }
	}
    }



    /*  Terminate the tuple list and remember the initial size  */

    TrainingSet[InitialTot = Tot] = Nil;

    /*  One entry per initial tuple in each of the bookkeeping arrays  */

    Bits = pmalloc(InitialTot);

    OriginalCovers = (int*) pmalloc(InitialTot*sizeof(int));
    NowCovers = (int*) pmalloc(InitialTot*sizeof(int));


    InitialBaseInfo = BaseInfo = Info(Pos, Tot);

    /* Copy from TrainingSet into InitialTrainingSet (pointer array,
       including the terminating Nil) */
    InitialTrainingSet = (Tuples) pmalloc((Tot+1) * sizeof(Tuple));
    memcpy(InitialTrainingSet, TrainingSet, (Tot+1) * sizeof(Tuple));
}


/*****************************************************************************/
/*                                                                           */
/*  NewTrainingSet(R, RSign, A) - updates the training set, with associated  */
/*                                tuple counts (Pos and Tot), new BaseInfo,  */
/*                                updated variable depths and types, and     */
/*                                weak literal count,                        */
/*                                by matching the current training set, in   */
/*                                TrainingSet, against relation R, with sign */
/*                                RSign and variable ordering A. This is now */
/*                                done simply by calls to other functions    */
/*                                (which are called separately to process    */
/*                                the determinate literals).                 */
/*                                                                           */
/*  R      relation of the literal being added                               */
/*  RSign  true for an unnegated literal, false for a negated one            */
/*  A      variables of the literal, indexed from 1 to R->Arity              */
/*                                                                           */
/*****************************************************************************/

    NewTrainingSet(R, RSign, A)
/*  --------------  */
    Relation R;
    Boolean RSign;
    Vars A;
{
    /*  Remember the variable count before the literal is applied;
	AcceptNewTrainingSet compares arguments against OldMaxVar when
	working out the depth of any new variables  */

    OldMaxVar = MaxVar;

    /*  Build the trial set in NewTS, then install it as TrainingSet  */

    FormNewTrainingSet(R, RSign, A);
    AcceptNewTrainingSet(R, RSign, A);
}

/*****************************************************************************/
/*                                                                           */
/*  FormNewTrainingSet(R, RSign, A) - this makes the trial new training set  */
/*                                    NewTS with associated "New" counts, by */
/*                                    matching the current training set, in  */
/*                                    TrainingSet, against relation R, with  */
/*                                    sign RSign and variable ordering A     */
/*                                    and also updates the variable type and */
/*                                    depth info.                            */
/*                                                                           */
/*****************************************************************************/


    FormNewTrainingSet(R, RSign, A)
/*  ------------------  */
    Relation R;
    Boolean RSign;
    Vars A;
{
    Tuple *TSP, Case, *Bindings, Instance, Extend();
    TupleFP CaseFP;
    int i, N, V, V1;

    AllCovered = true;
    OldBaseInfo = BaseInfo;

    N = R->Arity;

    NewMaxVar = MaxVar;

    if(RSign) /* Only unnegated literals can bind new variables */
    {
        ForEach(i, 1, N)
	{
	    NewMaxVar = Max(A[i], NewMaxVar);
	}
    }

    NewTS = (Tuple *) pmalloc((NewSize+1) * sizeof(Tuple));

    NewTot = NewPos = 0;

    if(R->ConstantCmp==2) /* R is threshold comparison */
    {
        V = A[1];
        for ( TSP = TrainingSet ; Case = *TSP++ ; )
	  {
            if(MissingValueEncountered&&MissingValue(R,A,Case))
	    {
                AllCovered = false;
                continue;
	    }
	    CaseFP = (TupleFP) Case;
            if( (  RSign && CaseFP[V] > R->TheConstant ) ||
                ( !RSign && CaseFP[V] <=R->TheConstant ) )
	    {
                CheckSize(NewTot, 1, &NewTS);

                NewTS[NewTot] = (Tuple) pmalloc((MaxVar+1) * sizeof(int));
                memcpy(NewTS[NewTot], Case, (MaxVar+1) * sizeof(int));
                NewTot++;
                if ( Positive(Case) ) NewPos++;
	    }
            else AllCovered = false;
	}
        NewTS[NewTot] = Nil;
        return;
    }

    if( R == CONTGT ) /* R is Continuous Greater Than */
    {
        V = A[1];
        V1= A[2];
        for ( TSP = TrainingSet ; Case = *TSP++ ; )
	{
            if(MissingValueEncountered&&MissingValue(R,A,Case))
	    {
                AllCovered = false;
                continue;
	    }
	    CaseFP = (TupleFP) Case;
            if( (  RSign && CaseFP[V] > CaseFP[V1] ) ||
                ( !RSign && CaseFP[V] <=CaseFP[V1] ) )
	    {
                CheckSize(NewTot, 1, &NewTS);

                NewTS[NewTot] = (Tuple) pmalloc((MaxVar+1) * sizeof(int));
                memcpy(NewTS[NewTot], Case, (MaxVar+1) * sizeof(int));
                NewTot++;
                if ( Positive(Case) ) NewPos++;
	    }
            else AllCovered = false;
	}
        NewTS[NewTot] = Nil;
        return;
    }

    for ( TSP = TrainingSet ; Case = *TSP++ ; )
    {
        if(MissingValueEncountered&&MissingValue(R,A,Case))
	{
	    AllCovered = false;
	    continue;
        }

	if ( RSign )
	{
	    /*  Add tuples from R->Pos  */

	    if ( Join(R->Pos, R->PosIndex, A, Case, N, false) )
	    {
		CheckSize(NewTot, NFound, &NewTS);

		Bindings = Found;
		while ( Instance = *Bindings++ )
		{
		    NewTS[NewTot] = Extend(Case, Instance, A, N, NewMaxVar);
		    NewTot++;
		    if ( Positive(Case) ) NewPos++;
		}
	    }
	    else AllCovered = false;
	}
	else
	if ( ! Join(R->Pos, R->PosIndex, A, Case, N, true) )
	{
	    CheckSize(NewTot, 1, &NewTS);

	    NewTS[NewTot] = (Tuple) pmalloc((MaxVar+1) * sizeof(int));
	    memcpy(NewTS[NewTot], Case, (MaxVar+1) * sizeof(int));
	    NewTot++;
	    if ( Positive(Case) ) NewPos++;
	}
	else AllCovered = false;
    }
    NewTS[NewTot] = Nil;
}

/*****************************************************************************/
/*                                                                           */
/*  AcceptNewTrainingSet(R,RSign,A) - Install the trial training set NewTS   */
/*                                    (with NewPos, NewTot, NewMaxVar) as    */
/*                                    the current TrainingSet.               */
/*                                                                           */
/*  The previous TrainingSet is discarded unless it is CopyTrainingSet.      */
/*  If the literal introduced variables (NewMaxVar > MaxVar) their type      */
/*  information is taken from R and their depth is set to one more than the  */
/*  greatest depth among the literal's arguments not exceeding OldMaxVar.    */
/*  BaseInfo is recomputed, and WeakLiterals is incremented when the         */
/*  literal covered every tuple or did not lower BaseInfo; otherwise it is   */
/*  reset to zero.                                                           */
/*                                                                           */
/*****************************************************************************/

    AcceptNewTrainingSet(R,RSign,A)
/*  --------------------  */
    Relation R;
    Boolean RSign;
    Vars A;

{
    int Arg, Arity, Deepest = 0;

    /*  Release the superseded training set  */

    if ( TrainingSet != CopyTrainingSet )
    {
	Discard(TrainingSet, true);
    }

    /*  New variable(s)? - if so, update type and depth info  */

    if ( NewMaxVar > MaxVar )
    {
	Arity = R->Arity;

	/*  First pass: type info for new variables, and the greatest
	    depth among the old ones  */

	ForEach(Arg, 1, Arity)
	{
	    if ( A[Arg] <= MaxVar )
	    {
		if ( A[Arg] <= OldMaxVar )
		{
		    Deepest = Max(Deepest, Variable[A[Arg]]->Depth);
		}
		continue;
	    }

	    Variable[A[Arg]]->Type = R->Type[Arg];
	    Variable[A[Arg]]->TypeRef = R->TypeRef[Arg];
	    Variable[A[Arg]]->Continuous = R->TypeRef[Arg]->Continuous;
	    Variable[A[Arg]]->TupleOrder = Nil;
	}

	/*  Second pass: new variables sit one level deeper  */

	Deepest++;

	ForEach(Arg, 1, Arity)
	{
	    if ( A[Arg] > MaxVar ) Variable[A[Arg]]->Depth = Deepest;
	}
    }

    /*  Update TrainingSet and associated counts  */

    Pos = NewPos;
    Tot = NewTot;
    MaxVar = NewMaxVar;
    TrainingSet = NewTS;

    /*  Give back any unused slots at the end of the tuple array  */

    if ( Tot < NewSize )
    {
	TrainingSet = (Tuple *) Resize(NewTS, (Tot+1) * sizeof(Tuple));
    }

    BaseInfo = Info(Pos, Tot);

    if ( ! AllCovered && ! ( BaseInfo >= OldBaseInfo ) )
    {
	/*  The literal excluded something and reduced BaseInfo  */

	WeakLiterals = 0;
    }
    else
    {
	WeakLiterals++;
	VERBOSE(3)printf("\tNow %d weak literals in sequence\n", WeakLiterals);
    }
}


    /*  Make sure the tuple array at *TSAddr can take Extra more entries
	beyond the SoFar already present.  When it cannot, the global
	capacity NewSize is raised by Extra or 1000, whichever is greater,
	and the array is resized to NewSize+1 entries (one spare for the
	Nil terminator)  */

    CheckSize(SoFar, Extra, TSAddr)
/*  ---------  */
    int SoFar, Extra;
    Tuples *TSAddr;
{
    int Needed;

    Needed = SoFar + Extra;

    if ( Needed > NewSize )
    {
	NewSize += Max(Extra, 1000);

	*TSAddr = (Tuple *) Resize(*TSAddr, (NewSize+1) * sizeof(Tuple));
    }
}



    /*  Make a longer copy of a tuple: allocate room for slots 0..NMV,
	copy slots 0..MaxVar from Case, then store Binding[i] in the slot
	of variable A[i] for each i in 1..N.  The caller owns the result  */

Tuple Extend(Case, Binding, A, N, NMV)
/*    ------  */
    Tuple Case, Binding;
    Vars A;
    int N, NMV;
{
    Tuple Longer;
    int Arg;

    Longer = (Tuple) pmalloc((NMV+1) * sizeof(int));

    /*  Existing part of the tuple, including slot 0  */

    memcpy(Longer, Case, (MaxVar+1) * sizeof(int));

    /*  Values supplied by the matching tuple  */

    ForEach(Arg, 1, N)
    {
	Longer[A[Arg]] = Binding[Arg];
    }

    return Longer;
}


    /*  Discover relevant partial orders in training set.

	For each argument position LHSVar of Target whose type is Ordered,
	and each non-continuous variable RHSVar (1..MaxVar) whose entry
	PartialOrder[RHSVar][LHSVar] is still '#', scan the training set:

	  - if, in every tuple, the collation value of RHSVar's constant is
	    non-zero and less than that of LHSVar's constant, record '<';
	  - otherwise, unless the type has FixedPolarity, make the same test
	    with "greater than" and record '>' if it holds throughout.

	When MissingValueEncountered is set, a tuple in which either value
	is MISSING_DISC does not count against the ordering.

	AnyPartialOrder is set whenever an ordering is recorded.

	Note that with an empty training set the first scan does not run,
	so OKDec stays true and '<' is recorded.

	NOTE(review): the collation sequence is taken from LHSVar's type and
	is indexed with RHSVar's constants without a visible check that the
	two variables have the same type - presumably CollSeq is indexed by
	a global constant number; confirm  */

    DiscoverPartialOrders()
/*  --------------------- */
{
    int *Seq, RHSSeq, RHSC, LHSC;
    Var LHSVar, RHSVar; /* RHSVar now a misnomer - may be either */
    Boolean OKDec, OKInc;
    Tuple *Scan;

    ForEach(LHSVar, 1, Target->Arity)
    {
        if(!(Target->TypeRef[LHSVar]->Ordered)) continue;/* Skip as unordered*/

	/*  Collation sequence of this argument's type  */

	Seq = Target->TypeRef[LHSVar]->CollSeq;

	ForEach(RHSVar, 1, MaxVar)
	{
	    if ( !Variable[RHSVar]->Continuous &&
		 PartialOrder[RHSVar][LHSVar] == '#' ) /* not(<,=,>) */
	    {
		OKDec = true;

		/*  The '>' test is attempted only without fixed polarity  */

		OKInc = !(Target->TypeRef[LHSVar]->FixedPolarity);

		/*  Test RHSVar < LHSVar; the scan stops at the first
		    tuple that violates it  */

		for ( Scan = TrainingSet ; OKDec&& *Scan ; Scan++ )
		{
		    if(MissingValueEncountered)
		    {
			/*  The clauses are evaluated left to right, so
			    RHSC and LHSC are both assigned before the
			    last clause uses them  */

		        OKDec = ( ((RHSC=(*Scan)[RHSVar])==MISSING_DISC)||
				  ((LHSC=(*Scan)[LHSVar])==MISSING_DISC)||
				  ((RHSSeq = Seq[RHSC])&&
				   (RHSSeq < Seq[LHSC])) );
		    }
		    else
		    {
			/*  A zero collation value for the RHS constant
			    fails the test outright  */

		        OKDec = (RHSSeq = Seq[(*Scan)[RHSVar]])
			        && (RHSSeq < Seq[(*Scan)[LHSVar]]);
		    }
		}

		if ( OKDec )
		{
		    PartialOrder[RHSVar][LHSVar] = '<';
		    AnyPartialOrder = true;

		    VERBOSE(2)
		        printf("\tNote %s<%s\n", Variable[RHSVar]->Name,
                                                 Variable[LHSVar]->Name);
		}
		else if(OKInc)
		{
		    /*  Same scan with the comparison reversed  */

		    for ( Scan = TrainingSet ; OKInc&& *Scan ; Scan++ )
		    {
		        if(MissingValueEncountered)
			{
			    OKInc = ( ((RHSC=(*Scan)[RHSVar])==MISSING_DISC)||
				      ((LHSC=(*Scan)[LHSVar])==MISSING_DISC)||
				      ((RHSSeq = Seq[RHSC])&&
				       (RHSSeq > Seq[LHSC])) );
			}
			else
			{
			    OKInc = (RHSSeq = Seq[(*Scan)[RHSVar]])
			            && (RHSSeq > Seq[(*Scan)[LHSVar]]);
			}
		    }
		    if ( OKInc )
		    {
		        PartialOrder[RHSVar][LHSVar] = '>';
			AnyPartialOrder = true;

		        VERBOSE(2)
		            printf("\tNote %s>%s\n", Variable[RHSVar]->Name,
				   Variable[LHSVar]->Name);
		    }
		}
	    }
	}
    }
}



    /*  Rebuild a training set by applying the literals in a clause
	to the copy of the training set  */


    RecoverTrainingSet(C)
/*  ------------------  */
    Clause C;
{
    int i;

    if(TrainingSet!=CopyTrainingSet)
    {
        Discard(TrainingSet, true);
	TrainingSet = CopyTrainingSet;
    }
    BaseInfo = InitialBaseInfo;
    MaxVar = Target->Arity;
    WeakLiterals = 0;

    memset(PartialOrder, '#', (MAXARITY+1)*(MAXARITY+1));
    ForEach(i,1,MAXARITY) PartialOrder[i][i] = '=';
    AnyPartialOrder = false;

    while(NRecLitCl>NRecLitDef)
        free(RecLitCl[NRecLitCl--]);

    while ( *C )
    {
	NewSize = 0;
	if((*C)->Rel==Target) /* Recursive Literal */
	{
	    DiscoverPartialOrders();
	    AddOrderingsDueTo((*C));
	}
	NewTrainingSet((*C)->Rel, (*C)->Sign, (*C)->Args);
	C++;
    }
}



	/*  Generate in Case the next constant tuple taking note of type
	    constraints.

	    Called with Case == Nil, a fresh tuple is allocated holding the
	    first value of each argument's type.  Otherwise Case is advanced
	    in place like an odometer, the last argument varying fastest.
	    When every argument already holds the last value of its type,
	    Case is freed and Nil is returned.  Slot 0 is never written
	    here  */

Tuple NextConstTuple(R, Case)
/*    --------------  */
    Relation R;
    Tuple Case;
{
    int Arg, Idx;
    TypeInfo Dom;

    if ( ! Case )
    {
	/*  Start of the enumeration  */

        Case = (Tuple) pmalloc((R->Arity+1) * sizeof(Const));

	ForEach(Arg, 1, R->Arity)
	{
	    Case[Arg] = R->TypeRef[Arg]->Value[0];
	}

	return Case;
    }

    /*  Working from the right, wrap every argument that holds the last
	value of its type back to the first value  */

    Arg = R->Arity;
    Dom = R->TypeRef[Arg];

    while ( Case[Arg] == Dom->Value[Dom->NValues-1] )
    {
	if ( Arg <= 1 )
	{
	    /*  All arguments at their last value - nothing left  */

	    free(Case);
	    return Nil;
	}

	Case[Arg] = Dom->Value[0];

	Arg--;
	Dom = R->TypeRef[Arg];
    }

    /*  Move this argument on to the value following its current one  */

    Idx = 0;
    while ( Case[Arg] != Dom->Value[Idx] )
    {
	Idx++;
    }
    Case[Arg] = Dom->Value[Idx+1];

    return Case;
}

/*****************************************************************************/
/*                                                                           */
/*  CheckOriginalCaseCover() - Determines OrigPosCov and OrigTotCov, the     */
/*                             number of positive (and pos/neg) cases from   */
/*                             the initial training set covered by the       */
/*                             current training set.                         */
/*                                                                           */
/*  Slot 0 of each current tuple, masked with Mask, is used as the number    */
/*  of the initial tuple it came from.  Each such number is counted once,    */
/*  the first time it is met, by testing and setting TrueBit for it.  Bits   */
/*  is replaced by a freshly zeroed array of InitialTot entries on every     */
/*  call; the previous array is released first so that repeated calls do     */
/*  not leak it.                                                             */
/*                                                                           */
/*****************************************************************************/

CheckOriginalCaseCover()

{
    Tuple *TSP, Case;

    OrigPosCov = OrigTotCov = 0;

    /*  Start from a clean flag array, releasing the old one  */

    if ( Bits ) free(Bits);
    Bits = (char *) pcalloc(InitialTot,sizeof(char));

    for ( TSP = TrainingSet ; (Case = *TSP++) ; )
    {
        if ( ! TestBit(Case[0]&Mask, TrueBit) )
	{
	    /*  First tuple seen that derives from this initial case  */

            SetBit(Case[0]&Mask, TrueBit);
	    OrigTotCov++;
	    if ( Positive(Case) ) OrigPosCov++;
	}
    }
}

/*****************************************************************************/
/*                                                                           */
/*  RandomTuple(R) - allocate and return a tuple for relation R in which     */
/*                   each argument holds a value of that argument's type,    */
/*                   chosen with drand48().  Slot 0 is left unset for the    */
/*                   caller to fill in; the caller owns the tuple.           */
/*                                                                           */
/*****************************************************************************/



Tuple RandomTuple(R)
/*    -----------  */
    Relation R;

{
    int Arg, Pick;
    TypeInfo Dom;
    Tuple Drawn;
    double drand48();

    Drawn = (Tuple) pmalloc((R->Arity+1) * sizeof(Const));

    ForEach(Arg, 1, R->Arity)
    {
	Dom = R->TypeRef[Arg];

	/*  Scale the random fraction to an index into the type's values  */

	Pick = (int)(Dom->NValues*drand48());

	Drawn[Arg] = Dom->Value[Pick];
    }

    return Drawn;
}

