#include  "defns.i"
#include  "extern.i"

/*  Count of relations created so far by ThresholdRelation(); it is added
    to MaxRel there to give the index in Reln where a new relation goes  */
static int ExtraRel = 0;

/*  Both set by FindOriginalCovers() and read by Threshold():
    OCTot - number of distinct original tuple numbers (Case[0]&Mask) met
    OCPos - how many of those were first met in a tuple for which
            Positive() holds  */
static int OCTot;
static int OCPos;

/*****************************************************************************/
/*                                                                           */
/* FindOriginalCovers(V): Create int array of number of times original tuples*/
/*                        are covered by current training set, excluding     */
/*                        instances with missing values of the variable V    */
/*                                                                           */
/*****************************************************************************/

    FindOriginalCovers(V)
/*  ------------------                */

VarInfo V;

{
    Tuples Ordered;
    Tuple Current;
    int Idx, Count, OrigNo, SeenBefore;

    /*  Start from a clean slate: no original tuple covered yet  */

    memset(OriginalCovers,0,InitialTot*sizeof(int));
    OCTot = 0;
    OCPos = 0;

    Count = V->Tot;
    Ordered = V->TupleOrder;

    /*  Walk the first V->Tot entries of V's tuple ordering, bumping the
        cover count of the original tuple each one refers to.  The first
        time an original tuple is met it is also tallied in OCTot, and in
        OCPos when the referring tuple is positive  */

    Idx = 0;
    while ( Idx < Count )
    {
        Current = Ordered[Idx];
        Idx++;

        OrigNo = Current[0]&Mask;
        SeenBefore = OriginalCovers[OrigNo]++;

        if ( SeenBefore == 0 )
        {
            OCTot++;
            if ( Positive(Current) )
            {
                OCPos++;
            }
        }
    }
}

/*****************************************************************************/
/*                                                                           */
/* Threshold(V,As,Rs): Find best threshold for variable V, and enter as      */
/*                     possible literal if gain justifies this, creating     */
/*                     relation for the purpose (As, Rs feed CostOfLit)      */
/*                                                                           */
/*****************************************************************************/

   Threshold(V, As, Rs)
/* ---------   */

int V, As, Rs;

{
    /*  V is an index into Variable[].  As and Rs are only used as the
        arguments of CostOfLit() when UNIFORMCODING is false.

        The routine scans the tuples in Variable[V]->TupleOrder, treating
        the V-value of each tuple as a candidate threshold t for the
        literal "V>t" (or its negation).  The best candidate, if good
        enough, is handed to ProposeLiteral().  Along the way a "compact
        clause" may be recorded in CompactClause.

        Counter naming: Now* count tuples of the current ordering, Orig*
        count distinct original tuples (via OriginalCovers / NowCovers);
        *T* is the side still above the threshold, *F* the side at or
        below it; *Tot is all tuples, *Pos the positive ones.  */

    int i, il, jl, OrigTupleNumber, CaseIsPositive, NCovered;
    int NowFTot=0, NowFPos=0, OrigFTot=0, OrigFPos=0;
    int NowTTot, NowTPos, OrigTTot, OrigTPos;
    /*  Snapshot of the eight counters at the best threshold seen  */
    int BestNTT, BestNTP, BestOTT, BestOTP, BestNFT, BestNFP, BestOFT, BestOFP;
    int BestThresholdIndex;
    int DiffValues = 0;
    Boolean BestSign, WeakPos, WeakNeg, CompactClT, CompactClF;
    float LitBits, MinUsefulGain, PosGain, NegGain, BestGain=0.0;

    Tuple Case;
    Tuples TSOrder;
    TuplesFP TSOrderFP;

    ConstFP t;

    Relation R;
    char A[2], *VName;

    Literal L;
    VarInfo Vl;

    /*  NOTE(review): local old-style declaration; would clash with the
        <time.h> prototype if that header were ever included - confirm  */
    long clock();


    VName = Variable[V]->Name;

    /*  OriginalCovers[] is rebuilt for V; NowCovers[] starts as a copy and
        is decremented as tuples move to the "false" side below  */

    FindOriginalCovers(Variable[V]);
    memcpy(NowCovers,OriginalCovers,InitialTot*sizeof(int)); 

    /*  The same ordering viewed both as Tuples and as TuplesFP  */

    TSOrder = Variable[V]->TupleOrder;
    TSOrderFP = (TuplesFP) TSOrder;

    /*  Before any tuple is moved, every tuple is on the "true" side  */

    NowTTot=Variable[V]->Tot; 
    NowTPos=Variable[V]->Pos;
    OrigTTot=OCTot;
    OrigTPos=OCPos;

    if(UNIFORMCODING)
    {
        LitBits = CostOfLit(1,RelnArgPairs,NLit);
    }
    else
    {
        LitBits = CostOfLit(Rs,As,NLit);
    }

    /*  Give up straight away if the literal cannot be afforded  */

    if ( LitBits > AvailableBits )
    {
        VERBOSE(1)
            printf("\tThresholding on %s requires %.1f bits\n", 
	           VName,LitBits);

        return;
    }

    /* Now find best threshold by stepping up the list of tuples ordered
       by the value of variable V, evaluating the potential gain for each
       possible threshold, keeping the best. The notation used re positive
       and negative tuples being in/out is similar to that in ComputeGain */

    /*  The last tuple is never used as a threshold (loop stops at Tot-2),
        and the scan ends early once BestGain reaches MaxPossibleGain  */

    for ( i = 0 ; 
          (i < Variable[V]->Tot-1)&&(BestGain<MaxPossibleGain); 
           i++ )
    {
	t = TSOrderFP[i][V];

	/*  Tuple i moves from the "true" side to the "false" side  */

        NowFTot++;
	NowTTot--;
        Case = TSOrder[i];
	CaseIsPositive = Positive(Case);
        if ( CaseIsPositive )
	{
	    NowFPos++;
	    NowTPos--;
	}

	/*  Equal counts mean no tuple of this original tuple has been moved
	    yet, so the original tuple now appears on the "false" side for
	    the first time  */

	OrigTupleNumber=Case[0]&Mask;
	if ( OriginalCovers[OrigTupleNumber] == NowCovers[OrigTupleNumber]) 
	{
	    OrigFTot++;
	    if (CaseIsPositive) OrigFPos++;
	}

	/*  A count of 1 before the decrement means this was the last tuple
	    of the original tuple remaining on the "true" side  */

	if (NowCovers[OrigTupleNumber]-- == 1) 
	{
	    OrigTTot--;
	    if (CaseIsPositive) OrigTPos--;
	}

	/* Proceed to consider next tuple in order if of same V value */
	/* (the comparison is made on the Tuple view, not the TuplesFP view) */
	if(Case[V]==TSOrder[i+1][V]) continue;

	DiffValues++;

	PosGain = Worth(NowTPos, NowTPos, NowTTot, 0);
	NegGain = Worth(NowFPos, NowFPos, NowFTot, 0);

	WeakPos = PosGain < 0.001 || NowFTot <= 0;

	WeakNeg = NegGain < 0.001 || NowFTot >= Tot;

	/*  Weak literal sequence check  */

	if ( WeakPos && WeakNeg && WeakLiterals >= PATIENCE )
	{
	    VERBOSE(2)
	    {
	        printf("\t");
		printf("%s>%g", VName, t);
		printf("\ttoo many weak literals\n");
	    }
	    continue;
	}

	/* Encoding length check */
	/* NCovered is assigned inside the condition: OrigTPos when PosGain
	   is the larger and OrigTPos is non-zero, otherwise OrigFPos when
	   NegGain is positive and OrigFPos is non-zero.  If neither
	   alternative holds the Except() test is not evaluated at all */
	if ( ( (PosGain > NegGain && (NCovered = OrigTPos)) ||
	       (NegGain > 0       && (NCovered = OrigFPos)) ) &&
	     (Except(CycleTot, NCovered) < UsedSoFar + LitBits ) )
	{
	    VERBOSE(2)
	    {
	        printf("\t");
                printf("%s>%g", VName, t);
		printf("\tTrue %d, False %d, Covers %d: coding violation\n",
		        NowTPos, NowFPos, NCovered);
	    }
	    continue;
	}

	/* Check compact clause cover situation */
	/* A side qualifies when all its original tuples are positive, it
	   covers more than CompactClCover, and it passes the Except() test */

	CompactClT = ( ((OrigTPos==OrigTTot) && (OrigTPos>CompactClCover)) &&
		      (Except(CycleTot, OrigTPos) > UsedSoFar + LitBits) );
	CompactClF = ( ((OrigFPos==OrigFTot) && (OrigFPos>CompactClCover)) &&
		      (Except(CycleTot, OrigFPos) > UsedSoFar + LitBits) );

	if(CompactClT&&CompactClF) /* Possible due to missing values */
	{
	    /*  Keep only the side with the larger positive cover  */
	    if(OrigFPos>OrigTPos)
	        CompactClT = false;
	    else
	        CompactClF = false;
	}

	if(CompactClT||CompactClF)
	{
	    /*  Build a fresh clause: copies of the NLit literals in
	        NewClause, the new threshold literal, and a Nil terminator  */

	    CompactClause = (Clause) pmalloc((NLit+2) * sizeof(Literal));
	    if(NLit)
	    {
	        for(il=0;il<NLit;il++)
		{
		    L = (Literal) pmalloc(sizeof(struct _lit_rec));
		    memcpy(L,NewClause[il],sizeof(struct _lit_rec));
		    CompactClause[il] = L;
		}
	    }

	    
	    t = TSOrderFP[i][V];

	    R = ThresholdRelation(V,t);

	    /* Now add literal to compact clause */

	    /* NOTE(review): pmalloc(2) assumes one-byte Vars elements (compare
	       char A[2] above) - confirm against the declaration of Vars */

	    L = (Literal) pmalloc(sizeof(struct _lit_rec));
	    L->Rel  = R;
	    L->Sign = CompactClT;
	    L->Bits = LitBits;
	    L->Args = (Vars) pmalloc(2);
	    L->Args[1] = V;
	    L->FloatingDet = false;

	    /* Need to unfloat associated determinate literals if any */

	    Vl = Variable[V];
	    for(jl=0;jl<Vl->DetDeps;jl++)
	    {
	        CompactClause[Vl->DetLits[jl]]->FloatingDet = false;
	    }
	    
	    CompactClause[NLit] = L;
	    CompactClause[NLit+1] = Nil;
	    
	    CompactClCover = CompactClT ? OrigTPos : OrigFPos;

	    CompactClNLit = NLit+1;

	    VERBOSE(1)
	    {
		printf("Best clause so far, covering %d\n\t",
		       CompactClCover);
		PrintClause(Target, CompactClause);
	    }
	}

	VERBOSE(2)
	{
	    printf("\t");
	    printf("%s>%g", VName, t);
	    printf("\tTrue %d[%d,%d]: gain %.2f", NowTPos, NowTPos, 
		   NowTTot, PosGain);
            printf(";  False %d,%d: gain %.2f\n", NowFPos, NowFTot, NegGain);
	}

	/*  Remember this threshold, with all eight counters, if the better
	    of its two gains beats the best gain so far  */

	if(PosGain>NegGain)
	{
	    if(PosGain>BestGain)
	    {
	        BestGain = PosGain;
	        BestSign = true;
		BestThresholdIndex = i;
		BestNTT = NowTTot;
		BestNTP = NowTPos;
		BestOTT = OrigTTot;
		BestOTP = OrigTPos;
		BestNFT = NowFTot;
		BestNFP = NowFPos;
		BestOFT = OrigFTot;
		BestOFP = OrigFPos;
	    }
	}
	else
	{
	    if(NegGain>BestGain)
	    {
 	        BestGain = NegGain;
	        BestSign = false;
		BestThresholdIndex = i;
                BestNTT = NowTTot;
                BestNTP = NowTPos;
                BestOTT = OrigTTot;
                BestOTP = OrigTPos;
                BestNFT = NowFTot;
                BestNFP = NowFPos;
                BestOFT = OrigFTot;
                BestOFP = OrigFPos;
	    }
	}
    }

    /* Is Gain for Best Threshold Good Enough To Propose The Literal ? */

    MinUsefulGain = NPossible < MAXPOSSLIT ? MINALTFRAC * BestLitGain :
                    Max(Possible[MAXPOSSLIT]->Gain, MINALTFRAC * BestLitGain);

    /*  The first test can only succeed when BestGain > 0, i.e. after some
        threshold was recorded above, so BestSign and the Best* counters
        are only read once they have been set  */

    if ( ((BestGain>0.001)||( WeakLiterals<PATIENCE && BestGain>0.0 ))
       &&(Except(CycleTot, BestSign?BestOTP:BestOFP)>UsedSoFar+LitBits)
       &&(BestGain>MinUsefulGain) )
    {

        /* Then Propose a Literal with the Relation */

        t = TSOrderFP[BestThresholdIndex][V];

	R = ThresholdRelation(V,t);

        /*  The single argument goes in A[1]; A[0] is left unset  */

        A[1]=V;
        if(BestSign)
	{
            ProposeLiteral(R, true, A, BestNTT,  LitBits, BestOTP, BestOTT, 
                           BestGain);
	}
        else
	{
            ProposeLiteral(R, false, A, BestNFT, LitBits, BestOFP, BestOFT, 
                           BestGain);
	}
    }


    VERBOSE(1)
    {
        printf("\t\t\t\t[%s> tried %d / %d, %.1f secs]\n",
                VName, DiffValues,
	        Variable[V]->PossValues-1, clock() / 1.0E6);
    }
}


/*****************************************************************************/
/*                                                                           */
/* MissingValue(R,A,T) : Is there a missing value in Tuple T when applying   */
/*                       relation R with arguments A ?                       */
/*                                                                           */
/*****************************************************************************/

Boolean MissingValue(R,A,T)
/*      ------------       */

Relation R;
Vars A;
Tuple T;

{
    int i;
    TupleFP TFP;

    TFP = (TupleFP) T;

    ForEach(i,1,R->Arity)
    {
        if(R==SAMEVAR)
	{
	    if(T[A[i]]==MISSING_DISC) return true;
	}
	else if(R==CONTGT)
	{
	    if(TFP[A[i]]==MISSING_FP) return true;
	}
        else if(R->TypeRef[i]->Continuous)
	{
	    if(TFP[A[i]]==MISSING_FP) return true;
	}
        else if(T[A[i]]==MISSING_DISC) return true;
    }
    return false;
}

/*****************************************************************************/
/*                                                                           */
/* UpdateTupleOrders() - For each continuous variable, order the tuples on   */
/*                       that variable, and record info re number of tuples, */
/*                       number of positive tuples, number of values etc.    */
/*                                                                           */
/*****************************************************************************/

UpdateTupleOrders()

{
    TupleFP *Source, *Sorted, RowFP;
    Tuple *Walk, Row;
    VarInfo Info;
    Const Previous;
    int VarNo, Kept, SetSize;

    /*  Size of the training set bounds every per-variable copy  */

    SetSize = Number(TrainingSet);

    ForEach(VarNo,1,MaxVar)
    {
        Info = Variable[VarNo];

        /*  Only continuous variables carry a tuple ordering  */

        if ( Info->Continuous )
        {
            /*  Gather the tuples whose value for this variable is known
                into a new Nil-terminated array  */

            Sorted = (TuplesFP) pmalloc((SetSize+1)*sizeof(TupleFP));
            Kept = 0;

            Source = (TuplesFP) TrainingSet;
            while ( (RowFP = *Source++) )
            {
                if ( RowFP[VarNo] != MISSING_FP )
                {
                    Sorted[Kept++] = RowFP;
                }
            }
            Sorted[Kept] = Nil;

            /*  Reset the statistics held for the variable  */

            Info->Tot = 0;
            Info->Pos = 0;
            Info->PossValues = 0;

            if ( Kept )
            {
                /*  Put the gathered tuples in order of this variable  */

                SortOnTupleElement(Sorted,VarNo,0,Kept-1);

                /*  Count tuples, positive tuples and changes of value.
                    Previous starts one away from the first value so that
                    the first tuple always registers as a new value  */

                Walk = (Tuples) Sorted;

                Previous = Walk[0][VarNo];
                Previous++;

                for ( ; (Row = *Walk) ; Walk++ )
                {
                    Info->Tot++;

                    if ( Positive(Row) )
                    {
                        Info->Pos++;
                    }

                    if ( Row[VarNo] != Previous )
                    {
                        Info->PossValues++;
                    }
                    Previous = Row[VarNo];
                }
            }

            /*  Swap the new ordering in, releasing any previous one  */

            if ( Info->TupleOrder )
            {
                free(Info->TupleOrder);
            }
            Info->TupleOrder = (Tuples) Sorted;
        }
    }
}

/*****************************************************************************/
/*                                                                           */
/* SortOnTupleElement(T,M,left,right) - sort the tuples T between left and   */
/*                                      right according to Mth value         */
/*                                                                           */
/*****************************************************************************/


SortOnTupleElement(T,M,left,right)

TuplesFP T;
int M, left, right;

{
    register int i, last, first, swap;
    register TupleFP temp;
    register ConstFP comp;

    if (left>=right) return;

    temp = T[left];
    T[left] = T[swap=(left+right)/2];
    T[swap] = temp;

    last = left;

    comp = T[left][M];

    for ( i = left + 1; i <= right; i++ )
    {
        if( T[i][M]<=comp )
	{
	    temp = T[++last];
	    T[last] = T[i];
	    T[i] = temp;
	}
    }

    temp = T[left];
    T[left] = T[last];
    T[last] = temp;

    first = last;

    for ( i = last - 1; i>=left ; i-- )
    {
        if( T[i][M]==comp )
	{
	    temp = T[--first];
	    T[first] = T[i];
	    T[i] = temp;
	}
    }


    SortOnTupleElement(T, M, left, first-1);
    SortOnTupleElement(T, M, last+1, right);
}

/*****************************************************************************/
/*                                                                           */
/*  ThresholdRelation(V,t) - create a threshold relation V>t                 */
/*                                                                           */
/*****************************************************************************/

Relation ThresholdRelation(V,t)

int V;
ConstFP t;

{
    Relation R;
    int T;

    R = (Relation) pmalloc(sizeof(struct _rel_rec));
    R->Arity = 1;
    R->ConstantCmp = 2;
    R->TheConstant = t;

    R->Type = (int*) pmalloc(2*sizeof(int));
    R->Type[0] = 0;
    R->TypeRef =  (TypeInfo*) pmalloc(2*sizeof(TypeInfo));
    R->TypeRef[0] = Nil;
    R->Pos = (Tuple *) pmalloc(sizeof(Tuple));
    R->Pos[0] = Nil;
    R->Neg = Nil;
    R->BinSym = false;

    T = Variable[V]->Type;
    R->Type[1] = T;
    R->TypeRef[1] = Type[T];
    Name[0] = '*' ;
    Name[1] = '>' ;
    Name[2] = '\0';

    if ( (MaxRel + ++ExtraRel) % 100 == 0 )
    {
        Reln = (Relation *) prealloc(Reln, 
                                       (MaxRel+ExtraRel+100)*sizeof(Relation));
    }
    Reln[MaxRel + ExtraRel] = R;

    return R;
}


