#include  "defns.i"
#include  "extern.i"


    /*  Compute the gain from literals of the form R(A) or ~R(A).
	If better than the best so far, save.

	NB: The various counts have components with the following meanings:
		Pos	plus tuples
		Tot	all tuples
		T	obtained from R(A)
		F	obtained using ~R(A)
		Now	number of current tuples
		Orig	number of tuples in original training set
		New	referring to training set if use R(A) or ~R(A)  */

    /*  This version uses the same kind of pruning cutoffs as args.c  */


float  ComputeGain(R, A, LitBits, CanUse)
/*     -----------  */
    Relation R;
    Vars A;
    float LitBits;
    Boolean CanUse;
{
    int Instances = 1,				/* ground tuples when all vars bound */
	TInstances, FInstances,			/* no of them in/out wrt relation */
	TOInstances,				/* all non-F instances */
	NowTPos = 0, TPos = 0, TTot = 0,	/* params if use R(A) as literal */
	NowFPos = 0, NowFNeg = 0, NowFTot,	/* ditto if use ~R(A) */
	OrigTPos = 0, OrigFPos = 0,		/* no original pos tuples that would pass */
	OrigTTot = 0, OrigFTot = 0,		/* no original tuples that would pass */
	Best, N, i, j, UnboundVars = 0, Number();
    Var V, W,  Unbound[MAXARITY+1], New[MAXARITY*MAXARITY], Previous[MAXARITY*MAXARITY];
    int Col, PossibleDuplicateVars = 0;
    Const PreviousVal, FirstBinding[MAXARITY+1];
    Tuple *TSP, Case;
    float Gain, PosGain, NegGain, MinUsefulGain, NowTPosThresh, NowFNegThresh,
	Worth(), Info();
    Boolean NewVar[MAXARITY+1], WeakPos, WeakNeg,  RuleOutT, RuleOutF,
	Determinate, IdenticalBindings, OK;

    /*  Table of constraints:

	TInstances + FInstances + OptInstances = Instances
	NewFTot/Pos = NowFTot/Pos
	NowTPos + NowFPos = Pos		*/

    N = R->Arity;

    /*  Compute number of ground tuples corresponding to args A  */

    memset(NewVar, true, MAXARITY+1);

    ForEach(i, 1, N)
    {
	if ( (V = A[i]) > MaxVar && NewVar[V] )
	{
	    Unbound[++UnboundVars] = i;
	    Instances *= Type[R->Type[i]]->NValues;
	    NewVar[V] = false;

	    ForEach(W, 1, MaxVar)
	    {
		New[PossibleDuplicateVars] = i;
		Previous[PossibleDuplicateVars++] = W;
	    }
	}
    }

    ClearBits;

    IdenticalBindings = Determinate = DETERMINATE && UnboundVars > 0;

    /*  The minimum gain that would be of interest is just enough to give
	a literal a chance to be saved by the backup procedure or, if
	there are determinate literals, to reach the required fraction
	of the maximum possible gain  */

    RuleOutT = false;
    RuleOutF = ! NEGLITERALS;

    MinUsefulGain = NPossible < MAXPOSSLIT ? MINALTFRAC * BestLitGain :
			Max(Possible[MAXPOSSLIT]->Gain, MINALTFRAC * BestLitGain);

    if ( NDeterminate && MinUsefulGain < DETERMINATE * MaxPossibleGain )
    {
	MinUsefulGain = DETERMINATE * MaxPossibleGain;
    }

    NowTPosThresh = MinUsefulGain / BaseInfo - 0.001;
    NowFNegThresh = (Pos + 1) * (pow(2.0, BaseInfo - MinUsefulGain / Pos) - 1) + 0.001;

    TSP = TrainingSet;
    while ( Case = *TSP++ )
    {
	if ( Join(R->Pos, R->PosIndex, A, Case, N, false) )
	{
	    TInstances = NFound;

	    if ( ! TTot )
	    {
		memcpy(FirstBinding, Found[0], (N+1) * sizeof(Const));
	    }
	    else
	    for ( i = 1 ; IdenticalBindings && i <= UnboundVars ; i++ )
	    {
		V = Unbound[i];
		IdenticalBindings = FirstBinding[V] == Found[0][V];
	    }

	    for ( i = 0 ; Positive(Case) && i < PossibleDuplicateVars ; )
	    {
		Col = New[i];
		PreviousVal = Case[Previous[i]];
		OK = true;

		for ( j = 0 ; OK && j < NFound ; j++ )
		{
		    OK = Found[j][Col] == PreviousVal;
		}

		if ( OK )
		{
		    i++;
		}
		else
		{
		    PossibleDuplicateVars--;
		    for ( j = i ; j < PossibleDuplicateVars ; j++ )
		    {
			New[j] = New[j+1];
			Previous[j] = Previous[j+1];
		    }
		}
	    }
	}
	else
	{
	    TInstances = 0;
	}

	/*if ( R->Neg && ( USEOPT || ! TInstances ) )*/
	if ( USEOPT && R->Neg && ! TInstances )
	{
	    Join(R->Neg, R->NegIndex, A, Case, N, false);
	    FInstances = NFound;
	}
	else
	{
	    FInstances = Instances - TInstances;
	}

	TOInstances = ( USEOPT ? Instances - FInstances : TInstances );

	Determinate &= ( Positive(Case) ? TInstances == 1 : TInstances <= 1 );

	if ( TOInstances )
	{
	    TTot += TOInstances;
	    if ( Positive(Case) )
	    {
		NowTPos++;
		TPos += TOInstances;
	    }

	    if ( ! TestBit(Case[0]&Mask, TrueBit) )
	    {
		SetBit(Case[0]&Mask, TrueBit);
		OrigTTot++;
		if ( Positive(Case) ) OrigTPos++;
	    }
	}

	if ( ! TInstances )
	{
	    if ( Positive(Case) )
	    {
		NowFPos++;

		if ( Pos - NowFPos < NowTPosThresh && ! RuleOutT )
		{
		    RuleOutT = true;
		    if ( RuleOutF )
		    {
			VERBOSE(2)
			{
			    printf("\t");
			    PrintLiteral(R, true, A);
			    printf("\tTrue pos <= %d -- abandoned\n", Pos - NowFPos);
			}

			return 0.0;
		    }
		}
	    }
	    else
	    {
		NowFNeg++;

		if ( NowFNeg > NowFNegThresh && ! RuleOutF )
		{
		    RuleOutF = true;
		    if ( RuleOutT )
		    {
			VERBOSE(2)
			{
			    printf("\t");
			    PrintLiteral(R, true, A);
			    printf("\tFalse neg >= %d -- abandoned\n", NowFNeg);
			}

			return 0.0;
		    }
		}
	    }

	    if ( ! TestBit(Case[0]&Mask, FalseBit) )
	    {
		SetBit(Case[0]&Mask, FalseBit);
		OrigFTot++;
		if ( Positive(Case) ) OrigFPos++;
	    }
	}
    }

    NowFTot = NowFPos + NowFNeg;

    PosGain = Worth(NowTPos, TPos, TTot, UnboundVars);
    WeakPos = PosGain < 0.001 || NowFTot <= 0;

    NegGain = NEGLITERALS ? Worth(NowFPos, NowFPos, NowFTot, 0) : 0.0;
    WeakNeg = NegGain < 0.001 || NowFTot >= Tot;

    /*  Weak literal sequence check  */

    if ( ! Determinate && CanUse && WeakPos && WeakNeg && WeakLiterals >= PATIENCE )
    {
	VERBOSE(2)
	{
	    printf("\t");
	    PrintLiteral(R, true, A);
	    printf("\ttoo many weak literals\n");
	}

	return 0.0;
    }

    /*  Encoding length check  */

    if ( CanUse &&
	 ( PosGain > NegGain && (Best = OrigTPos) ||
	   NegGain > 0 && (Best = OrigFPos) ) &&
         Except(CycleTot, Best) < UsedSoFar + LitBits )
    {
	VERBOSE(1)
	{
	    printf("\t");
	    PrintLiteral(R, true, A);
	    printf("\tTrue %d, False %d, Covers %d: coding voilation\n",
		NowTPos, NowFPos, Best);
	}

	return 0.0;
    }

    VERBOSE(1)
    if ( PosGain > 0 || NegGain > 0 || Determinate || Verbosity >= 2 )
    {
	if ( Determinate ) putchar('#');
	if ( IdenticalBindings) putchar('X');
	if ( PossibleDuplicateVars ) printf(" %c=%c", Variable[A[New[0]]], Variable[Previous[0]]);

	printf("\t");
	PrintLiteral(R, true, A);
	printf("\tTrue %d[%d,%d]: gain %.2f", NowTPos, TPos, TTot, PosGain);
	if ( NEGLITERALS )
	{
	    printf(";  False %d,%d: gain %.2f", NowFPos, NowFTot, NegGain);
	}
	newline;
    }

    if ( PossibleDuplicateVars /*||
	 Determinate && IdenticalBindings && TPos < Tot*/ ) return 0.0;

    Gain = Max(PosGain, NegGain);

    if ( CanUse )
    {
	if ( Determinate && ! IdenticalBindings &&
	     Gain < DETERMINATE * MaxPossibleGain &&
	     ProposeDeterminateLiteral(R, A, LitBits, UnboundVars) )
	{
	    return 0.001;
	}

	if ( PosGain > 1E-6 )
	{
	    ProposeLiteral(R, true, A, TTot, LitBits, OrigTPos, OrigTTot, PosGain);
	}

	if ( NegGain > 1E-6 )
	{
	    ProposeLiteral(R, false, A, NowFTot, LitBits, OrigFPos, OrigFTot, NegGain);
	}
    }

    return Gain;
}



    /*  Compute aggregate gain from a test on relation R, tuple T.
	The Basic gain is the number of positive tuples * information
	gained regarding each; but there is a minor adjustment:
	  - a literal that has some positive tuples and no gain,
	    but introduces one or more unbound variables, is
	    given a slight gain  */

float Worth(N, P, T, UV)
/*    -----  */
    int N, P, T, UV;
{
    float G, TG, Info();

    if ( T < MAXTUPLES )
    {
	TG = N * (G = BaseInfo - Info(P, T));

	if ( /*! DETERMINATE &&*/ G < 1E-6 && N && UV )
	{
	    return 0.0009 + UV * 0.0001;  /* very small notional gain */
	}
	else
	{
	    return TG;
	}
    }
    else
    {
	return -0.001;
    }
}



float Info(N, T)
/*    ----  */
    int N, T;
{
    return Log2(T+1) - Log2(N+1);
}
