 /***************************************************************************/
 /*                                                                         */
 /*      Copyright (C) 1991, 1992  Daniel Sleator and Davy Temperley        */
 /*  See file "README" for information about commercial use of this system  */
 /*                                                                         */
 /***************************************************************************/

#include "header.c"

/* This file contains the exhaustive search algorithm. */

int match(Connector *a, Connector *b) {
/*
   Returns TRUE if connectors a and b match according to the connector
   matching rules, and FALSE otherwise.  The connector strings must be
   properly formed: zero or more upper case letters followed by a
   (possibly empty) sequence of lower case letters, "*"s or "^"s.
   The algorithm is symmetric with respect to a and b.

   It works as follows:  The labels must match.  The priorities must be
   compatible (both THIN_priority, or one UP_priority and one DOWN_priority).
   The sequence of upper case letters must match exactly.  The trailing
   subscript characters are then compared position by position, up to the
   end of the shorter string, with rules that depend on which of the two
   priority cases applies (see the comments below).
*/
    char *s, *t;
    int x, y;

    if (a->label != b->label) return FALSE;
    x = a->priority;
    y = b->priority;

    s = a->string;
    t = b->string;

    /* The upper case prefixes must be identical.  The casts to unsigned
       char are required: passing a negative char value to isupper() is
       undefined behaviour. */
    while (isupper((unsigned char) *s) || isupper((unsigned char) *t)) {
        if (*s != *t) return FALSE;
        s++;
        t++;
    }

    if ((x == THIN_priority) && (y == THIN_priority)) {
        /*
           Remember that "*" matches anything, and "^" matches nothing
           (except "*").  Otherwise two characters match if and only if
           they're equal.  ("^" can be used in the dictionary just like
           any other connector.)
        */
        while ((*s != '\0') && (*t != '\0')) {
            if ((*s == '*') || (*t == '*') ||
                ((*s == *t) && (*s != '^'))) {
                s++;
                t++;
            } else return FALSE;
        }
        return TRUE;
    }

    if ((x == DOWN_priority) && (y == UP_priority)) {
        /* Mirror image of the UP/DOWN case: exchange the two strings so
           that s always belongs to the UP connector and t to the DOWN one. */
        char *tmp = s;
        s = t;
        t = tmp;
    } else if (!((x == UP_priority) && (y == DOWN_priority))) {
        /* any other combination of priorities is incompatible */
        return FALSE;
    }

    /*
       As you go up (namely from the UP connector to the DOWN connector)
       the set of strings that match (in the normal THIN sense above)
       should get no larger.  Read the comment in and.c to understand this.
       In other words, the DOWN string (t) must be weaker (or at least
       no stronger) than the UP string (s).

       This code is only correct if the strings are the same
       length.  This is currently true, but perhaps for safety
       this assumption should be removed.
    */
    while ((*s != '\0') && (*t != '\0')) {
        if ((*s == *t) || (*s == '*') || (*t == '^')) {
            s++;
            t++;
        } else return FALSE;
    }
    return TRUE;
}

/* One entry of the memoization hash table.  An entry is keyed by the      */
/* quadruple (lw, rw, le, re) and carries one integer value for that key.  */
typedef struct Table_connector Table_connector;
struct Table_connector {
    short lw, rw;            /* left and right word numbers of the key     */
    Connector * le, * re;    /* left and right connector pointers of the   */
                             /* key (compared by address, may be NULL)     */
    int count;               /* value stored for this key; its meaning is  */
                             /* chosen by the caller of table_store()      */
    Table_connector * next;  /* next entry in the same hash bucket         */
};

/* Number of buckets in the table; init_table() always sets it to a power of two. */
static int table_size;
/* Array of table_size bucket heads; each bucket is a singly linked chain. */
static Table_connector ** table;

void init_table(void) {
/* Allocates the hash table and empties every bucket.  The number of       */
/* buckets is 2^k, where k is a piecewise linear function of N_words       */
/* (so the size is piecewise exponential): k is 4 for fewer than 4 words,  */
/* and is capped at 16 from 40 words on.  It would probably be better to   */
/* use the actual number of disjuncts rather than just the word count.     */
    int log2_size;
    int bucket;

    if (N_words >= 40) {
        log2_size = 16;
    } else if (N_words >= 10) {
        log2_size = ((6*(N_words-10))/30) + 10;
    } else if (N_words >= 4) {
        log2_size = ((6*(N_words-4))/6) + 4;
    } else {
        log2_size = 4;
    }
    table_size = (1 << log2_size);

    table = (Table_connector**) xalloc(table_size * sizeof(Table_connector*));
    bucket = 0;
    while (bucket < table_size) {
        table[bucket] = NULL;
        bucket++;
    }
}

int hash(int lw, int rw, Connector *le, Connector *re) {
/* Maps the quadruple (lw, rw, le, re) to a bucket index in the range      */
/* 0 .. table_size-1.  Each of the four components is folded in by         */
/* multiplying the running value by 3 and adding an entry of randtable[].  */
/*                                                                         */
/* All of the arithmetic is done in unsigned long:                         */
/*   - the pointers are converted to unsigned long rather than int, so     */
/*     they are not squeezed through a signed 32 bit type on machines      */
/*     where pointers are wider than int;                                  */
/*   - unsigned arithmetic wraps around, whereas overflow of the signed    */
/*     int used previously is undefined behaviour.                         */
/* Only the pointer values are used (never dereferenced), so le and re     */
/* may be NULL.  The final mask relies on table_size being a power of two  */
/* (init_table() guarantees this).                                         */
    unsigned long h;
    unsigned long rt_mask = (unsigned long) (RTSIZE - 1);
    unsigned long modulus = (unsigned long) (table_size + 1);

    h = 0;

    h = h + (h<<1) + (unsigned long) randtable[((unsigned long) lw + h) & rt_mask];
    h = h + (h<<1) + (unsigned long) randtable[((unsigned long) rw + h) & rt_mask];
    h = h + (h<<1) + (unsigned long) randtable[((((unsigned long) le) + h) % modulus) & rt_mask];
    h = h + (h<<1) + (unsigned long) randtable[((((unsigned long) re) + h) % modulus) & rt_mask];
    return (int) (h & (unsigned long) (table_size-1));
}

void free_table(void) {
/* Releases every entry of every bucket chain, and then the bucket array   */
/* itself.  The sizes passed to xfree mirror those used when allocating.   */
    Table_connector *entry, *following;
    int bucket;

    bucket = 0;
    while (bucket < table_size) {
        entry = table[bucket];
        while (entry != NULL) {
            /* remember the successor before this entry is released */
            following = entry->next;
            xfree((char *) entry, sizeof(Table_connector));
            entry = following;
        }
        bucket++;
    }
    xfree((char *) table, table_size * sizeof(Table_connector*));
}

int table_lookup(int lw, int rw, Connector *le, Connector *re) {
/* Looks up the quadruple (lw, rw, le, re).  Returns the value stored for  */
/* it, or -1 if the table has no entry for it.  Also updates the lookup    */
/* statistics: N_hash_lookups once per call, and work_in_hash_lookups once */
/* per call plus once for every chain entry that is examined.              */
    Table_connector *entry;

    N_hash_lookups++;
    work_in_hash_lookups++;

    entry = table[hash(lw, rw, le, re)];
    while (entry != NULL) {
        work_in_hash_lookups++;
        if ((entry->lw == lw) && (entry->rw == rw) &&
            (entry->le == le) && (entry->re == re)) {
            return entry->count;
        }
        entry = entry->next;
    }
    return -1;
}

void table_store(int lw, int rw, Connector *le, Connector *re, int count) {
/* Adds a new entry holding count for the quadruple (lw, rw, le, re) at    */
/* the front of its bucket chain.  No check for an existing entry is made: */
/* the caller is assumed to know that the quadruple is not yet stored.     */
    Table_connector *fresh;
    int bucket;

    N_in_table++;

    fresh = (Table_connector *) xalloc(sizeof(Table_connector));
    fresh->lw = lw;
    fresh->rw = rw;
    fresh->le = le;
    fresh->re = re;
    fresh->count = count;

    bucket = hash(lw, rw, le, re);
    fresh->next = table[bucket];
    table[bucket] = fresh;
}

void table_update(int lw, int rw, Connector *le, Connector *re, int count) {
/* Overwrites the value stored for the quadruple (lw, rw, le, re) with     */
/* count.  In contrast to table_store, the entry must already exist; this  */
/* is checked with an assertion.  work_in_hash_lookups is incremented once */
/* per call plus once for every chain entry that is examined.              */
    Table_connector *t;

    work_in_hash_lookups++;

    t = table[hash(lw, rw, le, re)];
    while (t != NULL) {
        work_in_hash_lookups++;
        if ((t->lw == lw) && (t->rw == rw) &&
            (t->le == le) && (t->re == re)) {
            break;
        }
        t = t->next;
    }

    assert(t != NULL, "This entry is supposed to be in the table.");
    t->count = count;
}

/*
 The evaluator works as follows:

 Assumes:

   lw and rw are word numbers in the sentence.  The words lw and
   rw are connected together by links outside the range spanned by
   them.

   le is a pointer to a right pointing connector
   of one of the clauses of lw.

   re is a pointer to a left pointing connector
   of one of the clauses of rw.

 Returns != 0  iff:

   There exists a way to complete the links in the region between
   lw and rw using the remainder of the connectors of the lists
   of connectors pointed to by le and re.

   Completing the connection means making sure that all the words
   between lw and rw are connected, no two words are connected
   together more than once, and all of the connectors of the chosen
   clauses are satisfied.  The direct connection between lw and rw
   is considered before this call, therefore don't consider it.

*/

int count(int lw, int rw, Connector *le, Connector *re) {
/* Returns the number of ways of completing the region between words lw   */
/* and rw, given the remaining connector lists le (of lw) and re (of rw).  */
/* Results are memoized in the hash table, so each quadruple is computed   */
/* at most once.  This version obtains its candidate disjuncts from the    */
/* power-pruning fast matcher (form_match_list).                           */

    Match_node *list_head, *node;
    Disjunct *dj;
    int cached, sum, base;
    int n_left, n_right;
    int first_word, last_word, mid;

    cached = table_lookup(lw, rw, le, re);
    if (cached >= 0) return cached;

    if (rw == 1+lw) {
        /* Adjacent words: exactly one way if nothing is left to connect,
           none otherwise. */
        base = 0;
        if ((le == NULL) && (re == NULL)) base = 1;
        table_store(lw, rw, le, re, base);
        return base;
    }

    /* Range of words that may be linked to lw and/or rw. */
    first_word = lw+1;
    if (le != NULL) first_word = le->word;
    last_word = rw-1;
    if (re != NULL) last_word = re->word;

    sum = 0;
    for (mid = first_word; mid <= last_word; mid++) {
        list_head = form_match_list(mid, le, lw, re, rw);
        for (node = list_head; node != NULL; node = node->next) {
            dj = node->d;
            parse_cost++;

            /* Ways of filling in (lw, mid) when le links to dj->left.  A
               multi-connector may stay on its list after being used, which
               gives the extra combinations. */
            n_left = 0;
            if ((le != NULL) && (dj->left != NULL) && match(le, dj->left)) {
                n_left = count(lw, mid, le->next, dj->left->next);
                if (le->multi) {
                    n_left += count(lw, mid, le, dj->left->next);
                }
                if (dj->left->multi) {
                    n_left += count(lw, mid, le->next, dj->left);
                }
                if (le->multi && dj->left->multi) {
                    n_left += count(lw, mid, le, dj->left);
                }
            }

            /* Ways of filling in (mid, rw) when dj->right links to re. */
            n_right = 0;
            if ((dj->right != NULL) && (re != NULL) && match(dj->right, re)) {
                n_right = count(mid, rw, dj->right->next, re->next);
                if (dj->right->multi) {
                    n_right += count(mid, rw, dj->right, re->next);
                }
                if (re->multi) {
                    n_right += count(mid, rw, dj->right->next, re);
                }
                if (dj->right->multi && re->multi) {
                    n_right += count(mid, rw, dj->right, re);
                }
            }

            /* the number of ways to link mid to both lw and rw */
            sum += n_left * n_right;

            /* link on the left and not on the right */
            if (n_left > 0) {
                sum += n_left * count(mid, rw, dj->right, re);
            }

            /* link on the right and not on the left */
            if ((le == NULL) && (n_right > 0)) {
                sum += n_right * count(lw, mid, le, dj->left);
            }
        }
        put_match_list(list_head);
    }

    table_store(lw, rw, le, re, sum);
    return sum;
}

int parse(void) {
/*
   Returns the number of ways the sentence can be parsed: the sum, over
   every disjunct of word 0 that has no left connectors, of the number of
   ways of completing the region from word 0 to N_words.
   Assumes that the hash table has already been initialized, and
   is freed later.
*/
    Disjunct *dj = sentence[0].d;
    int n_linkages = 0;

    while (dj != NULL) {
        if (dj->left == NULL) {
            n_linkages += count(0, N_words, dj->right, NULL);
        }
        dj = dj->next;
    }
    return n_linkages;
}

/*

  CONJUNCTION PRUNING.

  The basic idea is this.  Before creating the fat disjuncts,
  we run a modified version of the exhaustive search procedure.
  Its purpose is to mark the disjuncts that can be used in any
  linkage.  It's just like the normal exhaustive search, except that
  if a subrange of words are deletable, then we treat them as though
  they were not even there.  So, if we call the function in the
  situation where the set of words between the left and right one
  are deletable, and the left and right connector pointers
  are NULL, then that range is considered to have a solution.

  There are actually two procedures to implement this.  One is
  mark_region() and the other is region_valid().  The latter just
  checks to see if the given region can be completed (within it).
  The former actually marks those disjuncts that can be used in
  any valid linkage of the given region.  The process.

  As in the standard search procedure, we make use of the fast-match
  data structure (which requires power pruning to have been done), and
  we also use a hash table.  The table is used differently in this case.
  The meanings of the values stored in the table are as follows:

  -1  Nothing known (Actually, this is not stored.  It's returned
      by table_lookup when nothing is known.)
   0  This region can't be completed (marking is therefore irrelevant)
   1  This region can be completed, but it's not yet marked
   2  This region can be completed, and it's been marked.
*/  

int region_valid(int lw, int rw, Connector *le, Connector *re) {
/* Returns 0 if this range cannot be successfully filled in with           */
/* links.  Returns 1 if it can, and it's not been marked, and returns      */
/* 2 if it can and it has been marked.                                     */
/*                                                                         */
/* lw, rw are the word numbers bounding the region; le, re are the         */
/* remaining connector lists of lw and rw (either may be NULL).            */
/* The answer is memoized in the hash table under (lw, rw, le, re), so a   */
/* later call with the same arguments returns whatever is stored there     */
/* (including a 2 written afterwards by table_update).                     */

    Disjunct * d;
    int left_valid, right_valid, found;
    int i, start_word, end_word;
    int w;
    Match_node * m, *m1;
    
    /* a non-negative table value means this region was already decided */
    i = table_lookup(lw, rw, le, re);
    if (i >= 0) return i;

    /* No connectors left on either side and the whole range is deletable: */
    /* treat it as completed, and store it as 2 (already marked).          */
    if ((le == NULL) && (re == NULL) && deletable[lw][rw]) {
	table_store(lw, rw, le, re, 2);
	return 2;
    }

    /* range of candidate words w to link to lw and/or rw */
    if (le == NULL) {
	start_word = lw+1;
    } else {
	start_word = le->word;
    }
    if (re == NULL) {
	end_word = rw-1;
    } else {
	end_word = re->word;
    }

    found = 0;
	  
    for (w=start_word; w <= end_word; w++) {
	/* m1 keeps the head of the list so that it can be handed back below */
	m1 = m = form_match_list(w, le, lw, re, rw); 
	for (; m!=NULL; m=m->next) {
	    d = m->d;
	    mark_cost++;
	    /* in the following expressions we use the fact that 0=FALSE. Could eliminate
	       by always saying "region_valid(...) != 0"  */
	    /* left_valid: le links to d->left and the rest of (lw, w) can be
	       completed, allowing for either connector being a multi-connector */
	    left_valid = (((le != NULL) && (d->left != NULL) && prune_match(le, d->left)) &&
		((region_valid(lw, w, le->next, d->left->next)) ||
		 ((le->multi) && region_valid(lw, w, le, d->left->next)) ||
		 ((d->left->multi) && region_valid(lw, w, le->next, d->left)) ||
		 ((le->multi && d->left->multi) && region_valid(lw, w, le, d->left))));
	    /* link on the left only: (w, rw) must be completable without
	       linking d->right to re */
	    if (left_valid && region_valid(w, rw, d->right, re)) {
		found = 1;
		break;
	    }
	    /* right_valid: the mirror image of left_valid for the region (w, rw) */
	    right_valid = (((d->right != NULL) && (re != NULL) && prune_match(d->right, re)) &&
	      ((region_valid(w, rw, d->right->next,re->next))    ||
	       ((d->right->multi) && region_valid(w,rw,d->right,re->next))  ||
	       ((re->multi) && region_valid(w, rw, d->right->next, re))  ||
	       ((d->right->multi && re->multi) && region_valid(w, rw, d->right, re))));
	    /* link on both sides, or on the right only */
	    if ((left_valid && right_valid) || (right_valid && region_valid(lw, w, le, d->left))) {
		found = 1;
		break;
	    }
	}
	/* the break above only leaves the inner loop, so the match list is
	   always returned before the outer loop is left */
	put_match_list(m1);
	if (found != 0) break;
    }
    /* store 0 (cannot be completed) or 1 (can be completed, not yet marked) */
    table_store(lw, rw, le, re, found);
    return found;
}

void mark_region(int lw, int rw, Connector *le, Connector *re) {
/* Mark as useful all disjuncts involved in some way to complete the structure  */
/* within the current region.  Note that only disjuncts strictly between        */
/* lw and rw will be marked.  If it so happens that this region itself is not   */
/* valid, then this fact will be recorded in the table, and nothing else happens*/    
/*                                                                              */
/* lw, rw, le and re have the same meaning as in region_valid().  The table     */
/* entry of a valid region is switched from 1 to 2 before recursing, so each    */
/* region is expanded at most once.                                             */

    Disjunct * d;
    int left_valid, right_valid, i;
    int start_word, end_word;
    int w;
    Match_node * m, *m1;

    /* region_valid() also makes sure that a table entry exists for this region */
    i = region_valid(lw, rw, le, re);
    if ((i==0) || (i==2)) return;
    /* we only reach this point if it's a valid unmarked region, i=1 */
    table_update(lw, rw, le, re, 2);

    /* range of candidate words w to link to lw and/or rw */
    if (le == NULL) {
	start_word = lw+1;
    } else {
	start_word = le->word;
    }
    if (re == NULL) {
	end_word = rw-1;
    } else {
	end_word = re->word;
    }
	  
    for (w=start_word; w <= end_word; w++) {
	/* m1 keeps the head of the list so that it can be handed back below */
	m1 = m = form_match_list(w, le, lw, re, rw); 
	for (; m!=NULL; m=m->next) {
	    d = m->d;
	    mark_cost++;
	    /* left_valid / right_valid: same tests as in region_valid() */
	    left_valid = (((le != NULL) && (d->left != NULL) && prune_match(le, d->left)) &&
	        ((region_valid(lw, w, le->next, d->left->next)) ||
		 ((le->multi) && region_valid(lw, w, le, d->left->next)) ||
		 ((d->left->multi) && region_valid(lw, w, le->next, d->left)) ||
		 ((le->multi && d->left->multi) && region_valid(lw, w, le, d->left))));
	    right_valid = (((d->right != NULL) && (re != NULL) && prune_match(d->right, re)) &&
	        ((region_valid(w, rw, d->right->next,re->next)) ||
		 ((d->right->multi) && region_valid(w,rw,d->right,re->next))  ||
		 ((re->multi) && region_valid(w, rw, d->right->next, re)) ||
		 ((d->right->multi && re->multi) && region_valid(w, rw, d->right, re))));

	    /* The following if statements could be restructured to avoid superfluous calls
	       to mark_region.  It didn't seem a high priority, so I didn't optimize this.
	    */

	    /* d links to lw only: mark the right region without a d->right/re
	       link, and every variant of the left region */
	    if (left_valid && region_valid(w, rw, d->right, re)) {
		d->marked = TRUE;
		mark_region(w, rw, d->right, re);
		mark_region(lw, w, le->next, d->left->next);
		if (le->multi) mark_region(lw, w, le, d->left->next);
		if (d->left->multi) mark_region(lw, w, le->next, d->left);
		if (le->multi && d->left->multi) mark_region(lw, w, le, d->left);
	    }

	    /* d links to rw only: the mirror image of the case above */
	    if (right_valid && region_valid(lw, w, le, d->left)) {
		d->marked = TRUE;
		mark_region(lw, w, le, d->left);
		mark_region(w, rw, d->right->next,re->next);
	        if (d->right->multi) mark_region(w,rw,d->right,re->next);
	        if (re->multi) mark_region(w, rw, d->right->next, re);
	        if (d->right->multi && re->multi) mark_region(w, rw, d->right, re);
	    }

	    /* d links to both lw and rw: mark every variant on both sides */
	    if (left_valid && right_valid) {
		d->marked = TRUE;
		mark_region(lw, w, le->next, d->left->next);
		if (le->multi) mark_region(lw, w, le, d->left->next);
		if (d->left->multi) mark_region(lw, w, le->next, d->left);
		if (le->multi && d->left->multi) mark_region(lw, w, le, d->left);
		mark_region(w, rw, d->right->next,re->next);
	        if (d->right->multi) mark_region(w,rw,d->right,re->next);
	        if (re->multi) mark_region(w, rw, d->right->next, re);
	        if (d->right->multi && re->multi) mark_region(w, rw, d->right, re);
	    }
	}
	put_match_list(m1);
    }
}

void conjunction_prune(void) {
/*
  Called after the sentence disjuncts have been built, pruned and
  power_pruned(GENTLE), for a sentence that contains a conjunction.
  deletable[][] must already describe the ranges which may be deleted
  in the final linkage.

  Removes the irrelevant disjuncts in three passes: clear all marks,
  mark every disjunct that might be useable in some linkage, then free
  whatever is still unmarked.  The fast matcher and the hash table are
  set up at the start of the marking pass and torn down at the end.
*/
    Disjunct *dj, *kept, *following;
    int w;

    /* Pass 1: clear the marks.  This would be unnecessary if the marked
       field were cleared wherever a disjunct is created, but this way
       those places do not have to be tracked down. */
    for (w = 0; w < N_words; w++) {
        dj = sentence[w].d;
        while (dj != NULL) {
            dj->marked = FALSE;
            dj = dj->next;
        }
    }

    init_fast_matcher();
    init_table();

    /* Pass 2: start from each disjunct of word 0 that has no left
       connectors and from which the whole sentence can be completed. */
    dj = sentence[0].d;
    while (dj != NULL) {
        if ((dj->left == NULL) && region_valid(0, N_words, dj->right, NULL)) {
            mark_region(0, N_words, dj->right, NULL);
            dj->marked = TRUE;
        }
        dj = dj->next;
    }

    /* Pass 3: rebuild each word's list from its marked disjuncts (pushing
       on the front, so their order is reversed) and free the others. */
    for (w = 0; w < N_words; w++) {
        kept = NULL;
        dj = sentence[w].d;
        while (dj != NULL) {
            following = dj->next;
            if (!dj->marked) {
                /* detach first, so that only this one disjunct is freed */
                dj->next = NULL;
                free_disjuncts(dj);
            } else {
                dj->next = kept;
                kept = dj;
            }
            dj = following;
        }
        sentence[w].d = kept;
    }

    free_fast_matcher();
    free_table();
}
