 /***************************************************************************/
 /*                                                                         */
 /*      Copyright (C) 1991, 1992  Daniel Sleator and Davy Temperley        */
 /*  See file "README" for information about commercial use of this system  */
 /*                                                                         */
 /***************************************************************************/

#include "header.c"
/*
   This file does the post-processing.  The input is a global array
   called "pp_link_array" whose length is the global "N_links".
   The main routine is "post_process()".  It uses the link names only,
   and not the connectors.  (Since this is now taking a significant
   fraction of total parsing time, it should be rewritten to use hashing
   instead of linear search (in the arrays of pointers to strings, see
   below).)
   
   A domain is a set of links.  Each domain has a defining link.
   Only certain types of links serve to define a domain.  These
   parameters are set by the lists of link names below.

   The domains are nested: given two domains, either they're disjoint,
   or one contains the other, i.e. they're tree structured.  The set of links
   in a domain (but in no smaller domain) are called the "group" of the
   domain.  Data structures are built to store all this stuff.
   The tree structured property is not mathematically guaranteed by
   the domain construction algorithm.  Davy simply claims that because
   of how he built the dictionary, the domains will always be so
   structured.  The program checks this and gives an error message
   if it's violated.

   Define the "root word" of a link (or domain) to be the word at the
   left end of the link.  The other end of the defining link is called
   the "right word".
   
   The domain corresponding to a link is defined to be the set of links
   reachable by starting from the right word, following links and never
   using the root word or any word to its left.

   There are some minor exceptions to this.  The "restricted_link" lists
   those connectors that, even if they point back before the root word,
   are included in the domain.  Some of the starting links are included
   in their domain, these are listed in the "domain_contains_links" list.

   Such was the way it was.  Now Davy tells me there should be another type
   of domain that's quite different.  Let's call these "bad" domains.
   Certain types of connectors start bad domains.  They're listed below.
   In a bad domain, the search includes the root word.  It does a separate
   search to find bad domains.

   Restricted links should work just as they do with ordinary domains. If they
   come out of the right word, or anything to the right of it (that's
   in the domain), they should be included but should not be traced
   further. If they come out of the root word, they should not be
   included.
*/   

/*
   I also, unfortunately, want to propose a new type of domain. These
   would include everything that can be reached from the root word of the
   link, to the right, that is closer than the right word of the link.
   (They would not include the link itself.)

   In the following sentence, then, the "Death Domain" of the G link
   would include only the "O" link:

      +-----G----+    
      +---O--+   +-AI+
      |      |   |   |
   hitting dogs is fun.a 

   In the following sentence it would include the "O", the "TT", the "I",
   the second "O", and the "A".

      +----------------G---------------+    
      +-----TT-----+  +-----O-----+    |    
      +---O---+    +-I+    +---A--+    +-AI+
      |       |    |  |    |      |    |   |
   telling people to do stupid things is fun.a 

   This would allow us to judge the following:

           kicking dogs bores me
           *kicking dogs kicks dogs
           explaining the program is easy
           *explaining the program is running

   (These are distinctions that I thought we would never be able to make,
   so I told myself they were semantic rather than syntactic. But with
   domains, they should be easy.)

*/

/* Link names that start an ordinary domain.  build_domains() compares */
/* link names against this list with exact string equality.            */
static char * domain_starter_links[] =
               {"Wi", "WA", "CLb", "CLe", "Cc", "C", "Ct", "Zb", "CLc", 
		"Md", "Wq", "R*x", "Rix", "Wx", "Mv", "Mx", "Mj", "Wj", "Qq",
                "TVn", "TVi", "TVic", "Mg", "EVix", "GIs", "CLg", "CLl",
                "Ss*d", "Ssid", "Ssxd", "Bsd", "ER", "Cce",
                NULL};
  /* these links start a domain */
  /* each of these must be given a name in CDNP_array below; */
  /* build_domains() exits if a starter has no entry there   */

static char * bad_domain_starter_links[] = {"TT", "I*j", "Iij", "Ixj",
                "AI*j", "AIij", NULL};
  /* these start a bad domain.  These are also included in the domain */
  /* (build_domains() always adds the starter link to a bad domain)   */

static char * death_domain_starter_links[] = {"G", NULL};
  /* these start a death domain.  They are not included in the domain */

/* Pairs a link name with the one-letter type of the domain it starts. */
/* "n" holds that letter as a character code; see find_domain_name().  */
typedef struct connector_domain_name_pair_struct CDNP;
struct connector_domain_name_pair_struct {char * string; int n;};

/* Table searched by find_domain_name() using exact string comparison. */
/* The entry with a NULL string terminates the table.                  */
CDNP CDNP_array[] = {{"CLb", 'b'}, {"C", 'r'}, {"Cc", 'r'}, {"Ct", 'r'},
		       {"Zb", 'b'}, {"CLe", 'e'}, {"Mx", 'm'},
		       {"Md", 'r'}, {"Wq", 'q'}, {"Rix", 'a'},
                       {"Wx", 'y'}, {"Mv", 'b'}, {"R*x", 'a'},
		       {"TVn", 'b'}, {"TVi", 'b'}, {"TVic", 'b'}, 
                       {"EVix", 'b'}, {"CLl", 'b'},
		       {"Wi", 'g'}, {"WA", 'g'}, {"TV", 'b'}, 

		       {"CLc", 'c'}, {"Mg", 'b'}, 
                       {"Mj", 'j'}, {"Wj", 'j'}, {"Qq", 'q'},

		       /* bad domain starters */
		       {"TT", 'x'}, {"I*j", 'x'}, {"Iij", 'x'}, {"Ixj", 'x'},
		       {"AI*j", 'x'}, {"AIij", 'x'},
		       /* death domain starter "G", then more ordinary starters */
		       {"G", 'd'}, {"GIs", 'l'}, {"CLg", 'g'},
                       {"Ss*d", 'w'}, {"Ssxd", 'w'}, {"Ssid", 'w'}, 
                       {"Bsd", 'w'}, {"ER", 'e'}, {"Cce", 'r'},
		       {NULL, 0}};

static char * domain_contains_links[] = {"Mg", "Mx", "Bsd", "Ss*d",
  "Ssxd", "Ssid", NULL};
  /* these links start a domain and they're also part of the domain */
  /* this must be a sublist of domain_starter_links */

static char * ignore_these_links[] = {NULL};
  /* These links are not put in the word/link graph.  */
  /* They also cannot be the starter links for a domain */
  /* Apparently this isn't needed (the list is currently empty) */

static char * must_be_connected_without[] = {"C", "Cx", "Ct", "Xx", 
"Cc", "Cce", "TVt", "F", "EXx", "HA", NULL};
  /* the linkage must still be connected when these links are deleted */
  /* (checked by is_connected_without()) */

static char * restricted_links[] = {"Bs", "Bp", "B",
"Ds*w", "Dmcw", "Bsn", "EVx", "Xx", "HL",
     "Dmuw", "Eah", NULL};
  /* these links are not traced further if they point back 
     before the root word (the depth-first searches below do not
     follow them to a word left of both the root and the current word) */

int find_domain_name(char * string) {
/* return the name of the domain used for this string */
/* return 0 if not in the list */
    int i;
    for (i=0; CDNP_array[i].string != NULL; i++) {
	if (strcmp(string, CDNP_array[i].string) == 0) {
	    return CDNP_array[i].n;
	}
    }
    return 0;
}

typedef struct d_tree_leaf_struct D_tree_leaf;
typedef struct domain_struct Domain;

/* One domain: the set of links collected by a depth-first search that */
/* was started from a domain-starting link (see build_domains()).      */
struct domain_struct{
    char * string;       /* name of the link that started this domain */
    int size;            /* number of links on the lol list */
    List_o_links * lol;  /* the links belonging to this domain */
    int start_link;  /* the link that started this domain */
    int type;        /* one letter name */
    D_tree_leaf * child; /* links whose first containing domain (in sorted
                            order) is this one; set by build_domain_forest() */
    Domain * parent;     /* first later domain in the sorted array that
                            contains this one, or NULL for a tree root */
};

/* A leaf of the domain forest: one link hung under one domain. */
struct d_tree_leaf_struct{
    Domain * parent;     /* the domain this link is a child of */
    int link;            /* index of the link in pp_link_array */
    D_tree_leaf * next;  /* next leaf under the same domain */
};

static Domain domain_array[MAX_LINKS];
/* the domains, sorted by size (once build_domains() has run qsort). */
static int N_domains;       /* the number of domains */
static int N_domain_trees;  /* the number of domain trees */


static List_o_links * word_links[MAX_SENTENCE];
  /* a pointer to the list of links out of this word (built by build_graph()) */


void print_domain(int d) {
/* Print every link of domain number d (via print_a_link), followed by
   a newline. */
    List_o_links * node = domain_array[d].lol;

    while (node != NULL) {
        print_a_link(node->link);
        node = node->next;
    }
    printf("\n");
}

void print_domains(void) {
/* Print a header line (index and starting link name) and then the
   links of each domain in domain_array. */
    int index = 0;

    while (index < N_domains) {
        printf("domain %d \"%s\"\n", index, domain_array[index].string);
        print_domain(index);
        index++;
    }
}

int post_process_match(char * s, char * t) {
/*
   The same as match() when the connectors are of THIN_priority, except
   that matching is tested for strings instead of connectors.

   The leading upper-case parts of s and t must be identical.  After
   that the strings are compared position by position until either one
   ends: a position matches if either character is '*', or if the two
   characters are equal and are not '^'.
   Returns TRUE on a match, FALSE otherwise.
*/
    /* The <ctype.h> functions require an argument representable as
       unsigned char (or EOF); passing a plain char that happens to be
       negative is undefined behaviour, hence the casts. */
    while (isupper((unsigned char) *s) || isupper((unsigned char) *t)) {
        if (*s != *t) return FALSE;
        s++;
        t++;
    }

    while ((*s != '\0') && (*t != '\0')) {
        if ((*s == '*') || (*t == '*') ||
            ((*s == *t) && (*s != '^'))) {
            s++;
            t++;
        } else {
            return FALSE;
        }
    }
    return TRUE;
}

int match_in_list(char * s, char * a[]) {
/* Returns TRUE if s matches (in the post_process_match() sense) some
   entry of the NULL-terminated array a, and FALSE otherwise. */
    char ** entry;

    for (entry = a; *entry != NULL; entry++) {
        if (post_process_match(s, *entry)) return TRUE;
    }
    return FALSE;
}

int string_in_list(char * s, char * a[]) {
/* Returns TRUE if s is exactly equal (strcmp) to some entry of the
   NULL-terminated array a, and FALSE otherwise. */
    char ** entry = a;

    while (*entry != NULL) {
        if (strcmp(s, *entry) == 0) return TRUE;
        entry++;
    }
    return FALSE;
}

void build_graph(void) {
/* Build word_links[]: for every word, the list of links touching it.
   Each usable link contributes two list nodes, one on the list of its
   left word (naming the right word as neighbor) and one on the list of
   its right word (naming the left word).  Links whose l field is -1 or
   whose name is in ignore_these_links are left out.  No dir field is
   set; the graph is treated as undirected.
*/
    int w, link, left, right;
    List_o_links * node;

    for (w = 0; w < N_words; w++) word_links[w] = NULL;

    for (link = 0; link < N_links; link++) {
        if (pp_link_array[link].l == -1) continue;
        if (string_in_list(pp_link_array[link].name, ignore_these_links)) {
            continue;
        }
        left = pp_link_array[link].l;
        right = pp_link_array[link].r;

        /* push onto the left word's list */
        node = (List_o_links *) xalloc(sizeof(List_o_links));
        node->next = word_links[left];
        word_links[left] = node;
        node->link = link;
        node->word = right;

        /* push onto the right word's list */
        node = (List_o_links *) xalloc(sizeof(List_o_links));
        node->next = word_links[right];
        word_links[right] = node;
        node->link = link;
        node->word = left;
    }
}

void add_link_to_domain(int link) {
    List_o_links *lol;
    lol = (List_o_links *) xalloc(sizeof(List_o_links));
    lol->next = domain_array[N_domains].lol;
    domain_array[N_domains].lol = lol;
    domain_array[N_domains].size++;
    lol->link = link;
}

static int visited[MAX_SENTENCE];
/* for the depth-first search: visited[w] is TRUE once word w has been
   reached.  Shared by the domain searches and the connectivity check,
   each of which resets it before use. */

void depth_first_search(int w, int root, int start_link) {
/* Ordinary-domain search.  Marks w visited, adds to the current domain
   every link from w to a word on its left (except the starting link),
   then recurses into each unvisited neighbor other than the root word.
   A neighbor is not entered through a restricted link when it lies to
   the left of both the root and w. */
    List_o_links * edge;
    int neighbor;

    visited[w] = TRUE;

    for (edge = word_links[w]; edge != NULL; edge = edge->next) {
        if (edge->link == start_link) continue;
        if (edge->word < w) add_link_to_domain(edge->link);
    }

    for (edge = word_links[w]; edge != NULL; edge = edge->next) {
        neighbor = edge->word;
        if (visited[neighbor]) continue;
        if (neighbor == root) continue;
        if (neighbor < root && neighbor < w &&
            string_in_list(pp_link_array[edge->link].name, restricted_links)) {
            continue;
        }
        depth_first_search(neighbor, root, start_link);
    }
}

void bad_depth_first_search(int w, int root, int start_link) {
/* Bad-domain search.  Unlike the ordinary search, the root word itself
   may be entered.  Links going left from w are added to the current
   domain unless w is the root or the link is the starting link.  From
   the root the search never moves left, and restricted links are not
   followed to a word left of both the root and w. */
    List_o_links * edge;
    int neighbor;

    visited[w] = TRUE;

    if (w != root) {
        for (edge = word_links[w]; edge != NULL; edge = edge->next) {
            if ((edge->word < w) && (edge->link != start_link)) {
                add_link_to_domain(edge->link);
            }
        }
    }

    for (edge = word_links[w]; edge != NULL; edge = edge->next) {
        neighbor = edge->word;
        if (visited[neighbor]) continue;
        if (w == root && neighbor < w) continue;
        if (neighbor < root && neighbor < w &&
            string_in_list(pp_link_array[edge->link].name, restricted_links)) {
            continue;
        }
        bad_depth_first_search(neighbor, root, start_link);
    }
}

void d_depth_first_search(int w, int root, int right, int start_link) {
/* Death-domain search.  Links going left from w are added to the
   current domain unless w is the root or the link is the starting
   link.  From the root, the search only moves to words that are not
   left of the root and are strictly left of "right" (the right word of
   the starting link).  Restricted links are not followed to a word
   left of both the root and w. */
    List_o_links * edge;
    int neighbor;

    visited[w] = TRUE;

    if (w != root) {
        for (edge = word_links[w]; edge != NULL; edge = edge->next) {
            if ((edge->word < w) && (edge->link != start_link)) {
                add_link_to_domain(edge->link);
            }
        }
    }

    for (edge = word_links[w]; edge != NULL; edge = edge->next) {
        neighbor = edge->word;
        if (visited[neighbor]) continue;
        if (w == root && (neighbor >= right || neighbor < root)) continue;
        if (neighbor < root && neighbor < w &&
            string_in_list(pp_link_array[edge->link].name, restricted_links)) {
            continue;
        }
        d_depth_first_search(neighbor, root, right, start_link);
    }
}

int domain_compare(Domain * d1, Domain * d2) {
/* Ordering used to sort the domains by size: negative, zero or
   positive as d1 has fewer, equally many, or more links than d2. */
    int difference = d1->size - d2->size;
    return difference;
}

void build_domains(void) {
    int link, i, d;
    char * s;

    N_domains = 0;
    for (link = 0; link<N_links; link++) {
	if (pp_link_array[link].l == -1) continue;
	if (string_in_list(pp_link_array[link].name, ignore_these_links)) {
	    continue;
	}
	s = pp_link_array[link].name;
	if (string_in_list(s, domain_starter_links)) {
	    for (i=0; i<N_words; i++) {
		visited[i] = FALSE;
	    }
	    domain_array[N_domains].string = s;
	    domain_array[N_domains].lol = NULL;
	    domain_array[N_domains].size = 0;
	    domain_array[N_domains].start_link = link;
	    if (string_in_list(s, domain_contains_links)) {
		add_link_to_domain(link);
	    }
	    depth_first_search(pp_link_array[link].r, pp_link_array[link].l, link);
	    N_domains++;
	} else if (string_in_list(s, bad_domain_starter_links)) {
	    for (i=0; i<N_words; i++) {
		visited[i] = FALSE;
	    }
	    domain_array[N_domains].string = s;
	    domain_array[N_domains].lol = NULL;
	    domain_array[N_domains].size = 0;
	    domain_array[N_domains].start_link = link;
	    add_link_to_domain(link);
                 /* always add the starter link to its bad domain */
	    bad_depth_first_search(pp_link_array[link].r,pp_link_array[link].l,link);
	    N_domains++;
	} else if (string_in_list(s, death_domain_starter_links)) {
	    for (i=0; i<N_words; i++) {
		visited[i] = FALSE;
	    }
	    domain_array[N_domains].string = s;
	    domain_array[N_domains].lol = NULL;
	    domain_array[N_domains].size = 0;
	    domain_array[N_domains].start_link = link;
                 /* do not add the starter link to its death domain */
	    d_depth_first_search(
	       pp_link_array[link].l,pp_link_array[link].l,pp_link_array[link].r,link);
	    N_domains++;
	}
    }
    qsort((void *)domain_array, N_domains, sizeof(Domain),
	  (int (*)(void *, void *))domain_compare);
    /* sort the domains by size */

    for (d=0; d<N_domains; d++) {
	i = find_domain_name(domain_array[d].string);
	if (i==0) {
	    printf("something wrong with domain name %s\n",
		    domain_array[d].string);
	    exit(1);
	}
	domain_array[d].type = i;
    }
}

int contained_in(Domain * d1, Domain * d2) {
/* Returns TRUE if every link of domain d1 is also a link of domain d2,
   FALSE otherwise. */
    char in_d2[MAX_LINKS];
    List_o_links * node;
    int k = 0;

    while (k < N_links) {
        in_d2[k] = FALSE;
        k++;
    }
    for (node = d2->lol; node != NULL; node = node->next) {
        in_d2[node->link] = TRUE;
    }
    node = d1->lol;
    while (node != NULL) {
        if (!in_d2[node->link]) return FALSE;
        node = node->next;
    }
    return TRUE;
}

int link_in_domain(int link, Domain * d) {
/* Returns TRUE if the given link is on the link list of domain d,
   FALSE otherwise. */
    List_o_links * node = d->lol;

    while (node != NULL) {
        if (node->link == link) return TRUE;
        node = node->next;
    }
    return FALSE;
}

int check_domain_nesting() {
/* Returns TRUE if the domains form a properly nested structure, i.e.
   there is no pair of domains that share a link while each also has a
   link the other lacks.  Returns FALSE as soon as such a pair is found.

   For each pair, every link is classified as 0 (in neither), 1 (only
   in the later domain), 2 (only in the earlier one) or 3 (in both),
   and the classes are tallied. */
    int a, b, k;
    int tally[4];
    char where[MAX_LINKS];
    List_o_links * node;

    for (a = 0; a < N_domains; a++) {
        for (b = a + 1; b < N_domains; b++) {
            for (k = 0; k < N_links; k++) {
                where[k] = 0;
            }
            for (node = domain_array[b].lol; node != NULL; node = node->next) {
                where[node->link] = 1;
            }
            for (node = domain_array[a].lol; node != NULL; node = node->next) {
                where[node->link] += 2;
            }
            tally[0] = tally[1] = tally[2] = tally[3] = 0;
            for (k = 0; k < N_links; k++) {
                tally[(int) where[k]]++;
            }
            if (tally[1] == 0 || tally[2] == 0 || tally[3] == 0) continue;
            return FALSE;
        }
    }
    return TRUE;
}
    
void build_domain_forest() {
    int d, d1, link;
    D_tree_leaf * dtl;
    if (N_domains > 0) {
	domain_array[N_domains-1].parent = NULL;
    }
    N_domain_trees = 1;
    for (d=0; d < N_domains-1; d++) {
	for (d1 = d+1; d1 < N_domains; d1++) {
	    if (contained_in(&domain_array[d], &domain_array[d1])) {
		domain_array[d].parent = &domain_array[d1];
		break;
	    }
	}
	if (d1 == N_domains) {
          /* we know this domain is a root of a new tree */
	    domain_array[d].parent = NULL;
	    N_domain_trees++;
	 /* It's now ok for this to happen.  It used to do:
            printf("I can't find a parent domain for this domain\n");
	    print_domain(d);
	    exit(1); */
	}
    }
    /* the parent links of domain nodes have been established.
       now do the leaves */
    for (d=0; d < N_domains; d++) {
	domain_array[d].child = NULL;
    }
    for (link=0; link < N_links; link++) {
	if (pp_link_array[link].l == -1) continue; /* probably not necessary */
	for (d=0; d<N_domains; d++) {
	    if (link_in_domain(link, &domain_array[d])) {
		dtl = (D_tree_leaf *) xalloc(sizeof(D_tree_leaf));
		dtl->link = link;
		dtl->parent = &domain_array[d];
		dtl->next = domain_array[d].child;
		domain_array[d].child = dtl;
		break;
	    }
	}
    }
}

void free_list_o_links(List_o_links *lol) {
/* Release every node of the list starting at lol with xfree.
   (does not free any strings)
*/
    List_o_links * following;

    for (; lol != NULL; lol = following) {
        following = lol->next;
        xfree((char *)lol, sizeof(List_o_links));
    }
}

void free_D_tree_leaves(D_tree_leaf *dtl) {
/* Release every leaf of the list starting at dtl with xfree. */
    D_tree_leaf * following;

    for (; dtl != NULL; dtl = following) {
        following = dtl->next;
        xfree((char *)dtl, sizeof(D_tree_leaf));
    }
}

void free_post_processing_structures(void) {
/* Free the per-word link lists of the graph, and the link list and
   leaf list of every domain. */
    int index;

    index = 0;
    while (index < N_words) {
        free_list_o_links(word_links[index]);
        index++;
    }

    index = 0;
    while (index < N_domains) {
        free_list_o_links(domain_array[index].lol);
        free_D_tree_leaves(domain_array[index].child);
        index++;
    }
}

void print_domain_tree() {
    int d;
    D_tree_leaf * dtl;
    printf("Domain     It's parent \n");
    for (d=0; d<N_domains-1; d++) {
	printf(" %3d        %3d\n", d, domain_array[d].parent - domain_array);
    }
    for (d=0; d<N_domains; d++) {
	printf("Link children of domain %d:\n", d);
	for (dtl=domain_array[d].child; dtl != NULL; dtl = dtl->next) {
	    print_a_link(dtl->link);
	}
    }
}

int CCG_rule() {
/* The parent of every c domain is a c or g domain */

    int d, t;
    for (d=0; d<N_domains; d++) {
	if ((domain_array[d].type == 'c') && (domain_array[d].parent!=NULL)) {
	    t = domain_array[d].parent->type;
	    if ((t != 'g') && (t != 'c')) return FALSE;
	}
    }
    return TRUE;
}

int domain_bounded(int d_type) {
/* Returns TRUE if, in every domain of type d_type, no link has its
   left word to the left of the domain's root word (the left word of
   the link that started the domain).  Returns FALSE otherwise.
*/
    int d, root_word;
    List_o_links * node;

    for (d = 0; d < N_domains; d++) {
        if (domain_array[d].type == d_type) {
            root_word = pp_link_array[domain_array[d].start_link].l;
            node = domain_array[d].lol;
            while (node != NULL) {
                if (pp_link_array[node->link].l < root_word) return FALSE;
                node = node->next;
            }
        }
    }
    return TRUE;
}

int link_inhabits(char * link_name, int d_type){
/* Returns FALSE if a link with exactly the given name is a leaf child
   of (i.e. in the group of) a domain whose type is not d_type;
   returns TRUE otherwise.  Links that belong to no group are not
   examined.
*/
    int d;
    D_tree_leaf * leaf;

    for (d = 0; d < N_domains; d++) {
        /* groups of the right type can never cause a failure */
        if (domain_array[d].type == d_type) continue;
        for (leaf = domain_array[d].child; leaf != NULL; leaf = leaf->next) {
            if (strcmp(pp_link_array[leaf->link].name, link_name) == 0) {
                return FALSE;
            }
        }
    }
    return TRUE;
}

int group_type_contains(int d_type, char * link_name) {
/* Returns TRUE if every group of type d_type has at least one leaf
   link named exactly link_name, FALSE otherwise. */
    int d, found;
    D_tree_leaf * leaf;

    for (d = 0; d < N_domains; d++) {
        if (domain_array[d].type != d_type) continue;
        found = FALSE;
        leaf = domain_array[d].child;
        while (leaf != NULL && !found) {
            if (strcmp(pp_link_array[leaf->link].name, link_name) == 0) {
                found = TRUE;
            }
            leaf = leaf->next;
        }
        if (!found) return FALSE;
    }
    return TRUE;
}

int group_type_contains_all(int d_type, char * name_list[]) {
/* Returns TRUE if every group of type d_type contains each link named
   in the NULL-terminated name_list, FALSE otherwise. */
    char ** name = name_list;

    while (*name != NULL) {
        if (!group_type_contains(d_type, *name)) return FALSE;
        name++;
    }
    return TRUE;
}

int group_type_contains_one(int d_type, char * name_list[]) {
/* Returns TRUE if, in every group of type d_type, the number of
   (leaf link, list entry) pairs with identical names is exactly one;
   returns FALSE otherwise. */
    int d, i, hits;
    D_tree_leaf * leaf;
    char * leaf_name;

    for (d = 0; d < N_domains; d++) {
        if (domain_array[d].type != d_type) continue;
        hits = 0;
        for (leaf = domain_array[d].child; leaf != NULL; leaf = leaf->next) {
            leaf_name = pp_link_array[leaf->link].name;
            for (i = 0; name_list[i] != NULL; i++) {
                if (strcmp(leaf_name, name_list[i]) == 0) hits++;
            }
        }
        if (hits != 1) return FALSE;
    }
    return TRUE;
}

int contains_none(char * selector, char * incompat[]) {
/* Returns TRUE if and only if no group that contains a link named
   selector also contains a link whose name is in the incompat list.
   Uses exact string matching.
*/
    D_tree_leaf * leaf;
    int d, has_selector;

    for (d = 0; d < N_domains; d++) {
        has_selector = FALSE;
        for (leaf = domain_array[d].child; leaf != NULL; leaf = leaf->next) {
            if (strcmp(pp_link_array[leaf->link].name, selector) == 0) {
                has_selector = TRUE;
                break;
            }
        }
        if (!has_selector) continue;

        for (leaf = domain_array[d].child; leaf != NULL; leaf = leaf->next) {
            if (string_in_list(pp_link_array[leaf->link].name, incompat)) {
                return FALSE;
            }
        }
    }
    return TRUE;
}

int ordering_constraint(char * X, char * Y) {
/*
  Returns TRUE if, within every group, the right word of each Y link
  is strictly to the right of the right word of each X link.
  Returns FALSE otherwise.  A link is tested against Y first, and
  against X only when its name is not Y.
*/
    D_tree_leaf * leaf;
    int d, right_word, min_y_right, max_x_right;
    char * name;

    for (d = 0; d < N_domains; d++) {
        min_y_right = N_words;   /* larger than any word index */
        max_x_right = -1;        /* smaller than any word index */
        for (leaf = domain_array[d].child; leaf != NULL; leaf = leaf->next) {
            name = pp_link_array[leaf->link].name;
            right_word = pp_link_array[leaf->link].r;
            if (strcmp(name, Y) == 0) {
                if (right_word < min_y_right) min_y_right = right_word;
            } else if (strcmp(name, X) == 0) {
                if (right_word > max_x_right) max_x_right = right_word;
            }
        }
        if (min_y_right <= max_x_right) return FALSE;
    }
    return TRUE;
}

int contains_one(char * selector, char * required[]) {
/* Returns TRUE if and only if every group that contains a link named
   selector also contains at least one link whose name is in the
   required list.  Uses exact string matching.
*/
    D_tree_leaf * leaf;
    int d, satisfied;

    for (d = 0; d < N_domains; d++) {
        leaf = domain_array[d].child;
        while (leaf != NULL &&
               strcmp(pp_link_array[leaf->link].name, selector) != 0) {
            leaf = leaf->next;
        }
        if (leaf == NULL) continue;     /* selector not in this group */

        satisfied = FALSE;
        for (leaf = domain_array[d].child; leaf != NULL; leaf = leaf->next) {
            if (string_in_list(pp_link_array[leaf->link].name, required)) {
                satisfied = TRUE;
                break;
            }
        }
        if (!satisfied) return FALSE;
    }
    return TRUE;
}

void connectivity_dfs(int w, char * s[]){
/* Mark w visited and recurse into every unvisited neighbor that is
   joined to w by a link whose name is not in the list s. */
    List_o_links * edge = word_links[w];

    visited[w] = TRUE;
    while (edge != NULL) {
        if (!visited[edge->word]) {
            if (!string_in_list(pp_link_array[edge->link].name, s)) {
                connectivity_dfs(edge->word, s);
            }
        }
        edge = edge->next;
    }
}

void mark_reachable_words(int w){
/* Mark as visited every word reachable from w through any links.
   Does nothing if w has already been visited. */
    List_o_links * edge;

    if (!visited[w]) {
        visited[w] = TRUE;
        edge = word_links[w];
        while (edge != NULL) {
            mark_reachable_words(edge->word);
            edge = edge->next;
        }
    }
}

int is_connected_without(char * s[]) {
/* Returns TRUE if every word reachable from word 0 in the full graph
   is still reachable from word 0 when the links named in s are
   ignored; returns FALSE otherwise.

   Words that are not reachable from word 0 even with all links are
   pre-marked as visited, so they do not count against the result.
*/
    int w;

    for (w = 0; w < N_words; w++) visited[w] = FALSE;
    mark_reachable_words(0);

    /* flip the marks: reachable words become "unvisited" again and
       unreachable ones become "visited" */
    for (w = 0; w < N_words; w++) visited[w] = !visited[w];

    connectivity_dfs(0, s);

    w = 0;
    while (w < N_words) {
        if (visited[w] == FALSE) return FALSE;
        w++;
    }
    return TRUE;
}

/* NULL-terminated lists of link names for use as the list arguments
   of the rule-checking functions above (contains_none, contains_one,
   group_type_contains_one, ...). */
static char * incompat_with_THi[] =  {"I", "T", "TO", "AI", "AIc", "I*j", 
          "Ss", "Sp", "Spx", "Ssi", "S", "S*i", "Spii", "SI",
          "SIs", "SIp", "SIpx", "SIsi", "SI*i", "SIpii", "Ss*t", "Ssit",
          "SIs*t", "SIsit", "GI", "I*d", "V", "AI*j", "Ss*d", "Ssxd",
          "Ssid", NULL};
static char * SXiSXIi[] =  {"SXsi", "SXIsi", "OXi", "SXIsij", NULL};
static char * Necessary_for_SX[] =  {"TV", "TVi", "THi", "Ri", "Rix", 
          "TSi", NULL};
static char * Necessary_for_EVx[] = {"Dmuc", "Dmcc", "Eac", "Eec", "EVc", 
	  "AIc", "AIic", "IXc", "Ezc", "EVcb", "Oc", NULL};
static char * Nec_for_EVy[] = {"Dmuz", "Dmcz", "Eaz", "Eez", "EVm", NULL};
static char * Opt[] = {"O*t", "Opt", NULL};
static char * Ost[] = {"O*t", "Ost", NULL};
/* Oxt and Obj currently hold the same three names */
static char * Oxt[] = {"Ost", "Opt", "O*t", NULL};
static char * Obj[] = {"Ost", "Opt", "O*t", NULL};
/* NOTE(review): "SIsit" appears twice below; possibly one of them was
   meant to be "SIs*t" (compare incompat_with_THi) -- confirm. */
static char * Nec_for_THb[] = {"Ss*t", "SIsit", "Ssit", "SIsit", NULL};
static char * Necessary_for_Thcom[] = {"TH", "THi", NULL};
static char * Necessary_for_Tvcom[] = {"TVi", NULL};
static char * Necessary_for_Tocom[] = {"TOi", NULL};
static char * Necessary_for_Tvcom_Trans[] = {"TVt", NULL};
static char * EVc[] = {"EVc", NULL};
static char * EV[] = {"EV", "EVat", "EVpt", NULL};
static char * Ezc[] = {"Ezc", NULL};
static char * AI_or_AIi[] = {"AI", "AIi", NULL};
static char * Nec_for_Bc[] = {"Dmcc", "Dmuc", "Dmcz", "Dmuz", "Oc", "Om", NULL};
static char * Nec_for_VC[] = {"Eec", "EVcb", "Dmcc", "Dmuc", "Eez", "Dmcz",
     "EVc", "Dmuz", "Oc", "Om", NULL};
static char * Nec_for_IXt[] = {"AIc", "AIic", "Eac", "Ds*c", "Eaz", "IXc", 
     NULL};
static char * Nec_for_CLl[] = {"Eec", "Eez", "EVc", "EVm", NULL};
static char * Nec_for_Loose[] = {"Eac", "Eec", "Ds*c", "Dmcc", "Dmuc", 
     "AIic", "AIc", "IXc", "EVc", "EVcb", "Eaz", "Eez", "Dmuz", "Dmcz", 
     "Oc", "Om", "EVm", NULL};
static char * Nec_for_Mc[] = {"Dmcc", "Dmuc", NULL};
static char * Nec_for_Sp_com[] = {"Dmcc", "Dmcz", "Oc", "Om", NULL};
static char * Nec_for_Ss_com[] = {"Dmuc", "Dmuz", "Oc", "Om", NULL};
static char * Nec_for_S_com[] = {"Dmuc", "Dmuz", "Dmcc", "Dmcz", "Oc",
   "Om", NULL};
static char * Nec_for_Osc[] = {"Ost", "Opt", NULL};
static char * Nec_for_y[] = {"Bsx", "Bpx", "Ss", "Sp", "S", "S*i", "Ssi", 
   "Spx", "Spii", "Ss*n", "Sp*n", "S**n", "Ssin", "Spxn", "S*in",
   "SXp", "SXsi", "SXst", "Ss*t", "Ssit", "G", NULL};
static char * Nec_for_y_2[] = {"Ds*w", "Dmuw", "Dmcw", "Ss*n", "Sp*n", "S**n",
   "Ssin", "Spxn", "S*in", "HNx", NULL};
static char * Nec_for_Bsx[] = {"Ds*w", "Dmuw", "Dmcw", "HNx", "AA", NULL};
static char * Nec_for_q[] = {"SI", "SIp", "SIs", "SXI", "SXIst",
     "SXIsi", "SXIp", "SIpx", "SIsi", "SI*i", "SIpii", "SIs*t", "SIsit",
     "GIs", NULL};
static char * Jx[] = {"Jx", NULL};
static char * Mx[] = {"Mx", NULL};
static char * Nec_for_j[] = {"JW", "Jw", NULL};
static char * ZZ[] = {"ZZ", NULL};
/*
contains_none("xx", array_name)
  returns false if a group containing an xx link also has a link whose
  name is in the array.  Uses exact string matching (not smart matching)

contains_one("xx", array_name)
  returns true if every group containing an xx link has at least one
  link whose name is in the array.  Uses exact string matching (not
  smart matching)
*/

void free_PP_node(PP_node * p) {
/* Release a PP_node: the domain-type list of each of its N_links
   array slots, the array itself, the violation list, and finally the
   node. */
    D_type_list * type_node, * next_type;
    Violation_list * viol, * next_viol;
    int link;

    for (link = 0; link < N_links; link++) {
        type_node = p->d_type_array[link];
        while (type_node != NULL) {
            next_type = type_node->next;
            xfree((char *) type_node, sizeof(D_type_list));
            type_node = next_type;
        }
    }
    xfree((char *) p->d_type_array, N_links * sizeof(D_type_list *));

    viol = p->v;
    while (viol != NULL) {
        next_viol = viol->next;
        xfree((char *) viol, sizeof(Violation_list));
        viol = next_viol;
    }
    xfree((char *) p, sizeof (PP_node));
}
     

D_type_list ** build_type_array(void) {
    D_type_list ** array, * dtl;
    int d, i;
    List_o_links * lol;
    array = (D_type_list **) xalloc(N_links * sizeof(D_type_list *));
    for (i=0; i<N_links; i++) {
	array[i] = NULL;
    }
    for (d=0; d<N_domains; d++) {
	for (lol=domain_array[d].lol; lol != NULL; lol = lol->next) {
	    dtl = (D_type_list *) xalloc(sizeof(D_type_list));
	    dtl->next = array[lol->link];
	    array[lol->link] = dtl;
	    dtl->type = domain_array[d].type;
	}
    }
    return array;
}

PP_node * bogus_pp_node(void) {
/* Construct and return a vacuus pp_node.
   This is for when post processing is turned off.
*/
    PP_node * pp_return;
    int link;
    pp_return = (PP_node *) xalloc(sizeof(PP_node));
    pp_return->d_type_array =
      (D_type_list **) xalloc(N_links * sizeof(D_type_list *));
    for (link=0; link<N_links; link++) {
	pp_return->d_type_array[link] = NULL;
    }
    pp_return->v = NULL;
    return pp_return;
}
 
Violation_list * issue_violation(Violation_list * v, char * string) {
/* Allocate a violation record holding the given string pointer (the
   string is not copied), put it in front of list v, and return the
   new head of the list. */
    Violation_list * head = (Violation_list *) xalloc(sizeof (Violation_list));

    head->next = v;
    head->string = string;
    return head;
}

/* PP(condition, message): if condition is true, push message onto the   */
/* violation list "v" and jump to the label "done_checking".  Both "v"   */
/* and "done_checking" must exist in the function that uses the macro.   */
/* The body is wrapped in do { ... } while (0) so that "PP(...);" is a   */
/* single statement and can safely be the body of an if that has an else. */
#define PP(condition,message) do {                              \
    if ((condition)) {                                          \
	v = issue_violation(v, (message));                      \
	goto done_checking;                                     \
    }                                                           \
} while (0)

/* The above goto could be avoided by using "elseif", but some compilers  */
/* have arbitrary limits on how many of these you can have.               */

PP_node * post_process(void){
/* Run the post-processing rules over the current linkage.

   Takes as input:
      N_words (to know how big to make the graph).
      N_links, pp_link_array[]  (of course also uses the connectors, etc.
           that are reachable from the pp_link_array).
      pp_link_array[i].l = -1 means that this connector is to be ignored.
   Returns:
      For each link, the domain structure of that link.
      A list of the violation strings.

   If postprocess_defined is false, nothing is checked and the result of
   bogus_pp_node() is returned.  Otherwise the graph, the domains and the
   domain forest are built, and the rules below are tried in order.  Each
   rule is a PP(...) invocation; the first rule whose condition holds
   records its message and jumps to done_checking, so the returned
   violation list holds at most one entry.  The order of the rules
   therefore determines which message is reported.

   The returned node is allocated with xalloc(); presumably the caller
   releases it with free_PP_node() -- confirm against the callers.
*/

    Violation_list * v;
    PP_node * pp_return;
    if(!postprocess_defined) return bogus_pp_node();
    pp_return = (PP_node *) xalloc(sizeof(PP_node));
    build_graph();
    build_domains();
    build_domain_forest();
    v = NULL;
    /* Non-nested domains are only reported (when verbose); they do not
       count as a violation and checking continues. */
    if(!check_domain_nesting()) {
	if (verbosity > 1) {
	    printf("FYI: The domains are not nested.");
	}
    }
    /* Structural checks: connectivity, and boundedness of the 'a', 'e'
       and 'r' domain types. */
    PP(!is_connected_without(must_be_connected_without),
                                      "Connectivity violation");
    PP(!domain_bounded('a'), "Unbounded a domain");
    PP(!domain_bounded('e'), "Unbounded e domain");
    PP(!domain_bounded('r'), "Unbounded r domain");

/* The next 15 or so rules relate to the new question-word stuff */

/*  The following rules (one per SI-like link name) ensure that question
    inversion - which always uses an SI of some sort - can only occur in
    questions. 'q' domains are started by Wq connectors (which connect
    question words to the wall) and Qq connectors (which connect
    invertible verbs to the wall and also to "[prep] which" expressions). */

    PP(!link_inhabits("SI", 'q'),    "question inversion violated");
    PP(!link_inhabits("SIs", 'q'),   "question inversion violated");
    PP(!link_inhabits("SIp", 'q'),   "question inversion violated");
    PP(!link_inhabits("SIpx", 'q'),  "question inversion violated");
    PP(!link_inhabits("SIsi", 'q'),  "question inversion violated");
    PP(!link_inhabits("SIpii", 'q'), "question inversion violated");
    PP(!link_inhabits("SI*i", 'q'),  "question inversion violated");
    PP(!link_inhabits("SXIsi", 'q'), "question inversion violated");
    PP(!link_inhabits("SXIst", 'q'), "question inversion violated");
    PP(!link_inhabits("SXIp", 'q'),  "question inversion violated");
    PP(!link_inhabits("GIs", 'q'),   "question inversion violated");

/* In certain indirect questions like "I wonder who to hit", "to" does
   not connect to anything before it. In such cases its I+ connector will
   be an 'Ia'. This rule ensures that the 'Ia' will only be used in
   indirect questions. (Indirect questions are started by R#x connectors,
   found on "who", "which", "whose", "what", and "how [many/much]".)
   (Problem here with "I" subscripts?) */
    PP(!link_inhabits("Ia", 'a'),"incorrect use of 'to'");

/* This rule ensures that every S-V inverted 
   question contains an SI of some kind. 
   The "group_type_contains_one" rule requires that the group contain
   exactly one of the link type specified, not more. In general, this is
   fine, since a clause generally contains exactly one "S" or "SI". In
   sentences with "and", this rule will only work correctly if the 
   sentence is treated as several separate sentences (one for each element
   of the and-list), each of which must follow the rule. Another 
   problem is comparatives. In a sentence like "John is smarter than Joe is",
   there are two S's in the same group. This results in some false positives
   like "Joe is smarter than is John". (This should be mentioned in 
   "choices") */
    PP(!group_type_contains_one('q', Nec_for_q),"question inversion violated");

/* 'j' domains have groups containing expressions of the form
   "[prep] which". (Unlike most domains, their groups do not contain whole
   clauses.) The first two rules below ensure that phrases like "at which" 
   only occur in questions and relatives ("The dog at which I threw the rock 
   died", "*I threw the rock at which"). The third rule ensures that in a 
   preposition-type question (where a Wj connects the preposition to the wall),
   or a preposition-type relative (where an Mj connects the preposition to
   the preceding noun), a question word such as "which" or "whom" will be used.
   ("*The dog at John I threw the rock died".) "Jw" connects prepositions to
   "which" and "whom"; "JW" connects them to "which" or "whose", forcing 
   these words to connect to a noun. */

    PP(!link_inhabits("JW", 'j'),"Misuse of question word");
    PP(!link_inhabits("Jw", 'j'),"Misuse of question word");   
    PP(!group_type_contains_one('j', Nec_for_j),"Misuse of preposition");

/* Nouns have "B#x" connectors. These are only to be used when nouns take
   question-word determiners: "what", "whose", "which", or "how[many/much]".
   To ensure this, we give such words "D**w+" connectors and insist that a
   "B#x" can only occur in a group with a "D##w". Furthermore, whenever
   one of these question-word determiners is used, either a 'q' domain is
   started (using the Wq on the determiner) or a 'y' domain is started
   (using the Wx on the determiner). 'q' domains occur in object-type, i.e.
   s-v-inverted, questions; 'y' domains occur in subject-type questions.
   The second rule below ensures that
   a 'y' group must contain an S# connector. In the same rule we say that
   a 'y' group may not contain a B#x (taking advantage, in a rather slippery
   way, of the "exactly-one" property of the group_type_contains_one rule.
   A stronger way to do this would be to have a "group_type_contains_none"
   rule, where we would say that a 'y' group may not contain a B#x). We 
   therefore correctly judge "Whose dog bit Joe", "*Whose dog Joe bit".

   But supposing the "B#x" is in a subordinate clause, as in "*Whose dog
   Dave thinks Joe bit"? Supposing, in this sentence, the "Wx" on "whose"
   is used. Now a 'y' domain will be started. It will contain the "S" between
   "Dave" and "thinks"; the program will think this a subject-type question,
   thus it will be allowed. How do we prevent this? We prevent it by saying
   that a 'y' group must also contain a "D##w", the type of D connector on
   indirect questions (this is the third rule, below) (or it may contain a
   "S#n", found on question-words like "who").
   We further say that B#x links are NOT restricted
   links; D##w are, however. This means that, in the above sentence, the
   'b' domain started by "thinks" will spread back through the "Bsx" to
   "dog"; from there it will continue through the "Ds*w" to "whose". Now,
   "whose" is no longer in the 'y' group! The sentence will therefore be
   rejected. */
    /* Note: both the Bsx and the Bpx rule are checked against the same
       array, Nec_for_Bsx. */
    PP(!contains_one("Bsx", Nec_for_Bsx),"Bsx rule violated");
    PP(!contains_one("Bpx", Nec_for_Bsx),"Bpx rule violated");
    PP(!group_type_contains_one('y', Nec_for_y),"'y' group requirements violated");
    PP(!group_type_contains_one('y', Nec_for_y_2),"A 'y' group must contain a D**w");

/*  Nouns have the complex "Mx- & Bs+", orred with their 
   "S+" connectors. "Which" and "whom" have "Jx-" connectors, anded
   with their "Z" connectors.  All this ugliness is
   simply to allow the construction "These are the books the
   height of the covers of which is regulated by the government". In
   the above sentence, the "Mx-" connector on "height" connects to the
   "M+" on books. The "Bs+" on "height" connects to "is"; "which" provides
   the needed "Z+": "which" also connects to "of" with its "Jx". 

   There are two false-positive problems to be dealt with here. First of
   all, we have to ensure that in the above sentence, "which" is used,
   and not just an ordinary noun (*"The books the height of the books is
   regulated are here"). So we say that "Mx" links start "m" domains
   (which they themselves are included in); we also say that a group with
   an Mx must contain a Jx. This is the first rule below. Secondly,
   we need to ensure that the construction "of which" does not just occur
   any old place ("*The dog of which died"). We therefore say that a "Jx"
   must occur in the same domain as an "Mx". This is the second rule below. */

    PP(!contains_one("Mx", Jx),"An Mx must have a Jx");
    PP(!contains_one("Jx", Mx),"A Jx must have an Mx");   

/* The next group of rules relates to "it/there" problems. There should be
   a nice way of condensing these.
   NOTE(review): a few messages in this group name a different link than
   the one the rule checks (e.g. the "Rix" rules report "Ri rule ...", the
   "OXt" contains_one rule reports "SXs rule 2 violated", and the "OXi"
   rule reports "SXIsi rule violated") -- confirm whether that is
   intended before relying on the message text. */
    PP(!contains_none("THi", incompat_with_THi),"THi rule 1 violated");
    PP(!contains_one("THi", SXiSXIi),"THi rule 2 violated");
    PP(!contains_none("TSi", incompat_with_THi),"TSi rule 1 violated");
    PP(!contains_one("TSi", SXiSXIi),"TSi rule 2 violated");
    PP(!contains_none("Ri", incompat_with_THi),"Ri rule 1 violated");
    PP(!contains_one("Ri", SXiSXIi),"Ri rule 2 violated");
    PP(!contains_none("Rix", incompat_with_THi),"Ri rule 1 violated");
    PP(!contains_one("Rix", SXiSXIi),"Ri rule 2 violated");
    PP(!contains_none("TVi", incompat_with_THi),"TV rule 1 violated");
    PP(!contains_one("TVi", SXiSXIi),"TV rule 2 violated");
    PP(!contains_none("SXst", incompat_with_THi),"There rule 1 violated");
    PP(!contains_none("SXIst", incompat_with_THi),"There rule 1 violated");
    PP(!contains_none("SXIstj", incompat_with_THi),"There rule 1 violated");
    PP(!contains_none("OXt", incompat_with_THi),"OXt rule 1 violated");
    PP(!contains_none("SXp", incompat_with_THi),"There rule 1 violated");
    PP(!contains_none("SXIp", incompat_with_THi),"There rule 1 violated");
    PP(!contains_none("SXIp*j", incompat_with_THi),"There rule 1 violated");
    PP(!contains_one("SXst", Ost),"There rule 2 violated");
    PP(!contains_one("SXIst", Ost),"There rule 2 violated");
    PP(!contains_one("SXIstj", Ost),"There rule 2 violated");
    PP(!contains_one("OXt", Oxt),"SXs rule 2 violated");
    PP(!contains_one("SXsi", Necessary_for_SX),"It rule violated");
    PP(!contains_one("SXIsi", Necessary_for_SX),"It rule violated");
    PP(!contains_one("SXIsij", Necessary_for_SX),"It rule violated");
    PP(!contains_none("G", Necessary_for_SX),"Special subject rule violated");
    PP(!contains_none("GIs", Necessary_for_SX),"Special subject rule violated");
    PP(!contains_none("G", incompat_with_THi),"Special subject rule violated");
    PP(!contains_none("GIs", incompat_with_THi),"Special subject rule violated");
    PP(!contains_none("G", Obj),"Special subject rule violated");
    PP(!contains_none("GIs", Obj),"Special subject rule violated");
    PP(!contains_one("THb", Nec_for_THb),"be-that rule violated");
    PP(!contains_one("OXi", Necessary_for_SX),"SXIsi rule violated");

    PP(!contains_one("SXp", Opt),"There rule 2 violated");
    PP(!contains_one("SXIp", Opt),"There rule 2 violated");
    PP(!contains_one("SXIp*j", Opt),"There rule 2 violated");

/* The next group of rules relates to comparatives. There are a lot of
   them, and it may be difficult to condense them much, but there's a
   fair amount of arbitrary information relating to comparatives that has
   to be stored somehow, and I actually think the domain approach handles
   this problem pretty well. */

    PP(!contains_one("THc", Necessary_for_Thcom),"That-comparative rule violated");
    PP(!contains_one("TOic", Necessary_for_Tocom),"To-comparative rule violated");
    PP(!contains_one("TVic", Necessary_for_Tvcom),"To-comparative rule violated");
    PP(!contains_one("TVtc", Necessary_for_Tvcom_Trans),"To-comparative rule violated");
    PP(!contains_one("EVx", Necessary_for_EVx),"Than rule violated");
    PP(!contains_one("EVy", Nec_for_EVy),"As rule violated");
    PP(!CCG_rule(),"C-C-G Rule violated");
    PP(!ordering_constraint("Eac", "EVx"),"Than ordering Rule violated");
    PP(!ordering_constraint("Eec", "EVx"),"Than ordering Rule violated");
    PP(!ordering_constraint("EVc", "EVx"),"Than ordering Rule violated");
    PP(!ordering_constraint("Dmuc", "EVx"),"Than ordering Rule violated");
    PP(!ordering_constraint("Dmcc", "EVx"),"Than ordering Rule violated");
    PP(!contains_one("AIit", Ezc),"Than-adj 1 violated");
    PP(!contains_one("AIit", AI_or_AIi),"Than-adj 2 violated");
    PP(!contains_one("EVat", EVc),"Than-adv 1 violated");
    PP(!contains_one("EVpt", EVc),"Than-prep 1 violated");
    PP(!contains_one("EVat", EV),"Than-adv 2 violated");
    PP(!contains_one("EVpt", EV),"Than-prep 2 violated");
    PP(!contains_one("Bc", Nec_for_Bc),"Than-obj 1 violated");
    PP(!contains_one("Ust", Nec_for_Bc),"Than-obj 2 violated");
    PP(!contains_one("Upt", Nec_for_Bc),"Than-obj 2 violated");
    PP(!contains_one("Osc", Nec_for_Osc),"Osc violated");
    PP(!contains_one("O*c", Nec_for_Osc),"O*c violated");
    PP(!contains_one("VC", Nec_for_VC),"VC violated");
    PP(!contains_one("IXt", Nec_for_IXt),"IXt violated");
    PP(!contains_one("CLl", Nec_for_CLl),"CLl violated");
    PP(!contains_one("Osl", Nec_for_Loose),"Osl violated");
    PP(!contains_one("Opl", Nec_for_Loose),"Opl violated");
    PP(!contains_one("O*l", Nec_for_Loose),"O*l violated");
    PP(!contains_one("Mpl", Nec_for_Loose),"Mpl violated");
    PP(!contains_one("Mc", Nec_for_Mc),"Mc violated");
    PP(!contains_one("Sp*c", Nec_for_Sp_com),"Sp_com violated");
    PP(!contains_one("Spxc", Nec_for_Sp_com),"Sp_com violated");
    PP(!contains_one("Ssic", Nec_for_Ss_com),"Ss_com violated");
    PP(!contains_one("S*ic", Nec_for_Ss_com),"Ss_com violated");
    PP(!contains_one("Ss*c", Nec_for_Ss_com),"Ss_com violated");
    PP(!contains_one("S**c", Nec_for_S_com),"S_com violated");
    /* NOTE(review): the four rules below all test against ZZ; presumably ZZ
       is an array that can never be satisfied, so these links are always
       rejected -- confirm against the definition of ZZ. */
    PP(!contains_one("Ixd", ZZ),"Can't use \"do\" with that verb");
    PP(!contains_one("Oxn", ZZ),"Can't use a tag word after a pronoun");
    PP(!contains_one("Spxi", ZZ),"bad n-v agreement");
    PP(!contains_one("Spi", ZZ),"bad n-v agreement");

/* Reached either by falling through (no rule fired, v is still NULL) or
   by the goto inside PP (v holds the one violation that fired). */
done_checking:

    /* The type array must be built before the post-processing structures
       it is derived from are freed. */
    pp_return->d_type_array = build_type_array();
    pp_return->v = v;
    free_post_processing_structures();
    return pp_return;
}
