 /***************************************************************************/
 /*                                                                         */
 /*      Copyright (C) 1991, 1992  Daniel Sleator and Davy Temperley        */
 /*  See file "README" for information about commercial use of this system  */
 /*                                                                         */
 /***************************************************************************/

#include "header.c"

/*
  This file does the grungy job of chopping the user's input sentence
  into words, processing the special commands, and calling all the
  functions necessary to parse the input sentence.

  The following sequence of steps is used to parse:

     * Break the input line into appropriate words.
     * Construct the dictionary expressions for each of the words.
     * Apply expression pruning to these expressions.
     * Build the disjuncts for the expressions.
     * If the sentence contains a conjunction then:
          * Apply gentle power_pruning
          * Clear out further irrelevant disjuncts with conjunction-pruning.
          * Build the "and" tables and put fat disjuncts on all the words.
	  * Apply pruning to the resulting disjuncts.
     * Apply ruthless power pruning.
     * Exhaustively search for a valid linkage.
     * For each of these linkages:
          * If there is a conjunction, expand the sentence
	    into each of the sentences it represents, and apply
	    post-processing to each of these.  All must pass, or the
	    sentence is considered to have a violation.
	  * If there is not a conjunction, then just postprocess
	    the linkage.
	  * Compute the cost of each of the linkages.  (A p.p. violation
	    essentially has infinite cost.)
     * Sort all the linkages by cost.
     * Display the solutions one at a time, according to the whim of the
       user.

  Pruning is always applied before power pruning, because it saves more
  time in power-pruning than it uses.  Power-pruning is said to be
  "ruthless" in that doing it before generating the and disjuncts will
  not work -- it will remove too much stuff.  Pruning has been modified
  to be "gentle" -- it doesn't do as much as it might so that it avoids
  deleting things that are needed after and-disjunct generation.
  It might be effective to make a ruthless version of pruning, and install
  it where appropriate.
*/

/*
Notes to myself:

  If the wall isn't defined, and you try a sentence with
  "and" where one element of the and list contains the leftmost
  word, then it won't work.  This is not worth fixing.

  Create two different lists of andable connectors -- one for "and"
      and "or", and another for everything else ("nor", "but")
  Davy suggests:
   Yeah, I would suggest having one list for "and", and another list for
   all the others ("either...or", "neither...nor", "both...and", "not
   only...but", "but").
  I say:
    Not only Mary but John came                     sounds ok
             Mary but John came                     sounds wrong
  Davy:
   Yeah, there are differences between "not only...but" and "but". But I
   think the most egregious differences are between "and" and the others:
   "the not only dog but cat died", etc..


   How to avoid "Not only Jim, John, but Mary came":
 
   1.Make it impossible for the conjunction "but" to connect to a comma.
     As you go up the fat-link tree, the fat links get more and more
     restrictive (see the extensive comment in and.c).   Append an
     extra character to the end of all of the match strings for fat links.
     This character will be, say "^" on comma, and on "and", but it will
     be "*" on but, and on all the normal words.  This will kill the
     possible connection between comma and "but", but all others will
     remain intact.

   2.Actually, I think a much easier way to do it is in post-processing.
     There, we'd just look for a fat link up from a comma to a "but",
     and flag this as a violation.  Easy.

  I wonder if the choice of which fat disjuncts to generate on a word could
  be made to depend on where the word is in the sentence?  Would this make
  any difference?  Perhaps pruning does anything this might do.

  Limit the number of linkages to which post-processing is applied (after
  a correct linkage has been found)?

  It might be useful to get rid of the idiom suffixes (.Ixx) at the
  beginning (instead of at the end when you print it) because then
  identical disjuncts would be eliminated.  At the moment it's not too
  much of a problem because most of the expressions get killed off in
  expression pruning.  If there were a huge number of idioms, other
  bottlenecks might appear.
  
  Would RUTHLESS pruning be useful?

  Mystery: why does "dogs and cats who died died" behave differently
  in the current system versus the transcript system.

  Figure out why folding essentially doesn't help.
  
  End of sentence feature -- display a "." or "?" whichever is
  appropriate.

  "Bill ran" fails because it uses the noun "bill" and never considers
  the proper name "Bill".  I "fixed" this to allow it to try to use a
  proper name for the first word if it's capitalized.  This led it to
  accept a lot of junk ("The died", etc), so I removed it.  The correct
  way to solve this has been installed -- namely, all of these ambiguous
  names have been explicitly listed in the dictionary.

  Should I fix it so that using "*" as the first word gives an error
  message rather than substituting the dictionary?  -- or perhaps
  get rid of "*" completely.

*/

/* Global state for the sentence currently being parsed, the user-settable */
/* display options, and the statistics counters.                           */
Word sentence[MAX_SENTENCE];
int N_words;  /* the number of words in the current sentence */
int wall_defined;    /* TRUE if the dictionary contains "WALL" */
int postprocess_defined;  /* TRUE if the dict contains "POSTPROCESS" */
int andable_defined; /* TRUE if the dict contains the andable connector list */
int verbosity = 0; /* 0 -- 2 from least to most verbose */
int screen_width = 79;     /* the width of the screen for display purposes */
int display_short = TRUE;  /* abbreviate height of display */
int display_on = TRUE;     /* turn the display on */
int display_links = FALSE;  /* the link display */
int display_bad = FALSE; /* display even the linkages with P.P. violations */
int display_fat = FALSE;   /* display the fat linkage too */
int display_multiple = TRUE;  /* display the component sentences  */
int display_postscript = FALSE;  /* turn on postscript data generation */
int echo_on = FALSE;/* whether or not to echo the input sentence */
char is_conjunction[MAX_SENTENCE]; /* TRUE if this word is a conjunction */
char * deletable[MAX_SENTENCE];  /* deletable[i][j] is TRUE if         */
  /* the words i+1...j-1 could be nonexistent in one of       */
  /* the multiple linkages.                                   */

int parse_cost;          /* counts iterations of inner parse loop */
int mark_cost;           /* estimate of the cost of the marking phase */
int N_in_table;          /* hashing statistics for parse */
int N_hash_lookups;      /* ditto */
int work_in_hash_lookups;/* ditto */

int issue_sentence_word(char * s) {
/* Appends the word s to sentence[] and advances N_words.
   The empty string is never issued; it is simply ignored and counts
   as success.  Returns FALSE, after printing a complaint, if s has
   MAX_WORD or more characters or if the sentence already holds
   MAX_SENTENCE words.  Returns TRUE otherwise.
*/
    int ok = TRUE;

    if (s[0] != '\0') {
	if (strlen(s) >= MAX_WORD) {
	    printf("The word \"%s\" is too long.\n", s);
	    printf("A word can have a maximum of %d characters.\n", MAX_WORD-1);
	    ok = FALSE;
	} else if (N_words == MAX_SENTENCE) {
	    printf("The sentence has too many words.\n");
	    ok = FALSE;
	} else {
	    strcpy(sentence[N_words].string, s);
	    N_words++;
	}
    }
    return ok;
}

int separate_sentence(char * s) {
/* The string s has just been read in from standard input.
   This function breaks it up into words (overwriting parts of s in the
   process) and stores these words in the sentence[] array, after the
   wall word if wall_defined is set.  Returns TRUE if all is well, FALSE
   otherwise (too many words, too many punctuation marks on one group,
   an over-long word, or no words at all).

 (0) Remove trailing white space, and then one final '.', '?' or '!'.

 (1) replace blanks by '\0's, and make starts point to the beginnings
     of these groups.

 (2) Repeatedly rip off commas (and colons and semi-colons) from the end
     of each group.   These become words.

 (3) If the remaining group is in the dictionary, use it.

 (4) If it isn't, and it ends with "'" or "'s" break it into two words.

     (Things are a little more complicated, to be able to handle
      "He's there" and "he's there".  Contractions aren't allowed to
      be exclusively capitalized words.)

   All <ctype.h> calls are given unsigned char values, so that bytes
   outside the ASCII range cannot cause undefined behavior.
*/
    char * starts[MAX_SENTENCE];
    char *x, *y;
    char punc[50];
    char pc[2];
    char first;
    int N_starts, i, N_puncs, word_there;

    /* The index is tested against 0 so that an empty or all-blank line
       never causes a read (or write) of s[-1]. */
    for (i = (int) strlen(s) - 1;
	 (i >= 0) && isspace((unsigned char) s[i]); i--) {
	s[i] = '\0';
    }
    if ((i >= 0) && ((s[i] == '.') || (s[i] == '?') || (s[i] == '!'))) {
	s[i] = '\0';
    }

    /* (1) split s at white space; starts[] records each group */
    x = s;
    N_starts = 0;
    for(;;) {
	if (N_starts == MAX_SENTENCE) {
	    printf("The sentence has too many words.\n");
	    return FALSE;
	}
	while (isspace((unsigned char) *x)) x++;
	starts[N_starts] = x;
	N_starts++;
	while ((!isspace((unsigned char) *x)) && (*x != '\0')) x++;
	if (*x == '\0') break;
	*x = '\0';
	x++;
    }

    N_words = 0;
    if (wall_defined) {
	strcpy(sentence[0].string,WALL_WORD);
	N_words++;
    }

    for (i=0; i<N_starts; i++) {
	/* (2) y starts one past the last character and walks left over
	   the trailing punctuation; it never moves before starts[i]. */
	y = starts[i] + strlen(starts[i]);
	N_puncs = 0;
	while ((y > starts[i]) &&
	       ((y[-1]==',') || (y[-1]==';') || (y[-1]==':'))) {
	    if (N_puncs >= (int) sizeof(punc)) {
		printf("Too many punctuation marks\n");
		return FALSE;
	    }
	    punc[N_puncs] = y[-1];
	    N_puncs++;
	    y--;
	}
	*y='\0';

	/* see if it contains an "'". */
	y = starts[i];
	while((*y != '\'') && (*y != '\0')) y++;

	/* (3) the first group is looked up in lower case when it is
	   capitalized; the original character is then put back. */
	first = *starts[i];
	if ((i==0) && (isupper((unsigned char) first))) {
	    *starts[i] = tolower((unsigned char) first);
	    word_there = boolean_dictionary_lookup(starts[i]);
	    *starts[i] = first;
	} else {
	    word_there = boolean_dictionary_lookup(starts[i]);
	}

	/* (4) split off a trailing "'" or "'s" from an unknown group */
	if ((*y == '\'') && !word_there &&
	      ((strcmp(y,"\'")==0) || (strcmp(y,"\'s")==0))) {
	    *y = '\0';
	    if (!issue_sentence_word(starts[i])) return FALSE;
	    *y = '\'';
	    if (!issue_sentence_word(y)) return FALSE;
	} else {
	    if (!issue_sentence_word(starts[i])) return FALSE;
	}

	/* punc[] was filled right-to-left, so issue it in reverse */
	pc[1] = '\0';
	for (N_puncs--; N_puncs>=0; N_puncs--) {
	    pc[0] = punc[N_puncs];
	    if (!issue_sentence_word(pc)) return FALSE;
	}
    }

    /* the wall word alone does not make a sentence */
    if (wall_defined) {
	return (N_words > 1);
    } else {
	return (N_words > 0);
    }
}

int isnumber(char * s) {
/* Returns TRUE iff s looks like a number: it must begin with a digit,
   and every later character must be a digit, '.' or ','.  The empty
   string is not a number.
   The characters are examined as unsigned char, as <ctype.h> requires,
   so that bytes outside the ASCII range cannot cause undefined behavior.
*/
    const unsigned char * p = (const unsigned char *) s;

    if (!isdigit(*p)) return FALSE;
    for (p++; *p != '\0'; p++) {
	if ((!isdigit(*p)) && (*p != '.') && (*p != ',')) {
	    return FALSE;
	}
    }
    return TRUE;
}

int ispunctuation(char * s) {
/* Returns 1 iff s is exactly one of the one-character strings
   ",", ";" or ":"; returns 0 otherwise.
*/
    if ((s[0] == '\0') || (s[1] != '\0')) return 0;
    return ((s[0] == ',') || (s[0] == ';') || (s[0] == ':'));
}

int ishyphenated(char * s) {
/* Returns TRUE iff s is an appropriately formed hyphenated word.
   This means it consists only of letters, digits, '.', ',' and '-',
   contains at least one hyphen, and neither begins nor ends with a
   hyphen.  The empty string is not hyphenated.
   The characters are examined as unsigned char, as <ctype.h> requires.
*/
    const unsigned char * p = (const unsigned char *) s;
    int hyp = 0;

    /* rejecting "" here keeps the p[-1] below inside the string */
    if ((*p == '\0') || (*p == '-')) return FALSE;
    for (; *p != '\0'; p++) {
	if (!isalpha(*p) && !isdigit(*p) && (*p != '.') && (*p != ',')
	    && (*p != '-')) return FALSE;
	if (*p == '-') hyp++;
    }
    return ((p[-1] != '-') && (hyp > 0));
}
/*
    sentence[0].e = build_word_expressions(WALL_WORD);
    if (sentence[0].e == NULL) {
	printf("Your dictionary needs a definition of the word \"%s"\n",
	         WALL_WORD);
	return FALSE;
    }
*/

void free_sentence_disjuncts(void) {
/* Passes the disjunct list (the d field) of each of the N_words words
   of the sentence to free_disjuncts(), in order.
*/
    int w = 0;
    while (w < N_words) {
	free_disjuncts(sentence[w].d);
	w++;
    }
}

void free_sentence_expressions(void) {
/* Passes the expression list (the x field) of each of the N_words words
   of the sentence to free_X_nodes(), in order.
*/
    int w = 0;
    while (w < N_words) {
	free_X_nodes(sentence[w].x);
	w++;
    }
}

int special_string(int i, char * s) {
/* Gives word i of the sentence the expressions that the dictionary
   holds for the special word s.  Every X_node obtained is relabelled
   to point at the sentence word's own string.  Returns TRUE on success.
   If s is not in the dictionary a message is printed and FALSE is
   returned.
*/
    X_node * node;

    if (!boolean_dictionary_lookup(s)) {
	printf("To process this sentence your dictionary ");
	printf("needs the word \"%s\".\n", s);
	return FALSE;
    }
    sentence[i].x = build_word_expressions(s);
    node = sentence[i].x;
    while (node != NULL) {
	node->string = sentence[i].string;
	node = node->next;
    }
    return TRUE;
}

int build_sentence_expressions(void) {
/* Corrects case of first word, fills in other proper nouns, and
   builds the expression lists for the resulting words.
   Returns TRUE on success, FALSE if the dictionary lacks one of the
   special words that the sentence requires (special_string() prints
   the message).

Algorithm:
    Apply the following step to all words w:
         if w is in the dictionary, use it.
         else if w is upper case use PROPER_WORD expressions for w.
         else if it's ",", ":" or ";" use COMMA_WORD, COLON_WORD or
              SEMICOLON_WORD respectively.
         else if it's a number, use NUMBER_WORD.
         else if it's hyphenated, use HYPHENATED_WORD.
         else if it's the first word, give it no expressions.
         else it is an internal error.

    Now, we correct the first word, w.
         if w is upper case, let w' be the lower case version of w.
             if both w and w' are in the dict, concatenate their lists.
	     else if w' is in dict, use the expressions of w' (and
	          change the sentence word itself to lower case)
	     else leave the expressions alone
         else leave w alone.

   <ctype.h> functions are given unsigned char values so that bytes
   outside the ASCII range cannot cause undefined behavior.
*/
    int i;
    int first_word;  /* the index of the first word after the wall */
    char * s, *t;
    X_node * e;

    first_word = wall_defined ? 1 : 0;

    for (i=0; i<N_words; i++) {
	s = sentence[i].string;
	if (boolean_dictionary_lookup(s)) {
	    sentence[i].x = build_word_expressions(s);
	} else if (isupper((unsigned char) s[0])) {
	    if (!special_string(i,PROPER_WORD)) return FALSE;
	} else if (ispunctuation(s)) {
	    if (strcmp(s,",")==0) {
		if (!special_string(i,COMMA_WORD)) return FALSE;
	    } else if (strcmp(s,":")==0) {
		if (!special_string(i,COLON_WORD)) return FALSE;
	    } else {
		if (!special_string(i,SEMICOLON_WORD)) return FALSE;
	    }
	} else if (isnumber(s)){
            /* we know it's a plural number, or 1 */
            /* we'll only be here if 1's not in the dictionary */
	    if (!special_string(i,NUMBER_WORD)) return FALSE;
	} else if (ishyphenated(s)) {
	    /* singular hyphenated */
	    if (!special_string(i,HYPHENATED_WORD)) return FALSE;
	} else if (i == first_word) {
	    sentence[i].x = NULL; /* probably an upper-case first word */
	} else {
	    assert(FALSE, "A word which should be there is not.");
	}
    }

    s = sentence[first_word].string;
    if (isupper((unsigned char) s[0])) {
	/* t is a lower-cased scratch copy, used for the lookup */
	t = (char *) xalloc(strlen(s)+1);
	free_this_string_later(t, strlen(s)+1);
	strcpy(t,s);
	t[0] = tolower((unsigned char) t[0]);
	if (boolean_dictionary_lookup(t)) {
	    if (boolean_dictionary_lookup(s)) {
		/* both spellings are known: offer both sets */
		e = build_word_expressions(t);
		sentence[first_word].x =
		  catenate_X_nodes(sentence[first_word].x, e);
	    } else {
		/* only the lower-case spelling is known: the sentence
		   word itself is changed to lower case, and its
		   expressions replace those built in the loop above */
		s[0] = tolower((unsigned char) s[0]);
		e = build_word_expressions(s);
		free_X_nodes(sentence[first_word].x);
		sentence[first_word].x = e;
	    }
	}
    }

/* An earlier variant of the code above never modified
   sentence[first_word].string; it built the expressions from the
   newly allocated lower-case copy t instead.
*/

    return TRUE;
}

void print_statistics(void) {
/* Prints the parse and hash-table counters and the space-accounting
   totals, one per line, preceded by a blank line.
   The cost per lookup is reported as 0 when no hash lookups have been
   made, rather than dividing by zero (which printed "nan").
*/
    float cost_per_lookup = 0.0f;

    if (N_hash_lookups != 0) {
	cost_per_lookup = (float)work_in_hash_lookups/N_hash_lookups;
    }
    printf("\n");
    printf("%d parse cost\n", parse_cost);
    printf("%d mark cost\n", mark_cost);
    printf("%d in hash table\n", N_in_table);
    printf("%d hash lookups\n", N_hash_lookups);
    printf("%f cost per lookup\n", cost_per_lookup);
    printf("%d maximum bytes ever in use\n", max_space_in_use);
    printf("%d unreclaimed space\n", space_in_use);
}

void clean_up_string(char * s) {
/* Gets rid of all the white space in the string s.  Changes s in
   place: the remaining characters are packed to the front and the
   string is re-terminated.
   Each character is passed to isspace() as an unsigned char, so that
   bytes outside the ASCII range cannot cause undefined behavior.
*/
    char * src, * dst;

    dst = s;
    for (src = s; *src != '\0'; src++) {
	if (!isspace((unsigned char) *src)) {
	    *dst = *src;
	    dst++;
	}
    }
    *dst = '\0';
}

/* Table of the integer variables that can be listed with
   "!show-variables" and assigned with "!<var>=<number>" (see
   special_command).  s is the name the user types and p points at
   the variable.  The entry whose p is NULL marks the end of the table.
*/
struct {char * s; int * p;} user_variable[] = {
              {"verbosity",           &verbosity},
	      {"screen_width",        &screen_width},
	      {"display_on",          &display_on},
	      {"display_multiple",    &display_multiple},
	      {"display_short",       &display_short},
	      {"display_postscript",  &display_postscript},
	      {"display_links",       &display_links},
	      {"display_bad",         &display_bad},
	      {"display_fat",         &display_fat},
	      {"echo_on",             &echo_on},
	      {"",                    NULL}
	  };

int is_numerical_rhs(char *s) {
/* Returns TRUE if s points to a number:
   optional + or - followed by 1 or more
   digits, and nothing else.  A lone sign or the empty string
   gives FALSE.
   The characters are examined as unsigned char, as <ctype.h> requires.
*/
    const unsigned char * p = (const unsigned char *) s;

    if ((*p == '+') || (*p == '-')) p++;
    /* do-while: at least one digit must be present */
    do {
	if (!isdigit(*p)) return FALSE;
	p++;
    } while (*p != '\0');
    return TRUE;
}

void special_command(char * s) {
/* Executes the special ("!") command held in s.
   All white space is first removed from s (so commands cannot contain
   blanks), and then the first character is skipped unconditionally;
   the caller is presumably responsible for s beginning with '!'.
   The remainder is matched, in this order, against:
      - the fixed command names (dictionary, graphics, ..., show-variables)
      - "!<string>"        dictionary query
      - "-<string>"        dictionary deletion
      - "<lhs>=<rhs>"      variable assignment or new dictionary word
      - "help"
   Anything else prints a hint to try "!help".
   s is modified in place.
*/
    char *x, *y;
    clean_up_string(s);
    s++;  /* pass up the ! sign */

    if (strcmp(s,"dictionary") == 0) {
	print_dictionary_words();
        return;
    }
    /* Each toggle below flips its flag inside the if condition (an
       intentional assignment, not a comparison) and then reports the
       new state. */
    if (strcmp(s,"graphics") == 0) {
	if (display_on = !display_on) {
	    printf("Display turned on.\n");
	} else {
	    printf("Display turned off.\n");
	}
        return;
    }
    if (strcmp(s,"multiple") == 0) {
	if (display_multiple = !display_multiple) {
	    printf("Start displaying extracted sentences.\n");
	} else {
	    printf("Stop displaying extracted sentences.\n");
	}
        return;
    }
    if (strcmp(s,"graphics-height") == 0) {
	if (display_short = !display_short) {
	    printf("Display made short.\n");
	} else {
	    printf("Display made tall.\n");
	}
        return;
    }
    if (strcmp(s,"postscript") == 0) {
	if (display_postscript = !display_postscript) {
	    printf("Start generating postscript data.\n");
	    /* turning postscript on also forces the display on */
	    display_on = TRUE;
	} else {
	    printf("Stop generating postscript data.\n");
	}
        return;
    }
    if (strcmp(s,"links") == 0) {
	if (display_links = !display_links) {
	    printf("List of links will be shown.\n");
	} else {
	    printf("The list of links will not be shown.\n");
	}
        return;
    }
    if (strcmp(s,"bad-linkages") == 0) {
	if (display_bad = !display_bad) {
	    printf("Display all linkages.\n");
	} else {
	    printf("Don't display linkages with P.P. violations.\n");
	}
        return;
    }
    if (strcmp(s,"fat-linkages") == 0) {
	if (display_fat = !display_fat) {
	    printf("Display linkages containing fat links.\n");
	} else {
	    printf("Don't display linkages containing fat links.\n");
	}
        return;
    }
    if (strcmp(s,"echo") == 0) {
	if (echo_on = !echo_on) {
	    printf("Echoing input turned on.\n");
	} else {
	    printf("Echoing input turned off.\n");
	}
        return;
    }
    if (strcmp(s,"quit") == 0) {
	/* refuse to quit while there are unsaved dictionary changes */
	if (!files_need_saving()) {
	    /* NOTE(review): a normal quit exits with status 1, which
	       shells treat as failure -- confirm this is intended. */
	    exit(1);
	} else {
	    printf("You have changed some files and not saved them.\n");
	    printf("Either save your files with \"!save\" and then quit,\n");
	    printf("or get out without saving your files with control-C.\n");
	}
	return;
    }
    if (strcmp(s,"save") == 0) {
	save_files();
	return;
    }
    if (strcmp(s, "show-variables")==0) {
	int i;
	printf("      Variable name       Value  \n");
	printf("      -------------       -----  \n");
	/* the table ends at the entry whose pointer is NULL */
	for (i=0; user_variable[i].p != NULL; i++) {
	    printf("      ");
	    left_print_string(user_variable[i].s, "                    ");
	    printf("%5d\n", *user_variable[i].p);
	}
	return;
    }
    if (s[0] == '!') {
    /* display the information about the given word */
    /* For every matching dictionary node this prints its string, the
       number of disjuncts built from it, and its word file if any. */
	Dict_node * dn;
	Disjunct * d1, * d2;
	int len;
/*	if ((dn=abridged_lookup(s+1)) == NULL) {  */
	if ((dn=dictionary_lookup(s+1)) == NULL) {
	    printf("    \"%s\" matches nothing in the dictionary.\n", s+1);
	    return;
	}
	printf("Matches:\n");
	for (;dn != NULL; dn = dn->right) {
	    /* the disjuncts are built only to be counted, then freed */
	    len=0;
	    d1 = build_disjuncts_for_dict_node(dn);
	    for(d2 = d1 ; d2!=NULL; d2 = d2->next){
		len++;
	    }
	    free_disjuncts(d1);
	    printf("          ");
	    left_print_string(dn->string, "                  ");
	    printf(" %5d  ", len);
	    if (dn->file != NULL) {
		printf("<%s>", dn->file->file);
	    }
	    printf("\n");
	}
	return;
    }
    if (*s == '-') {
	/* deleting of a word (or words) from the dictionary */
	Dict_node * dn, *dn1;
	int count;
	dn=abridged_lookup(s+1);
	count = 0;
	if (dn == NULL) {
	    printf("    \"%s\" matches nothing in the dictionary.\n", s+1);
	    return;
	}
/*
This is the version that will not delete nodes from the main dictionary.
To reinstall, the function "find_one_non_idiom_node" must also be changed.

        for (dn1 = dn; dn1 != NULL; dn1 = dn1->right) {
	    if (dn1->file != NULL) count++;
	}
	if (count==0) {
	 printf("    \"%s\" matches only words in the main dictionary.\n",s+1);
	 return;
	}
	printf("Deleting:\n");
	for (;dn != NULL; dn = dn->right) {
	    if (dn->file == NULL) continue;
	    printf(" %s ", dn->string);
	}
*/	
	/* count the matches that have no word file; the message below
	   describes these as words in the main dictionary */
        for (dn1 = dn; dn1 != NULL; dn1 = dn1->right) {
	    if (dn1->file == NULL) count++;
	}
	if (count != 0) {
	    printf("Note: \"%s\" ", s+1);
	    printf("matches the following words in the main dictionary:\n");
	    for (dn1 = dn; dn1 != NULL; dn1 = dn1->right) {
		if (dn1->file == NULL) printf(" %s ", dn1->string);
	    }
	    printf("\n");
	}
	/* list everything that is about to go, then delete it */
	printf("Deleting:\n");
	for (dn1 = dn; dn1 != NULL; dn1 = dn1->right) {
	    printf(" %s ", dn1->string);
	}

	if (!delete_dictionary_words(s+1)) {
	    assert(FALSE, "Attempt to delete a word not in the dictonary.");
	}
	printf("\n");
	return;
    }
    /* test here for an equation */
    for (x=s; (*x != '=') && (*x != '\0') ; x++)
      ;
    if (*x == '=') {
	Dict_node * dn, * dn_new;
	int i;
	/* split s at the first '=' */
	*x = '\0';
	y = x+1;
	x = s;
	/* now x is the first word and y is the rest */

	/* a numerical right-hand side means a variable assignment */
	if (is_numerical_rhs(y)) {
	    for (i=0; user_variable[i].p != NULL; i++) {
		if (strcmp(x, user_variable[i].s) == 0) break;
	    }
	    if (user_variable[i].p == NULL) {
		printf("There is no user variable called \"%s\".\n", x);
	    } else {
		*(user_variable[i].p) = atoi(y);
		printf("%s set to %d\n", x, atoi(y));
	    }
	    return;
	}
	/* otherwise x is to become a new word defined like y */
	if (is_idiom_word(x) || is_idiom_word(y)) {
	    printf("    words ending \".Ix\" (x a number) ");
	    printf("are reserved for idioms\n");
	    return;
	}
	/* x must match nothing that is already there */
	if ((dn=abridged_lookup(x)) != NULL) {
	    printf("    \"%s\" matches words already in the dictionary:\n",s);
	    for (;dn != NULL; dn = dn->right) {
		printf("%s ", dn->string);
	    }
	    printf("\n");
	    return;
	}
	/* y must match exactly one word, spelled exactly as given */
	if ((dn=abridged_lookup(y)) == NULL) {
	    printf("    \"%s\" is not in the dictionary.\n", y);
	    return;
	}
	if ((dn->right != NULL) || (strcmp(y,dn->string) != 0)) {
	    printf("    Please be more precise.  \"%s\" matches:\n", y);
	    for (;dn != NULL; dn = dn->right) {
		printf("%s ", dn->string);
	    }
	    printf("\n");
	    return;
	}
	if (dn->file == NULL) {
	    printf("    \"%s\" is in the dictionary but not in a file.\n",y);
	    printf("    This word will not be saved.\n");
	}
	/* the new node gets its own copy of the string x, but shares
	   the file and expression pointers of y's node */
	dn_new = (Dict_node *) xalloc(sizeof(Dict_node));
	dn_new->string = (char *) xalloc(strlen(x)+1);
	strcpy(dn_new->string, x);
	dn_new->left = dn_new->right = NULL;
	dn_new->file = dn->file;
	dn_new->exp = dn->exp;
	dict_root = insert_dict(dict_root, dn_new);
	printf("    \"%s\" added to the dictionary.\n", x);
	N_dict++;
	/* mark the word file as changed (this is presumably what
	   files_need_saving() and save_files() look at) */
	if (dn->file != NULL) dn->file->changed = TRUE;
	return;
    }
    if (strcmp(s,"help") == 0) {
/* P prints one line of the help text; its argument is always a string
   literal here, and it is used directly as the printf format. */
#define P(s) {printf(s);printf("\n");}	
P("Changing the state:");
P("                                                                         ");
P("  !links            *Toggle links display.                               ");
P("  !bad-linkages     *Toggle the display of linkages with P.P violations. ");
P("  !fat-linkages     *Toggle the display of linkages with fat links.      ");
P("  !multiple         *Toggle the display of expanded sentences.           ");
P("  !graphics         *Toggle graphical display.                           ");
P("  !graphics-height  *Toggle the height of the graphics display.          ");
P("  !echo             *Toggle echoing of the input.                        ");
P("  !postscript       *Toggle the generation of postscript data.           ");
P("  !<var>=<number>   *Set the specified variable to the specified number. ");
P("                     This applies to all the above variables             ");
P("                     and a few others.                                   ");
P("                                                                         ");
P("Other commands:                                                          ");
P("                                                                         ");
P("  !show-variables   *Print name and value of all user-settable variables.");
P("  !save             *Save all changes to the word files.                 ");
P("  !dictionary       *Print all the words of the dictionary.              ");
P("  !help             *This display.                                       ");
P("  !quit             *Exit the program.                                   ");
P("  !!<string>        *Print all the dictionary words matching <string>.   ");
P("                     Also print the number of disjuncts of each.         ");
P("  !-<string>        *Delete all the dictionary words matching <string>.  ");
P("  !<string>=<word>  *This indicates that a new word (<string>) is to be  ");
P("                     added to the dictionary.  Its definition will be the");
P("                     same as that of <word>, and, if appropriate, it will");
P("                     be added to the word file of <word>.                ");
#undef P
        return;
    }
    printf("Try \"!help\".\n");
}

int sentence_in_dictionary(void){
/* Checks that every word of the sentence can be handled: a word is
   acceptable if it is in the dictionary, begins with an upper-case
   letter, is hyphenated, is a punctuation mark, or is a number.
   All the unacceptable words are printed on one line.  Returns TRUE
   if there were none, FALSE otherwise.
   isupper() is given an unsigned char value so that bytes outside
   the ASCII range cannot cause undefined behavior.
*/
    int w, ok_so_far;
    char * s;

    ok_so_far = TRUE;
    for (w=0; w<N_words; w++) {
	s = sentence[w].string;
	if (boolean_dictionary_lookup(s) ||
	    isupper((unsigned char) s[0]) ||
	    ishyphenated(s) ||
	    ispunctuation(s) ||
	    isnumber(s)) continue;
	/* the heading is printed once, before the first bad word */
	if (ok_so_far) {
	    printf("The following words are not in the dictionary:");
	    ok_so_far = FALSE;
	}
	printf(" \"%s\"", s);
    }
    if (!ok_so_far) printf("\n");
    return ok_so_far;
}

int sentence_contains(char * s) {
/* Returns TRUE if some word of the sentence is exactly the string s,
   and FALSE otherwise.
*/
    int w = 0;
    int found = FALSE;

    while ((w < N_words) && !found) {
	if (strcmp(sentence[w].string, s) == 0) found = TRUE;
	w++;
    }
    return found;
}

void set_is_conjunction(void) {
    int w;
    char * s;
    for (w=0; w<N_words; w++) {
	s = sentence[w].string;
	is_conjunction[w] = ((strcmp(s, "and")==0) || (strcmp(s, "or" )==0) ||
			     (strcmp(s, "but")==0) || (strcmp(s, "nor")==0));
    }
}

int sentence_contains_conjunction(void) {
/* Returns TRUE if any word of the sentence is marked in
   is_conjunction[], FALSE otherwise.  is_conjunction[] must already
   have been initialized.
*/
    int w = 0;

    while (w < N_words) {
	if (is_conjunction[w]) return TRUE;
	w++;
    }
    return FALSE;
}

int conj_in_range(int lw, int rw) {
/* Returns TRUE if one of the words lw, lw+1, ..., rw (inclusive) is    */
/* marked in is_conjunction[].  An empty range (lw > rw) gives FALSE.   */
    int w = lw;

    while (w <= rw) {
	if (is_conjunction[w]) return TRUE;
	w++;
    }
    return FALSE;
}

void build_deletable(void) {
/* Initialize the array deletable[i][j] to indicate if the words           */
/* i+1...j-1 could be non existant in one of the multiple linkages.  This  */
/* array is used in conjunction_prune and power_prune.  Regions of length  */
/* 0 are always deletable.  A region of length two with a conjunction at   */
/* one end is always deletable.  Another observation is that for the       */
/* comma to form the right end of a deletable region, it must be the case  */
/* that there is a conjunction to the right of the comma.  Also, when      */
/* considering deletable regions with a comma on their left sides, there   */
/* must be a conjunction inside the region to be deleted. Finally, the     */
/* words "either", "neither", "both", "not" and "not only" are all         */
/* deletable.                                                              */
   
    int i,j,k;
    for (i=0; i<N_words; i++) {
	deletable[i] = (char *) xalloc(N_words+1);
	for (j=0; j<N_words+1; j++) {
	    if (j == i+1) {
		deletable[i][j] = TRUE;
	    } else if ((j>i+2)&&(is_conjunction[i+1] || is_conjunction[j-1] ||
                                (strcmp(",",sentence[i+1].string)==0 && conj_in_range(i+2,j-1)) ||
 	                        (strcmp(",",sentence[j-1].string)==0 && conj_in_range(j,N_words-1)))){
		deletable[i][j] = TRUE;
	    } else if (j > i) {
		for (k=i+1; k<j; k++) {
		    if ((strcmp("either", sentence[k].string) == 0) ||
			(strcmp("neither", sentence[k].string) == 0) ||
			(strcmp("both", sentence[k].string) == 0) ||
			(strcmp("not", sentence[k].string) == 0)) continue;
		    if ((strcmp("only", sentence[k].string)==0) && (k > i+1) &&
                                   (strcmp("not", sentence[k-1].string)==0)) continue;
		    break;
		}
		deletable[i][j] = (k==j);
	    }
	    else {
		deletable[i][j] = FALSE;
	    }
	}
    }
}

void free_deletable(void) {
/* Gives back the N_words rows of deletable[], each of N_words+1 bytes, */
/* with xfree().                                                        */
    int row = 0;

    while (row < N_words) {
	xfree((char *)deletable[row], N_words+1);
	row++;
    }
}

void install_fat_connectors(void) {
/* Installs all the special fat disjuncts on all of the words of the   */
/* sentence */  
    int i;
    for (i=0; i<N_words; i++) {
	if (is_conjunction[i]) {
	    sentence[i].d = catenate_disjuncts(sentence[i].d,
			       build_AND_disjunct_list(sentence[i].string));
	} else {
	    sentence[i].d = catenate_disjuncts(sentence[i].d,
			       explode_disjunct_list(sentence[i].d));
	    if (strcmp(sentence[i].string, ",") == 0) {
		sentence[i].d = catenate_disjuncts(sentence[i].d,
					       build_COMMA_disjunct_list());
	    }
	}
    }
}

int compare_parse(Linkage_info * p1, Linkage_info * p2) {
/* Comparison function for sorting the linkages.  The fields
   N_violations, disjunct_cost, and_cost and link_cost are compared in
   that order; the result is the difference (p1 minus p2) of the first
   field in which the two differ, or of link_cost if the first three
   all agree.
*/
    if (p1->N_violations != p2->N_violations) {
	return (p1->N_violations - p2->N_violations);
    }
    if (p1->disjunct_cost != p2->disjunct_cost) {
	return (p1->disjunct_cost - p2->disjunct_cost);
    }
    if (p1->and_cost != p2->and_cost) {
	return (p1->and_cost - p2->and_cost);
    }
    return (p1->link_cost - p2->link_cost);
}

#if ! defined FOR_RELEASE

/* Both hold values of current_useage_time(), in seconds. */
static double when_last_called;   /* set by clear_time() and print_time() */
static double when_parse_started; /* set by clear_time() and print_total_time() */

double current_useage_time(void) {
/* Returns the user CPU time (the ru_utime field) that getrusage()
   reports for RUSAGE_SELF, converted to seconds.
*/
    struct rusage usage;
    double microseconds;

    getrusage(RUSAGE_SELF, &usage);
    microseconds = (double) usage.ru_utime.tv_usec;
    return (usage.ru_utime.tv_sec + microseconds / 1000000.0);
}

void clear_time(void) {
/* Restarts both clocks: when_parse_started and when_last_called are
   set to the current usage time.
*/
    double now = current_useage_time();

    when_parse_started = now;
    when_last_called = now;
}

void print_time(char * s) {
/* Reports the CPU time used since when_last_called was last set (by
   this function or by clear_time()), labelled with s.  The line is
   printed only when verbosity is positive, but the clock is restarted
   in either case.
*/
    double now = current_useage_time();
    double elapsed = now - when_last_called;

    when_last_called = now;
    if (verbosity <= 0) return;
    printf("++++");
    left_print_string(s, "                                           ");
    printf("%7.2f seconds\n", elapsed);
}

void print_total_time(void) {
/* Reports, under the label "Total", the CPU time used since
   when_parse_started was last set (by this function or by
   clear_time()).  The line is printed only when verbosity is positive,
   but the clock is restarted in either case.
*/
    double now = current_useage_time();
    double elapsed = now - when_parse_started;

    when_parse_started = now;
    if (verbosity <= 0) return;
    printf("++++");
    left_print_string("Total","                                           ");
    printf("%7.2f seconds\n", elapsed);
}

#else
/* FOR_RELEASE version: no timing is done, so there is nothing to clear. */
void clear_time(void) {
}
void print_time(char * s) {
/* FOR_RELEASE version: prints only the label s, with no time, and only
   when verbosity is positive.
*/
    if (verbosity <= 0) return;
    printf("++++%s\n", s);
}
/* FOR_RELEASE version: no timing is done, so nothing is printed. */
void print_total_time(void) {
}

#endif


static void report_disjunct_counts_if_verbose(char * heading) {
/* When verbosity > 1, prints heading and then the disjunct counts.   */
    if (verbosity > 1) {
	printf("%s", heading);
	print_disjunct_counts();
    }
}

static void dedupe_sentence_disjuncts(void) {
/* Passes the disjunct list of every word of the sentence through     */
/* eliminate_duplicate_disjuncts().                                   */
    int w;

    for (w = 0; w < N_words; w++) {
	sentence[w].d = eliminate_duplicate_disjuncts(sentence[w].d);
    }
}

void prepare_to_parse() {
/* Takes the sentence from expressions to a state that is ready for   */
/* parse().  The expression lists must already have been built.  The  */
/* stages run strictly in this order:                                 */
/*    expression pruning, building of the disjuncts, removal of       */
/*    duplicate disjuncts; then, only when the sentence contains a    */
/*    conjunction: gentle power pruning, conjunction pruning,         */
/*    construction of the conjunction tables and fat connectors,      */
/*    pruning and a second duplicate removal; finally ruthless power  */
/*    pruning and initialization of the fast matcher and the table.   */
/* Progress is reported through print_time() and, when verbosity > 1, */
/* through the various statistics printers.                           */
    if (echo_on) {
	print_sentence(2);
    }
    if (verbosity > 1) {
	printf("Raw expression sizes:\n");
	print_expression_sizes();
    }

    expression_prune();
    print_time("Done expression pruning");

    build_sentence_disjuncts();
    report_disjunct_counts_if_verbose(
	"After expanding expressions into disjuncts:\n");
    print_time("Built disjuncts");

    dedupe_sentence_disjuncts();
    print_time("Eliminated duplicate disjuncts");
    report_disjunct_counts_if_verbose(
	"\nAfter expression pruning and duplicate elimination:\n");

    set_is_conjunction();

    if (sentence_contains_conjunction()) {
	/* ---- stages needed only when a conjunction is present ---- */
	build_deletable();
	power_prune(GENTLE);
	report_disjunct_counts_if_verbose("\nAfter Gentle power pruning:\n");
	print_time("Finished gentle power pruning");

	/* reset the counters before conjunction pruning */
	mark_cost = 0;
	work_in_hash_lookups = 0;
	N_hash_lookups = 0;
	N_in_table = 0;
	conjunction_prune();
	if (verbosity > 1) {
	    printf("\nAfter conjunction pruning:\n");
	    print_disjunct_counts();
	    print_statistics();
	}
	free_deletable();
	print_time("Done conjunction pruning");

	build_conjunction_tables();
	install_fat_connectors();
	/* special connectors for particular words and constructions: */
	construct_either();       /* "either"                          */
	construct_neither();      /* "neither"                         */
	construct_notonlybut();   /* "not...but..." and                */
	                          /* "not only...but..."               */
	construct_both();         /* "both...and..."                   */
	construct_comma();        /* the extra comma                   */
	report_disjunct_counts_if_verbose(
	    "After conjunctions, disjuncts counts:\n");
	print_time("Constructed fat disjuncts");

	prune();
	print_time("Pruned fat disjuncts");

	dedupe_sentence_disjuncts();
	report_disjunct_counts_if_verbose(
	    "After pruning and duplicate elimination:\n");
	print_time("Eliminated duplicate disjuncts (again)");

	if (verbosity > 1) {
	    print_AND_statistics();
	}
    }

    power_prune(RUTHLESS);
    report_disjunct_counts_if_verbose("\nAfter power-pruning:\n");
    print_time("Ruthlessly power pruned");

    /* reset the counters before the parse itself */
    parse_cost = 0;
    work_in_hash_lookups = 0;
    N_hash_lookups = 0;
    N_in_table = 0;

    init_fast_matcher();
    init_table();
    print_time("Initialized fast matcher and hash table");
}

void unprepare_parse(void) {
/* Releases the tables and lists that were built for parsing the      */
/* sentence.  The calls are made in a fixed order.                    */

    /* (freeing the disjuncts won't work with folded connectors) */
    free_sentence_disjuncts();
    free_sentence_expressions();

    /* the AND tables are freed only if the sentence has a conjunction */
    if (sentence_contains_conjunction()) {
	free_AND_tables();
    }

    free_fast_matcher();
    free_strings();

    /* Freeing the lookup list here is only necessary to make the    */
    /* space_useage statistic come out to zero; it would be freed    */
    /* later anyway.                                                 */
    free_lookup_list();

    free_table();
}

int compute_linkage_arrays(int index) {
/* Calls extract_links(index) and then set_has_fat_down(), handing    */
/* back the latter's result: TRUE if there is a fat link in this      */
/* linkage, FALSE otherwise.                                          */
    int fat_link_present;

    extract_links(index);
    fat_link_present = set_has_fat_down();
    return fat_link_present;
}

void loop(void) {
/* The interactive top level.  Repeats forever:                       */
/*    * prompt with "> " and read one line from stdin;                */
/*    * hand lines starting with '!' to special_command();            */
/*    * otherwise split the line into words, build the expressions,   */
/*      call prepare_to_parse() and parse();                          */
/*    * post-process every linkage, keep the canonical ones, and sort */
/*      them with compare_parse;                                      */
/*    * report whether the sentence is accepted and, if display_on,   */
/*      show the linkages one at a time (all canonical ones when      */
/*      display_bad is set, otherwise only those with no violation);  */
/*    * free everything and report any leaked space.                  */
/* While linkages are being displayed the user presses return to see  */
/* the next one; a reply that is not blank is kept in s and used as   */
/* the next input line.  The function never returns: the program      */
/* exits with status 1 when stdin is exhausted.                       */
    int i, N_to_display, index;
    int N_linkages;
    int N_canonical_linkages;
    int N_valid_linkages;
    int has_fatlinks;
    int c;
    size_t len;
    Linkage_info * linkage=NULL;
    char s[MAX_LINE], message[80];
    int s_set = FALSE;

    for(;;) {

	free_lookup_list(); /* to prevent spurious space-use message */
	free_strings();     /* ditto */
	max_space_in_use = 0;
	space_in_use = 0;

	clear_time();  /* initialize the timing variables          */
	               /* the above freeing operations will not be */
	               /* counted anywhere                         */

	if (!s_set) {
	    printf("> ");
	    if (fgets(s, MAX_LINE, stdin) == NULL) exit(1);
	}
	s_set = FALSE;

	len = strlen(s);
	if (len == 0) {
	    /* fgets can hand back an empty string (a line that starts */
	    /* with a NUL byte); s[len-1] must not be touched then     */
	    continue;
	}
	if (s[len-1] != '\n') {
	    printf("Your sentence is too long\n");
	    if (len == (size_t) (MAX_LINE - 1)) {
		/* the buffer filled up before the end of the line:  */
		/* throw the rest of the line away so that it is not */
		/* mistaken for the next sentence                    */
		while (((c = getchar()) != EOF) && (c != '\n'))
		  ;
	    }
	    continue;
	}
	if (s[0] == '!') {
	    special_command(s);
	    continue;
	}
	if (!separate_sentence(s)) continue;
	if (!sentence_in_dictionary()) continue;
	if (!build_sentence_expressions()) continue;

	print_time("Constructed disjunct expressions");

	prepare_to_parse();
	N_linkages = parse();

	sprintf(message, "Counted %d linkges", N_linkages);
	print_time(message);

	N_canonical_linkages = N_valid_linkages = 0;

	if (N_linkages != 0) {
	    linkage = (Linkage_info *)xalloc(N_linkages*sizeof(Linkage_info));

	    /* Analyze each linkage; the canonical ones are packed into */
	    /* the front of linkage[], each remembering its own index.  */
	    for (index=0; index < N_linkages; index++) {
		has_fatlinks = compute_linkage_arrays(index);
		if (has_fatlinks) {
		    if (!is_canonical_linkage()) continue;
		    linkage[N_canonical_linkages]=analyze_fat_linkage(FALSE);
		} else {
		    linkage[N_canonical_linkages]=analyze_thin_linkage(FALSE);
		}
		if (linkage[N_canonical_linkages].N_violations == 0) {
		    N_valid_linkages++;
		}
		linkage[N_canonical_linkages].index = index;
		N_canonical_linkages ++;
	    }
	    /* compare_parse sorts on N_violations first, so the */
	    /* linkages without violations end up at the front   */
	    qsort((void *)linkage, N_canonical_linkages, sizeof(Linkage_info),
	      (int (*)(void *, void *))compare_parse);
	}

	assert(! ((N_canonical_linkages == 0) && (N_linkages > 0)),
	       "None of the linkages is canonical");

	print_time("Postprocessed all linkages");

	s_set = FALSE;
	if (N_canonical_linkages == 0) {
	    printf("Not accepted (no linkage exists)\n");
	} else if (N_canonical_linkages==1) {
	    if  (N_valid_linkages == 0) {
		printf("Not accepted");
		printf(" (unique linkage with P.P. violation)\n");
	    } else {
		printf("Accepted\n");
	    }
	} else {
	    if (N_valid_linkages == 0) {
	      printf("Not accepted");
	      printf(" (%d linkages, all with P.P. violations)\n",
		     N_canonical_linkages);
	    } else {
		printf("Accepted (%d linkages, %d with no P.P. violations)\n",
		       N_canonical_linkages, N_valid_linkages);
	    }
	}

	if (display_bad) {
	    N_to_display = N_canonical_linkages;
	} else {
	    N_to_display = N_valid_linkages;
	}

	if (display_on) {
	    for (index=0; index < N_to_display; index++) {
		has_fatlinks = compute_linkage_arrays(linkage[index].index);
		compute_chosen_words();
		if (N_canonical_linkages > 1) {
		    printf("  Linkage %d", index+1);
		} else {
		    printf("  Unique linkage");
		}
		if (linkage[index].N_violations > 0) {
		    printf(" (bad), ");
		} else {
		    printf(", ");
		}
		printf("cost vector = (%d, %d, %d)\n",
		       linkage[index].disjunct_cost,
		       linkage[index].and_cost,
		       linkage[index].link_cost);
		if (has_fatlinks) {
		    (void) analyze_fat_linkage(TRUE);
		} else {
		    (void) analyze_thin_linkage(TRUE);
		}
		if (index == N_to_display-1) break;
		printf("(press return for another)\n");
		printf("> ");
		if (fgets(s, MAX_LINE, stdin) == NULL) exit(1);
		/* <ctype.h> functions need an unsigned char value */
		for (i=0; s[i] != '\0'; i++) {
		    if (!isspace((unsigned char) s[i])) break;
		}
		if (s[i] != '\0') {
		    /* not a blank reply: treat it as the next input line */
		    s_set = TRUE;
		    break;
		}
	    }
	}
	if (N_linkages != 0) {
	    xfree((char *) linkage, N_linkages*sizeof(Linkage_info));
	}
	print_time("Displayed linkages");
	unprepare_parse();
	print_time("Freed all data structures");

	if (verbosity > 1) print_statistics();
	if (space_in_use != 0){
	    printf("Warning: %d bytes of space leaked.\n", space_in_use);
	}
	print_total_time();
    }
}

static void batch_display_linkage(int index) {
/* Recomputes the arrays for linkage number index and prints it by    */
/* calling analyze_fat_linkage(TRUE) or analyze_thin_linkage(TRUE),   */
/* according to whether the linkage has a fat link.                   */
    int has_fatlinks;

    has_fatlinks = compute_linkage_arrays(index);
    compute_chosen_words();
    if (has_fatlinks) {
	(void) analyze_fat_linkage(TRUE);
    } else {
	(void) analyze_thin_linkage(TRUE);
    }
}

void batch_process(void) {
/* Reads sentences from stdin, one per line, until end of file and    */
/* counts those whose outcome disagrees with their marking.           */
/*                                                                    */
/* Everything from a '%' to the end of the line is discarded.  The    */
/* first non-blank character may be a "magic char" (it is replaced by */
/* a blank before parsing):                                           */
/*    '*'  no complaint if there is no linkage or only linkages with  */
/*         violations; an error if a violation-free linkage exists    */
/*    '#'  no complaint if there is no linkage; an error otherwise    */
/*    '+'  no complaint if every linkage has a violation; an error if */
/*         there is no linkage or a violation-free one exists         */
/*    ':'  an error if there is no linkage, if every linkage has a    */
/*         violation, or if the best linkage has disjunct cost 0      */
/* An unmarked sentence is an error unless it has a violation-free    */
/* linkage.  A word missing from the dictionary and a failure of      */
/* build_sentence_expressions() are also errors.  Lines beginning     */
/* with '!' go to special_command().  Post-processing stops at the    */
/* first linkage with no violation.  A summary of the error count is  */
/* printed at the end.                                                */
    int i;
    int N_linkages;
    int N_canonical_linkages;
    int N_valid_linkages;
    int index;
    int has_fatlinks;
    int c;
    size_t len;
    char * comment_start;
    Linkage_info * linkage=NULL;

    char s[MAX_LINE];
    int errors;
    int mc;  /* the magic char */

    errors = 0;

    for(;;) {
	free_lookup_list(); /* to prevent spurious space-use message */
	free_strings();     /* ditto */

	max_space_in_use = 0;
	space_in_use = 0;
	clear_time();

	if (fgets(s, MAX_LINE, stdin) == NULL) break;

	len = strlen(s);
	if (len == 0) {
	    /* fgets can hand back an empty string (a line that starts */
	    /* with a NUL byte); s[len-1] must not be touched then     */
	    continue;
	}
	if (s[len-1] != '\n') {
	    printf("Sentence too long\n");
	    if (len == (size_t) (MAX_LINE - 1)) {
		/* the buffer filled up before the end of the line:  */
		/* throw the rest of the line away so that it is not */
		/* mistaken for the next sentence                    */
		while (((c = getchar()) != EOF) && (c != '\n'))
		  ;
	    }
	    continue;
	}

	/* a '%' starts a comment that runs to the end of the line */
	comment_start = strchr(s, '%');
	if (comment_start != NULL) *comment_start = '\0';

	/* <ctype.h> functions need an unsigned char value */
	for (i=0; isspace((unsigned char) s[i]); i++)
	  ;
	if (s[i] == '\0') continue;
	if ((s[i] == '*') || (s[i] == '+') || (s[i] == '#') || (s[i] == ':')) {
	    mc = s[i];
	    s[i] = ' ';
	} else {
	    mc = ' ';
	}

	if (s[0] == '!') {
	    special_command(s);
	    continue;
	}
	if (!separate_sentence(s)) continue; /* blank lines are not an error */
	if (!sentence_in_dictionary()) {
	    errors++;      /* an error if a word is not in the dictionary */
	    printf("--- in the following sentence:\n");
	    printf("%c ", mc);
	    print_sentence(2);
	    printf("\n");
	    continue;
	}
	if (!build_sentence_expressions()) {
	    errors++;
	    continue;
	}
	print_time("Constructed disjunct expressions");
	if (echo_on) printf("%c ", mc);
	prepare_to_parse();

	N_linkages = parse();
	print_time("Counted linkages");

	N_canonical_linkages = N_valid_linkages = 0;

	if (N_linkages != 0) {
	    linkage = (Linkage_info *)xalloc(N_linkages*sizeof(Linkage_info));

	    for (index=0; (index < N_linkages) && (N_valid_linkages == 0); index++) {
		/* note that we break after finding the first valid linkage. */
		has_fatlinks = compute_linkage_arrays(index);
		if (has_fatlinks) {
		    if (!is_canonical_linkage()) continue;
		    linkage[N_canonical_linkages]=analyze_fat_linkage(FALSE);
		} else {
		    linkage[N_canonical_linkages]=analyze_thin_linkage(FALSE);
		}
		if (linkage[N_canonical_linkages].N_violations == 0) {
		    N_valid_linkages++;
		}
		linkage[N_canonical_linkages].index = index;
		N_canonical_linkages ++;
	    }
	    qsort((void *)linkage, N_canonical_linkages, sizeof(Linkage_info),
	                             (int (*)(void *, void *))compare_parse);
	}

	print_time("Postprocessed some linkages");

	if (N_canonical_linkages == 0) {
	    /* no linkage at all */
	    if (!((mc == '*') || (mc == '#'))) {
		errors++;
		if (!echo_on) {
		    /* (the magic char has never been printed on this */
		    /* path: the old test for echo_on here could not  */
		    /* succeed inside the !echo_on branch)            */
		    printf("--- No linkage for: ");
		    print_sentence(13);
		} else {
		    printf("--- No linkage for this\n");
		}
	    }
	} else if (N_valid_linkages == 0) {
	    /* linkages exist, but every one has a violation */
	    if (!((mc == '+') || (mc == '*'))) {
		if (N_canonical_linkages == 1) {
		    printf("--- unique linkage, with post-processing violation\n");
		} else {
		    printf("--- %d linkages", N_canonical_linkages);
		    printf(", all with post-processing violations\n");
		}
		errors++;
		batch_display_linkage(linkage[0].index);
	    }
	} else if ((mc == '+') || (mc == '*') || (mc == '#')) {
	    /* a violation-free linkage for a sentence marked as bad */
	    errors++;
	    if (!echo_on) {
		printf("--- Input: ");
		printf("%c ", mc);
		print_sentence(13);
	    } else {
		printf("--- Accepted:\n");
	    }
	    batch_display_linkage(linkage[0].index);
	} else if ((linkage[0].disjunct_cost == 0) && (mc == ':')) {
	    /* a ':' sentence whose best linkage has disjunct cost 0 */
	    errors++;
	    if (!echo_on) {
		printf("--- Input: : ");
		print_sentence(13);
	    } else {
		printf("--- Accepted:\n");
	    }
	    printf("p.p. violations: %d, disjunct cost: %d\n",
		   linkage[0].N_violations, linkage[0].disjunct_cost);
	    batch_display_linkage(linkage[0].index);
	}

	if (N_linkages != 0) {
	    xfree((char *) linkage, N_linkages*sizeof(Linkage_info));
	}

	fflush(stdout);
	print_time("Displayed linkages");
	unprepare_parse();
	print_time("Freed all data structures");

	if (verbosity > 1) print_statistics();
	if (space_in_use != 0){
	    printf("Warning: %d bytes of space leaked.\n", space_in_use);
	}
	print_total_time();
    }
    if (errors == 0) {
	printf("No errors!\n");
    } else if (errors == 1) {
	printf("One error.\n");
    } else {
	printf("%d errors.\n", errors);
    }
}

int main(int argc, char** argv) {
/* Program entry point.                                               */
/*    argv[1] is the dictionary file (required);                      */
/*    argv[2] may be "-batch" to select batch mode.                   */
/* Any other argument list prints a usage line and exits with         */
/* status 1.  After the dictionary has been read and the banner       */
/* printed, control goes to batch_process() in batch mode and to the  */
/* interactive loop() otherwise.  Returns 0 when batch_process()      */
/* comes back; loop() does not return.                                */
    int batch;

/*    argc = ccommand(&argv);   this  is for the mac */
    if (argc == 2) {
	batch = FALSE;
    } else if ((argc==3) && (strcmp(argv[2], "-batch") == 0)) {
	batch = TRUE;
    } else {
	printf("Useage: %s dictionary-file [-batch]\n",argv[0]);
	exit(1);
    }

    open_dictionary(argv[1]);
    printf("\n Reading the dictionary files: ");
    read_dictionary();
    printf("\n %d words stored in %d bytes\n\n\n", N_dict, space_in_use);

    printf("\
                      Welcome to the Link Parser                       \n\
                     (Version of March 26th 1992)                     \n\
         \n");
    printf("          ");
    /* the order of the two authors' names depends on the parity of */
    /* the current time                                             */
    if ((((int) time(NULL)) % 2) == 0) {
	printf("Copyright 1991, 1992 Daniel Sleator and Davy Temperley\n\n");
    } else {
	printf("Copyright 1991, 1992 Davy Temperley and Daniel Sleator\n\n");
    }

    init_randtable();

    /* note which of the special words the dictionary defines */
    wall_defined = boolean_dictionary_lookup(WALL_WORD);
    postprocess_defined = boolean_dictionary_lookup(POSTPROCESS_WORD);
    andable_defined = boolean_dictionary_lookup(ANDABLE_CONNECTORS_WORD);

    if (andable_defined) {
	init_andable_hash_table();
    }

    if (batch) {
	printf("Processing sentences in batch mode\n");
	printf("\n");
	batch_process();
    } else {
	printf("Type your sentence and press Return ");
	printf("(\"!help\" for options).\n");
	printf("\n");
	loop();
    }
    /* main must return int in a hosted program; with "void main" */
    /* the exit status after batch mode was unspecified           */
    return 0;
}
