/* Phrase.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/*
 * Deal with (WID, FID, Offset) triples
 * Liam Quin, September 1989
 *
 * $Id: Phrase.c,v 1.14 92/07/30 22:25:52 lee Exp $
 *
 */

/** Unix system calls that need to be declared: **/
extern void exit();
/** Unix/C Library Functions: **/
extern unsigned int sleep();
#ifndef tolower
 extern int tolower();
#endif
/** lqtext functions: **/
extern int TooCommon();
extern char *UnFlag();
/** functions within this file that need forward declarations **/
static int CheckFlags();
/** **/

#include "globals.h" /* defines and declarations for database filenames */

#include <stdio.h> /* stderr, also for fileinfo.h */
#include <fcntl.h>
#include <malloc.h>
#include <sys/types.h>
#include <ctype.h>
#ifdef BSD
# include <strings.h>
#else
# include <string.h>
#endif

#include "fileinfo.h" /* for wordinfo.h */
#include "wordinfo.h"
#include "pblock.h"
#include "phrase.h"
#include "wordrules.h"

#include "emalloc.h"

/* STREQ(a, b): true when the two strings are equal.  The first characters
 * are compared before strcmp() is called.  Both arguments are evaluated
 * more than once, so do not pass expressions with side effects.
 */
#ifndef STREQ
# define STREQ(boy,girl) ((*(boy) == *(girl)) && (!strcmp((boy),(girl))))
#endif

/* new(type): allocate space for one object of the given type with
 * emalloc() and cast the result to a pointer to that type.
 */
#ifndef new
# define new(type) ((type *) emalloc(sizeof (type)))
#endif

/* Default value for MAXPHRASELEN if nothing else has defined it. */
#ifndef MAXPHRASELEN
# define MAXPHRASELEN 2000
#endif

extern int AsciiTrace;

/*
 * String2Phrase -- turn a query string into a t_Phrase.
 *
 * Walks along String splitting it into words (using StartsWord() and
 * WithinWord()).  Each word that is long enough, is not too common and is
 * found in the index gets a t_PhraseItem appended to Result->Words.
 * All other words are copied into Result->ModifiedString surrounded by
 * brackets:
 *     [word]   too short, or no letters seen after the first character
 *     [*word]  TooCommon() said the word is too common
 *     [?word]  Word2WID() did not find the word
 *     [@word]  a WID was found but WID2WordInfo() returned nothing
 *
 * String must be writable: a NUL is temporarily stored at the end of each
 * word while it is being looked up, and restored afterwards.  The WordStart
 * member of each t_PhraseItem points into the caller's String, so String
 * must outlive the returned phrase.
 *
 * Returns a newly allocated phrase, or a null pointer if String is null,
 * if memory ran out, or if no word of the phrase was found in the index.
 */
t_Phrase *
String2Phrase(String)
    char *String;
{
    extern t_WordInfo *WID2WordInfo();
    extern t_WID Word2WID();
    extern char *WordRoot();

    t_Phrase *Result;
    t_PhraseItem **ThisWord;	/* where to link in the next phrase item */
    register char *p;		/* write position in ModifiedString */
    register char *q;		/* read position in String */
    char *LastStart = 0;	/* start of the word being scanned, if any */
    char *PrevLastEnd = (char *) 0;
    int InWord = 0;
    int Flags = 0;		/* flags carried over to the next indexed word */
    int FoundLetters = 0;

    if (!String) {
	return (t_Phrase *) 0;
    }

    if (AsciiTrace > 50) {
	fprintf(stderr, "String2Phrase(%s)\n", String);
    }
    Result = (t_Phrase *) emalloc(sizeof(t_Phrase));
    Result->Next = (t_Phrase *) 0;
    /* (* 3 because in the worst case, "a a a" expands to "[a] [a] [a]") */
    p = Result->ModifiedString = emalloc(strlen(String) * 3 + 1);

    Result->HasUnknownWords = 0;

    *(ThisWord = &Result->Words) = (t_PhraseItem *) 0;

    /* March along the supplied phrase, looking for keywords.
     * Surround unindexed or short words with [brackets].
     * Words are converted to lower case (in a private copy) before being
     * looked up; the text copied to ModifiedString is left as typed.
     */
    for (q = String; /*LOTSOFTIMES*/; q++) {

	if (AsciiTrace > 50) {
	    fputc(*q, stderr);
	}

	if (!InWord && !StartsWord(*q)) {
	    if (!*q) {
		break;
	    } else {
		if (!LastStart) continue;
	    }
	}

	if (!InWord) {
	    /* First character of a new word.  Note that this character
	     * is not examined for FoundLetters.
	     */
	    LastStart = q;
	    if (StartsWord(*q)) {
		InWord = 1;
	    }
	    continue;
	} else if (isalpha((unsigned char) *q)) {
	    /* in a word and found letters, so remember in case we skip
	     * this word...
	     */
	    FoundLetters = 1;
	}

	/* ASSERT: inword == 1 */

	/* An apostrophe ends the word unless it is followed by a
	 * character that can continue a word.
	 */
	if (*q == '\'') {
	    if (!WithinWord(q[1])) {
		InWord = 0;
	    }
	}

	if (!*q || !WithinWord(*q)) {
	    InWord = 0;
	}


	if (LastStart && !InWord) {
	    int Length = q - LastStart;
	    int UsedABracket = 0;

	    if (p > Result->ModifiedString) *p++ = ' ';

	    /* we have reached the end of a word, is it long enough? */
	    if (!FoundLetters) {
		*p++ = '[';
		UsedABracket = 1;
	    } else if (Length < MinWordLength) {
		*p++ = '[';
		UsedABracket = 1;
		if (FoundLetters) {
		    Flags |= WPF_LASTHADLETTERS;
		    FoundLetters = 0;
		}
	    } else {
		t_WID WID;
		t_WordInfo *W;
		char SaveEnd = (*q);
		t_WordInfo TryRoot;
		register char *p2;
		char RootBuffer[MaxWordLength + 1];
		char *R = RootBuffer;
		int RootLength = Length;

		/* Never copy more than RootBuffer can hold.
		 * NOTE(review): this looks up over-long words by their
		 * first MaxWordLength characters, presumably as the
		 * indexer stores them -- confirm against the indexing code.
		 */
		if (RootLength > MaxWordLength) {
		    RootLength = MaxWordLength;
		}

		/* Terminate the word in place so that it can be used
		 * as a string; *q is restored below.
		 */
		*q = '\0';

		FoundLetters = 0; /* unnecessary now (?) */
		TryRoot.Length = RootLength;
		TryRoot.WordPlace.Flags = Flags;
		if (isupper((unsigned char) *LastStart)) {
		    TryRoot.WordPlace.Flags |= WPF_UPPERCASE;
		}
		Flags = 0;

		/* Make a lower-case copy of the word to look up */
		for (p2 = LastStart; p2 < LastStart + RootLength; p2++) {
		    *R++ = isupper((unsigned char) *p2) ?
				tolower((unsigned char) *p2) : *p2;
		}
		*R = '\0';
		TryRoot.Word = RootBuffer;

		/* WordRoot() is handed the t_WordInfo itself; its return
		 * value is not needed here.
		 */
		(void) WordRoot(&TryRoot);

		if (TooCommon(&TryRoot)) {
		    *p++ = '[';
		    *p++ = '*';
		    UsedABracket = 1;
		    Flags |= WPF_LASTWASCOMMON;
		    if (AsciiTrace > 10) {
			fprintf(stderr, " Common(%s) ", TryRoot.Word);
		    }
		} else if (!(WID = Word2WID(TryRoot.Word, TryRoot.Length)) ||
			    (W = WID2WordInfo(WID)) == (t_WordInfo *) 0) {
		    *p++ = '[';
		    *p++ = WID ? '@' : '?';
		    UsedABracket = 1;
		    if (AsciiTrace > 10) {
			fprintf(stderr, " Unknown(%s) ", TryRoot.Word);
		    }
		    Result->HasUnknownWords++;
		} else {
		    if ((*ThisWord = new(t_PhraseItem)) == (t_PhraseItem *) 0) {
			/* Put the caller's string back as we found it
			 * before giving up.
			 */
			*q = SaveEnd;
			fprintf(stderr,
			"Not enough memory for PHRASE \"%s\"", String);
			return (t_Phrase *) 0;
		    }
		    W->WordPlace.Flags |= TryRoot.WordPlace.Flags;
		    if (PrevLastEnd == (char *) 0) {
			W->WordPlace.StuffBefore = 0;
		    } else {
			W->WordPlace.StuffBefore = LastStart - PrevLastEnd;
		    }
		    PrevLastEnd = &q[1];

		    if (AsciiTrace) {
			fprintf(stderr, "Word %s --> %s, %lu matches\n",
			    LastStart,
			    UnFlag(W, W->WordPlace.Flags),
			    W->NumberOfWordPlaces);
		    }
		    /* point to the new space */
		    (*ThisWord)->Word = W;
		    (*ThisWord)->WordStart = LastStart;
		    (*ThisWord)->Next = (t_PhraseItem *) 0;
		    (*ThisWord)->SearchIndex = 0L;
		    ThisWord = &(*ThisWord)->Next;

		    /* Copy the word as typed into the canonical string */
		    (void) strcpy(p, LastStart);
		    p += q - LastStart; /* q points one beyond the end */

		    /* nothing left for the copy loop below to do */
		    LastStart = q;

		}
		*q = SaveEnd;
	    }
	    /* Copy a bracketed (skipped) word as it was typed */
	    while (LastStart < q) {
		*p++ = *LastStart++;
	    }
	    if (UsedABracket) {
		*p++ = ']';
	    }
	    LastStart = 0;
	} /* if */
	if (!*q) break;
    } /* for */
    *p= '\0';

    if (ThisWord == &Result->Words) {
	/* There were no words in the phrase!
	 * Nothing else refers to the partial result, so release it.
	 */
	efree(Result->ModifiedString);
	efree((char *) Result);
	return (t_Phrase *) 0;
    }

    /* q is left pointing at the terminating NUL of String */
    Result->OriginalString = emalloc(q - String + 2);
    (void) strcpy(Result->OriginalString, String);

    Result->NumberOfMatches = 0;
    Result->Matches = (t_MatchList *) 0;
    if (AsciiTrace > 1) {
	fprintf(stderr, "phrase \"%s\",\n", Result->OriginalString);
	fprintf(stderr, "Canonical form \"%s\"\n", Result->ModifiedString);
    }
    return Result;
}

#define MaxDistance 20

/* Append one answer describing FID (see MakeOneDescription()) at *Tail.
 * Returns the address of the new node's Next link, or a null pointer if
 * the node could not be allocated (the list is left terminated).
 */
static t_Answer **
AppendFileAnswer(Tail, FID, NumberOfMatches)
    t_Answer **Tail;
    t_FID FID;
    unsigned long NumberOfMatches;
{
    char *MakeOneDescription();

    char *Description = MakeOneDescription(FID, NumberOfMatches);

    if ((*Tail = new(t_Answer)) == (t_Answer *) 0) {
	if (Description) {
	    efree(Description);
	}
	return (t_Answer **) 0;
    }
    /* Description may be null if MakeOneDescription() failed */
    (*Tail)->Answer = Description;
    (*Tail)->Next = (t_Answer *) 0;
    return &(*Tail)->Next;
}

/*
 * GetFiles -- build a list with one t_Answer for each file that
 * contains matches of the given phrase.
 *
 * Consecutive entries of Phrase->Matches with the same FID are counted
 * together, and one description (made by MakeOneDescription()) is
 * produced for each run.  Entries whose Match pointer is null are skipped:
 * MakeMatches() leaves such entries in the list for starting places that
 * turned out not to match.
 *
 * Returns the head of the new list, or a null pointer if there is no
 * phrase or there are no matches.
 */
t_Answer *
GetFiles(Phrase)
    t_Phrase *Phrase;
{
    t_Answer *Result = 0;
    t_Answer **RP = &Result;	/* where to link in the next answer */
    t_MatchList *MP;
    t_FID LastFID = 0;		/* only meaningful when the count is > 0 */
    unsigned long ThisFIDNumberOfMatches = 0L;

    if (!Phrase || !Phrase->Matches) {
	return Result;
    }

    for (MP = Phrase->Matches; MP; MP = MP->Next) {
	t_FID ThisFID;

	if (!MP->Match || !MP->Match->Where) {
	    continue; /* a rejected starting place, nothing to count */
	}
	ThisFID = MP->Match->Where->FID;

	if (ThisFIDNumberOfMatches && ThisFID != LastFID) {
	    /* Started a new file: emit the answer for the previous one */
	    RP = AppendFileAnswer(RP, LastFID, ThisFIDNumberOfMatches);
	    if (!RP) {
		return Result;
	    }
	    ThisFIDNumberOfMatches = 0L;
	}

	/* Count this match, including the first one of each file */
	LastFID = ThisFID;
	++ThisFIDNumberOfMatches;
    }

    if (ThisFIDNumberOfMatches) {
	/* Don't forget the last file */
	(void) AppendFileAnswer(RP, LastFID, ThisFIDNumberOfMatches);
    }

    return Result;
}

/*
 * MakeOneDescription -- make a one-line description of a file.
 *
 * The result is a string allocated with emalloc() of the form
 *	<count> <date> <filename>
 * where <date> is the first ten characters of the ctime() form of the
 * file's Date (e.g. "Tue Oct  3").  The caller owns the result.
 *
 * Returns a null pointer if FID is zero or GetFileInfo() fails.
 */
char *
MakeOneDescription(FID, NumberOfMatches)
    t_FID FID;
    unsigned long NumberOfMatches;
{
    extern char *ctime();
    extern t_FileInfo *GetFileInfo();

    char *Date;
    char *p;
    t_FileInfo *FileInfo;
    /* 3 characters per byte is ample for a decimal unsigned long */
    char NumBuf[sizeof(unsigned long) * 3 + 2];

    if (!FID) return (char *) 0;

    if (!(FileInfo = GetFileInfo(FID))) return (char *) 0;

    Date = ctime(&(FileInfo->Date));
    /** Tue Oct  3 00:57:11 BST 1989 **/
    if (Date) {
	Date[10] = '\0'; /* keep only "Tue Oct  3" */
    } else {
	Date = ""; /* ctime() could not convert the time */
    }

    (void) sprintf(NumBuf, "%lu", NumberOfMatches);

    /* Room for the three strings, two separating spaces and the NUL.
     * (The precision in the format below can only make the number
     * shorter, never longer.)
     */
    p = emalloc((unsigned) (strlen(FileInfo->Name) + strlen(NumBuf) +
						    strlen(Date) + 3));
    /* NOTE(review): "%-.5s" prints at most the first five digits of the
     * count, so counts of 100000 or more are shown truncated.  Left as
     * it was in case something parses this line -- confirm the intent.
     */
    (void) sprintf(p, "%-.5s %s %s", NumBuf, Date, FileInfo->Name);
    efree(FileInfo->Name);
    efree((char *) FileInfo);
    return p;
}

void
ResetPhraseMatch(Phrase)
    t_Phrase *Phrase;
{
    t_PhraseItem *Word;

    if (!Phrase || !Phrase->Words) return;

    for (Word = Phrase->Words; Word; Word = Word->Next) {
	Word->SearchIndex = 0;
    }
    Phrase->NumberOfMatches = 0;
}

/* Default is to check case, etc. only if given in the input phrase.
 * This is an enum from phrase.h, and only used in MakeMatches().
 */

extern t_PhraseCaseMatch PhraseMatchLevel;

long
MakeMatches(Phrase)
    t_Phrase *Phrase;
{
    /* Each word has a pointer (SearchIndex) to the last Word Place
     * that was examined.  This enables an O(NumberOfWords) search instead
     * of O(NumberOfWords * NumberOfWords) search.
     */
    static int ContinuesMatch();

    unsigned long PIFB; /* PlaceInFirstBlock */
    t_MatchList **MLPP = &(Phrase->Matches);
    t_Match **MPP;
    t_Match **OldMatch;
    t_WordPlace *pp;
    t_PhraseItem *Word;
    long Result = 0L;
    long LastResult = (-1L); /* to detect new matches */
    t_PhraseItem *LeastWord;
    int HowGood;

    if (!Phrase) {
	return 0L;
    }

    ResetPhraseMatch(Phrase);
    /* Each iteration over this list either produces a match or rejects a
     * possible phrase starting place.
     */

    if (AsciiTrace > 1) {
	fprintf(stderr, "Match(%s)\n", Phrase->ModifiedString);
    }

    /* A phrase with garbage words can't match anything */
    if (Phrase->HasUnknownWords && PhraseMatchLevel != PCM_AnyCase) {
	return 0L;
    }

    /* Ensure that the matches for the first word have been read */
    if (Phrase->Words->Word->WordPlacesInHere <
		    Phrase->Words->Word->NumberOfWordPlaces) {
	extern t_WordPlace *GetWordPlaces();
	t_WordInfo *W = Phrase->Words->Word; /* less indirection! */

	if (W->WordPlaces) {
	    (void) efree(W->WordPlaces);
	}

	W->WordPlaces = GetWordPlaces(
	    W->WID,
	    W->WordPlaceStart,
	    (unsigned) WIDBLOCKSIZE - (W->WordPlaceStart - W->DataBlock),
	    W->Offset,
	    W->NumberOfWordPlaces
	);
	W->WordPlacesInHere = W->NumberOfWordPlaces;
    }

    /* Find the word in the phrase with least matches: */
    LeastWord = Phrase->Words;
    for (Word = Phrase->Words; Word; Word = Word->Next) {
	if (Word->Word->NumberOfWordPlaces <
					LeastWord->Word->NumberOfWordPlaces) {
	    LeastWord = Word;
	}
    }

    /* For each match in the first word in the phrase: */
    for (PIFB = 0; PIFB < Phrase->Words->Word->NumberOfWordPlaces; PIFB++) {
	t_WordPlace *LastFOP = (t_WordPlace *) 0;

	/* The idea is that the next two loops are are likely to reduce
	 * considerably the number of places we have to consider in the
	 * case that the first word in the phrase has a lot of matches
	 * and there is a subsequent word with relatively few matches.
	 * Experiments suggest that this is fairly common.
	 *
	 * This is still a nearly (i.e. slightly-better-than) linear
	 * algorithm w.r.t the total number of matches in all of the
	 * words added up.  Note that I alter LeastWord->SearchIndex in
	 * one of the two loops that follow, so when WordPlaces from that
	 * word are considered, we don't have to look at any twice.
	 *
	 * In order to do better, one would have to be able to avoid
	 * looking at some or (better!) most of the WordPlaces.
	 *
	 * For example, not fetching so many from disk:
	 * if we didn't do the fetches until we needed to, and we gave
	 * GetWordPlaces a minimum FID to look for, we might be able
	 * to reduce things by (say) 15%.
	 * If all of the FIDS were stored separately, we would not
	 * have to look at the (Block, Word, Flags, StuffBefore) stuff at
	 * all, and that would be much faster.  One way to do that might be
	 * to store the list of FIDs with the word (as now), and perhaps
	 * some flags and the count of words/fid, and to store the rest
	 * in a per-file data structure.
	 *
	 * That would be a major, major hack...
	 * 					... sigh.o
	 *
	 */

	while (LeastWord->Word->WordPlaces[LeastWord->SearchIndex].FID <
		    Phrase->Words->Word->WordPlaces[PIFB].FID) {
	    if (++(LeastWord->SearchIndex) >=
					LeastWord->Word->NumberOfWordPlaces) {
		goto GiveUp;
	    }
	}

	while (Phrase->Words->Word->WordPlaces[PIFB].FID <
		LeastWord->Word->WordPlaces[LeastWord->SearchIndex].FID) {
	    if (++PIFB >= Phrase->Words->Word->NumberOfWordPlaces) {
		goto GiveUp;
	    }
	}

	/* The following comment tells Sabre_C not to moan about "if (0)" */
	/*SUPPRESS558*/
	if (0) {
GiveUp:
	    break;
	}
	/* end of attempted speed improvement */

	/* Optimistically allocate a new match: */
	if (1 || Result != LastResult) {
	    *MLPP = (t_MatchList *) emalloc(sizeof(t_MatchList));
	    (*MLPP)->Match = (t_Match *) 0;
	    OldMatch = MPP = &((*MLPP)->Match);
	    MLPP = &(*MLPP)->Next;
	    *MLPP = (t_MatchList *) 0;
	}
	LastResult = Result;

	pp = &Phrase->Words->Word->WordPlaces[Phrase->Words->SearchIndex = PIFB];
	/* When we have a partially completed match,
	 * FOP (declared below) will point to the WordPlace currently
	 * being considered to see if it extends the partial match;
	 * LastFOP points to the previous WordPlace in the match.
	 */

	/* For each word in the phrase: */
	for (Word = Phrase->Words; Word; Word = Word->Next) {
	    int GotOne = 0;

	    /* Ensure that the matches word have been read */
	    if (Word->Word->WordPlacesInHere <
			    Word->Word->NumberOfWordPlaces) {
		extern t_WordPlace *GetWordPlaces();
		t_WordInfo *W = Word->Word; /* less indirection! */

		if (W->WordPlaces) {
		    (void) efree(W->WordPlaces);
		}
		W->WordPlaces = GetWordPlaces(
		    W->WID,
		    W->WordPlaceStart,
		    (unsigned) WIDBLOCKSIZE - (W->WordPlaceStart - W->DataBlock),
		    W->Offset,
		    W->NumberOfWordPlaces
		);
		W->WordPlacesInHere = W->NumberOfWordPlaces;
	    }

	    /* For each occurrence of that word: */
	    for (; Word->SearchIndex < Word->Word->NumberOfWordPlaces;
							++Word->SearchIndex) {
		register t_WordPlace *FOP =
			&Word->Word->WordPlaces[Word->SearchIndex];
		
#if 0
		/* Speedup -- binary search to find next candidate...
		 * this is commented out because it actually seems to
		 * make things run slower!
		 */
		{
		    int low = Word->SearchIndex;
		    int high = Word->Word->NumberOfWordPlaces - 1;
		    t_WordPlace *Places = Word->Word->WordPlaces;
		    int guess = (high + low) / 2;

		    while (low < high) {
			if (Places[guess].FID < pp->FID) {
			    /* not gone far enough */
			    low = guess + 1;
			} else {
			    high = guess;
			}
			guess = (high + low) / 2;
		    }
		    if (guess != Word->SearchIndex) {
			Word->SearchIndex = guess;
			FOP = &Word->Word->WordPlaces[Word->SearchIndex];
		    }
		}
#endif

		if (!LastFOP) {
		    LastFOP = FOP;
		}

		/** So:
		 ** | int PIFB = each match in the first word in the phrase
		 ** | t_WordPlace *pp = each match in the phrase
		 **    | t_PhraseItem *Word = each word in the phrase
		 **	   | unsigned SearchIndex = each match of that word
		 **	   | t_WordPlace *FOP = each occurrence of that word
		 **
		 ** Hence, we are comparing pp and FOP, hoping that each time
		 ** round the (Word) loop we will advance FOP.
		 ** Once we have decided that FOP and pp relate to the
		 ** same file and that FOP is no earlier than pp in the
		 ** file, we must then check that FOP is advancing the
		 ** chain by comparing it to the previous element in the
		 ** list (LastFOP).
		 **
		 ** When we break from this inner list, we must either have
		 ** eliminated this particular (PIFB) as starting a match-
		 ** chain, or have decided that we have extended the
		 ** current match chain (by setting GotOne).
		 **/


		if (LastFOP == FOP) {
		    HowGood = CheckFlags(Word->Word, FOP);
		} else {
		    HowGood = ContinuesMatch(Word->Word, pp, LastFOP, FOP);
		}

		switch (HowGood) {
		case 0:
		    /* G O T C H A !!!! */
		    /* extend the HitList, since it's OK so far. */

		    *MPP = (t_Match *) emalloc(sizeof(t_Match));
		    (*MPP)->WID = Word->Word->WID;
		    (*MPP)->Where = FOP;
		    (*MPP)->Next = (t_Match *) 0;
		    MPP = &(*MPP)->Next;
		    GotOne++;
		    break;
		case 1: /* gone too far */
		    if (AsciiTrace > 10) {
			t_WordInfo WW;
			
			WW = *(Word->Word);

			if (LastFOP == FOP) {
			    /* UnFlag() returns a pointer to a static buffer,
			     * so I have to use two printf() calls here.
			     */
			    fprintf(stderr, "Reject(%s (%d) != ",
				    UnFlag(&WW, WW.WordPlace.Flags),
				    WW.WordPlace.Flags);
			    fprintf(stderr, "%s (%d)) [flags]\n",
				    UnFlag(&WW, FOP->Flags), FOP->Flags);
			} else {
			    fprintf(stderr, "Reject(%s) -- too far\n",
				UnFlag(&WW, WW.WordPlace.Flags));
			}
		    }
		    break;
		case -1:
		    continue; /* not there yet */
		default:
		    fprintf(stderr, "\n\rInternal Error %s: %d\n", __FILE__,
							__LINE__ - 1);
		    (void) sleep(4); /* for curses stuff... */
		    exit(1);
		}

		/* Remember where we got up to... so that we can extend
		 * the list when we start looking at the next word.
		 */
		LastFOP = FOP;

		if (AsciiTrace >= 4) {
		    t_WordInfo WW;
		    
		    WW = *(Word->Word);
		    /* UnFlag() returns a pointer to a static buffer */
		    fprintf(stderr, "Partial match %s",
				UnFlag(&WW, Word->Word->WordPlace.Flags));
		    fprintf(stderr, "(Word (%s,%lu,%u) in file %lu)\n",
				    UnFlag(&WW, FOP->Flags),
				    FOP->BlockInFile, FOP->WordInBlock,
				    FOP->FID
		    );
		}
		/* If we got to here, we extended the list, which is fine;
		 * otherwise, if we hit a continue, we try to carry on
		 * looking at matches of this word, and if we hit a break
		 * before we set "GotOne", we give up on this match
		 * altogether.
		 */
		break;
	    } /* For each occurrence of that word: */

	    if (!GotOne) {
		t_Match *MP;
		/* This word isn't here, so neither is the phrase found
		 * in this file starting here.
		 */

		for (MP = (*OldMatch); MP != (t_Match *) 0; /*void*/) {
		    t_Match *Next = MP->Next;

		    efree((char *) MP);
		    MP = Next;
		}

		*OldMatch = (t_Match *) 0;
		break;
	    } else {
		/* If we've reached the end of the phrase, i.e. if
		 * Word->Next is zero, we have successfully added a new
		 * phrase!
		 */
		if (Word->Next == (t_PhraseItem *) 0) {
		    if (AsciiTrace > 10) {
			fprintf(stderr, "Result now %d\n", Result + 1);
		    }
		    Result++;
		}
	    }

	} /* end for (each word in the phrase) */
    } /* end (for each FID/Offset pair in the first word */
    return Phrase->NumberOfMatches = Result;
}


/*
 * ContinuesMatch -- does the word place New carry on a partial match?
 *
 * First is the place where the match started, Prev is the place of the
 * previous word in the match, and New is the candidate place for the
 * next word (QueryWord, as given in the query).
 *
 * Return Value is
 *  -1 --- if New is too early (wrong file so far, or not after Prev)
 *  0  --- if it's the next word in the match
 *  +1 --- if we've gone past where the next word would have to be
 * Note: you can use these values in a switch() if you want.
 */
static int
ContinuesMatch(QueryWord, First, Prev, New)
    t_WordInfo *QueryWord;
    t_WordPlace *First;
    t_WordPlace *Prev;
    t_WordPlace *New;
{
    /* Are we looking at the right file yet? */
    if (New->FID != First->FID) {
	return (New->FID < First->FID) ? -1 /* not far enough */
				       : 1; /* too far */
    }

    /* Same file.  A place trivially continues itself. */
    if (Prev == New) {
	return 0;
    }

    /* Later words in the phrase can't be in earlier blocks than the
     * start of the phrase...
     */
    if (New->BlockInFile < First->BlockInFile) {
	return -1;
    }

    /* ...nor in earlier blocks than the preceding word. */
    if (New->BlockInFile < Prev->BlockInFile) {
	return -1;
    }

    /* Words more than one block apart are not believed to be part
     * of the same phrase.
     */
    if (New->BlockInFile > Prev->BlockInFile + 1) {
	return 1;
    }

    if (New->BlockInFile != Prev->BlockInFile) {
	/* Adjacent blocks: New has to be the first word of its block
	 * and Prev the last word of the one before (addfile.c goes to
	 * some trouble to maintain WPF_LASTINBLOCK for this test).
	 */
	if (New->WordInBlock > 0 || !(Prev->Flags & WPF_LASTINBLOCK)) {
	    return 1; /* another word lies between them */
	}
    } else {
	/* Same block: New has to follow Prev closely. */
	int Slop;

	if (New->WordInBlock <= Prev->WordInBlock) {
	    return -1; /* too early in the block */
	}

	/* A few intervening words are tolerated when we are not being
	 * fussy; otherwise New must be the very next word.
	 */
	Slop = (PhraseMatchLevel == PCM_AnyCase) ? 4 : 1;

	if (New->WordInBlock > Prev->WordInBlock + Slop) {
	    return 1; /* gone too far */
	}
    }

    /* So the position is acceptable.  Whether the distance apart and the
     * skipped common words are plausible is judged only by the flags.
     *
     * The flag test is left until last because it is expected to be
     * the more expensive one.  Since the word is in the right place,
     * if it fails here there is no point in looking at the next word
     * in this block!
     */
    return CheckFlags(QueryWord, New);
}

/*
 * CheckFlags -- compare the flags of a word from the query with those of
 * an occurrence of that word in a document.
 *
 * How strict the comparison is depends on PhraseMatchLevel:
 *   PCM_AnyCase   everything is accepted;
 *   PCM_SameCase  capitalisation, possessive and last-was-common flags
 *		   must be the same, the text before the word must be of
 *		   similar length, and plural/last-had-letters must be
 *		   present in the document if given in the query;
 *   PCM_HalfCase  an attribute is only required of the document word
 *		   if the user's word had it.
 *
 * Returns 0 if the occurrence is acceptable, and 1 if not (in which case
 * the caller should give up on this match).  An unrecognised
 * PhraseMatchLevel is reported on stderr and then treated as acceptable.
 */
static int
CheckFlags(QueryWord, New)
    t_WordInfo *QueryWord;
    t_WordPlace *New;
{
    /* First check case */
    switch (PhraseMatchLevel) {

    case PCM_AnyCase:
	break; /* clearly OK */

    case PCM_SameCase:
	if ((QueryWord->WordPlace.Flags & (WPF_UPPERCASE|WPF_POSSESSIVE)) != 
			(New->Flags & (WPF_UPPERCASE|WPF_POSSESSIVE))) {
	    /* The cases are different, no good */
	    return 1; /* give up on this match */
	}
	if (QueryWord->WordPlace.StuffBefore > 0) {
	    int Difference;

	    /* Allow a little leeway in the amount of stuff skipped */
	    Difference = QueryWord->WordPlace.StuffBefore - New->StuffBefore;
	    if (Difference < -2 || Difference > 4) {
		return 1; /* give up on this match */
	    }
	}
	/* Now, what about skipped common words? */
	if ((New->Flags & WPF_LASTWASCOMMON) !=
		    (QueryWord->WordPlace.Flags & WPF_LASTWASCOMMON)) {
	    return 1; /* give up on this match */
	}

	/* plurals: this should be separate */
	if ((QueryWord->WordPlace.Flags & WPF_WASPLURAL) &&
				!(New->Flags & WPF_WASPLURAL)) {
	    return 1; /* give up on this match */
	}

	/* Only do this test if we are being awfully strict.
	 * Remember also that the first word in the phrase will
	 * not usually have this set.
	 */
	if ((QueryWord->WordPlace.Flags & WPF_LASTHADLETTERS) &&
			    !(New->Flags & WPF_LASTHADLETTERS)) {
	    return 1; /* give up on this match */
	}
	break;

    case PCM_HalfCase:
	/* In this case, we are lax about things, but if the
	 * user typed  plural/possessive/capital, we only
	 * match one with the same attribute.
	 */
	if ((QueryWord->WordPlace.Flags & WPF_UPPERCASE) &&
				    !(New->Flags & WPF_UPPERCASE)) {
	    if (AsciiTrace > 4) {
		fprintf(stderr, "Reject [uppercase]\n");
	    }
	    return 1; /* give up on this match */
	}

	/* plurals: this should be separate */
	if ((QueryWord->WordPlace.Flags & WPF_WASPLURAL) &&
	    !(New->Flags & WPF_WASPLURAL)) {
	    if (AsciiTrace > 4) {
		fprintf(stderr, "Reject [plural]\n");
	    }
	    return 1; /* give up on this match */
	}

	/* Now, what about skipped common words? */
	if ((QueryWord->WordPlace.Flags & WPF_LASTWASCOMMON) &&
				!(New->Flags & WPF_LASTWASCOMMON)) {
	    if (AsciiTrace > 4) {
		fprintf(stderr, "Reject [last was common]\n");
	    }
	    return 1; /* give up on this match */
	}

	/* Stuff before, if given, must be present: */
	if (QueryWord->WordPlace.StuffBefore > 1) {
	    if (New->StuffBefore < QueryWord->WordPlace.StuffBefore - 1) {
		if (AsciiTrace > 4) {
		    /* The document's value comes first; the value
		     * labelled Q is the one from the query.
		     */
		    fprintf(stderr, "Reject [Stuff Before %d != Q%d]\n",
			    New->StuffBefore,
			    QueryWord->WordPlace.StuffBefore);
		}
		return 1;
	    } /* don't care if there is too much there, though */
	}

	if ((QueryWord->WordPlace.Flags & WPF_POSSESSIVE) &&
				   !(New->Flags & WPF_POSSESSIVE)) {
	    if (AsciiTrace > 4) {
		fprintf(stderr, "Reject [user flag]\n");
	    }
	    return 1; /* give up on this match */
	}
	break;

    default: /* defensive! */
	fprintf(stderr, "\n\rinternal error %d %s\n", __LINE__, __FILE__);
	(void) sleep(4);
	break;
    }

    /* If we got here, nothing objected. */
    
    return 0; /* It's all OK! */
}
