/* WordInfo.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* WordInfo.c -- handle the database of words for lq-Text.
 * 
 * lq-text keeps a master list of all of the words that have ever been
 * seen.  Currently, this is in dbm format (sdbm or ndbm).
 * For each word, there's an associated WID (a unique number), an offset
 * into the master database (see pblock.c), and possibly thesaurus info.
 *
 * $Id: WordInfo.c,v 2.23 92/08/24 00:20:41 lee Exp $
 *
 */

#include "globals.h" /* defines and declarations for database filenames */
#include "error.h"

#include <errno.h>
#include <fcntl.h>
#include <malloc.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <unistd.h>

#include "fileinfo.h"
#include "smalldb.h"
#include "wordindex.h"
#include "wordinfo.h"
#include "numbers.h"

#include "emalloc.h"

#include "wordrules.h" /* max word length */

#include "pblock.h"

/** declarations: **/
/** Unix system calls: **/
/* open(), lseek(), read() and write() are declared by <fcntl.h> and
 * <unistd.h>, both of which are included above.  They are deliberately
 * not redeclared here: old-style declarations such as "int read()" or
 * "long lseek()" conflict with the system prototypes wherever ssize_t
 * or off_t are not int/long, and "int open()" is not compatible with
 * the variadic prototype of open().
 */

/** Unix Library Calls that need to be declared: **/

/** lqtext Library calls that need to be declared: **/
extern void Deletepblock();
extern void lqGetFileModes();

/** Functions within this file that need to be declared: **/
t_WordInfo *MakeWordInfo();
void SlayWordInfo();
/** **/

#ifdef ASCIITRACE
/* Trace verbosity level, defined elsewhere.  The functions in this file
 * emit extra diagnostics when it exceeds 3, 10 or 15.
 */
extern int AsciiTrace;
#endif

/* NOTE(review): WIDINDEXCACHELEN is not referenced in the visible part of
 * this file -- confirm whether anything still uses it.
 */
#define WIDINDEXCACHELEN (WIDBLOCKSIZE*32)

/* File descriptor of the WID index file (WidIndexFile); negative until
 * OpenWordIndexFile() has opened it.
 */
static int Widfd = (-1);
/* The byte offset this file believes Widfd is positioned at.  It is
 * compared with the wanted offset so that lseek() is only called when the
 * position actually has to change, and is set to -1 after a short read.
 */
static long WidPos = 0L;

/* Open the WID index file (WidIndexFile) on Widfd.
 *
 * The open(2) flags and the creation mode are obtained from
 * lqGetFileModes().  A failure to open is reported through Error() with
 * E_FATAL|E_SYS.  Afterwards the remembered file position is reset to the
 * start of the file and GetMaxWID() is called; its result is discarded.
 */
static void
OpenWordIndexFile()
{
    int OpenFlags;
    int CreateMode;

    lqGetFileModes(&OpenFlags, &CreateMode);

    Widfd = open(WidIndexFile, OpenFlags, CreateMode);
    if (Widfd < 0) {
	Error(E_FATAL|E_SYS,
	    "Can't open WID file \"%s\" flags %d mode %d",
	    WidIndexFile, OpenFlags, CreateMode
	);
    }

    /* a freshly opened descriptor is positioned at offset zero */
    WidPos = 0L;

    (void) GetMaxWID(0);
}

/* Defined elsewhere.  The code in this file treats it as the largest WID
 * known to be allocated: larger WIDs trigger a call to GetMaxWID(), and
 * PutWordInfoIntoIndex() raises it when asked to write a larger WID.
 * NOTE(review): presumably GetMaxWID() refreshes this value -- confirm.
 */
extern t_WID LastNextWIDVal;

/* WID2WordInfo -- read the index entry for a WID from the WID index file.
 *
 * Each WID owns one WIDBLOCKSIZE-byte block at offset WID * WIDBLOCKSIZE.
 * The block holds, in order: the word length, the bytes of the word, the
 * offset (stored in units of BLOCKSIZE) and the number of word places;
 * whatever follows is handed to GetWordPlaces().
 *
 * Returns a newly allocated t_WordInfo, or a null pointer if WID is zero,
 * if WID is larger than GetMaxWID() reports, if the block cannot be read,
 * or if the stored word length is zero or too large to fit in the block.
 *
 * The result comes from MakeWordInfo() and is released with
 * SlayWordInfo().  Note that DataBlock is allocated here with emalloc(),
 * and that SlayWordInfo() as defined in this file does not free it.
 */
t_WordInfo *
WID2WordInfo(WID)
    t_WID WID;
{
    extern t_WordPlace *GetWordPlaces(); /* pblock.c */

    char Buffer[WIDBLOCKSIZE + 5]; /* the +5 allows for overrun... */
    char *q = Buffer;
    t_WordInfo *WP;
    int i;

    if (!WID) return (t_WordInfo *) 0;

    if (Widfd < 0) {
	OpenWordIndexFile();
    }

    /* Optimisation: if the WID is greater than the largest allocated WID,
     * there's no point in looking at the file!
     */
    if (LastNextWIDVal && WID > LastNextWIDVal) {
	if (WID > GetMaxWID()) {
	    Error(E_WARN, "WordInfo for %ld, max is %ld, request ignored",
		    WID, LastNextWIDVal
	    );
	    return (t_WordInfo *) 0;
	}
    }

    /* Only seek if we are not already positioned at the wanted block */
    if (WidPos != (long) (WID * WIDBLOCKSIZE)) {
	WidPos = (long) (WID * WIDBLOCKSIZE);
	if (lseek(Widfd, WidPos, 0) < 0) {
	    Error(E_FATAL|E_SYS,
		"WID2WordInfo: WID %ld: lseek(%d=\"%s\", %ld, 0) failed",
		WID, Widfd, WidIndexFile, WidPos
	    );
	    return (t_WordInfo *) 0;
	}
    }

    if ((i = read(Widfd, Buffer, WIDBLOCKSIZE)) != WIDBLOCKSIZE) {
	Error(E_SYS|E_BUG,
	    "Tried to read %d bytes from %d=\"%s\", but got %d",
	    WIDBLOCKSIZE,
	    Widfd,
	    WidIndexFile,
	    i
	);
    }

    WidPos += WIDBLOCKSIZE;

    if (i != WIDBLOCKSIZE) {
	/* The file position is now unknown; force a seek next time. */
	WidPos = -1L;
	return (t_WordInfo *) 0;
    }

    {
	/* Kept in a wide type so that a corrupt, over-large length is
	 * rejected below rather than silently truncated to a small one.
	 */
	unsigned long L = sReadNumber(&q);

	if (L == 0) {
	    Error(E_WARN,
		"WID2WordInfo: Database corrupt, WID %lu has wordlength zero",
		WID
	    );
	    return (t_WordInfo *) 0;
	}

	/* The word must lie entirely within the block we have just read;
	 * otherwise MakeWordInfo() would copy from beyond Buffer.
	 */
	if (L > (unsigned long) (WIDBLOCKSIZE - (q - Buffer))) {
	    Error(E_WARN,
		"WID2WordInfo: Database corrupt, WID %lu has wordlength %lu, too long for its block",
		WID,
		L
	    );
	    return (t_WordInfo *) 0;
	}

	WP = MakeWordInfo(WID, (int) L, q);
	q += L;
    }

    WP->Offset = sReadNumber(&q) * BLOCKSIZE;
    WP->NumberOfWordPlaces = sReadNumber(&q);

    /* Now, maybe read some WordPlace tuplets: */
    Buffer[WIDBLOCKSIZE] = '\0';

    if (q - Buffer < WIDBLOCKSIZE) {
	/* Keep a private copy of the raw block, and remember where the
	 * word places start within that copy.
	 */
	WP->DataBlock = emalloc(WIDBLOCKSIZE + 5);
	(void) memcpy(WP->DataBlock, Buffer, WIDBLOCKSIZE);
	WP->WordPlaceStart = &(WP->DataBlock[q - Buffer]);
	WP->WordPlaces = GetWordPlaces(
	    WP->WID,
	    q,
	    WIDBLOCKSIZE - (q - Buffer),
	    WP->Offset,
	    WP->NumberOfWordPlaces
	);
	WP->WordPlacesInHere = WP->NumberOfWordPlaces;
    } else {
	Error(E_BUG, "block too small for %ld (%s)", WP->WID, WP->Word);
    }

    /* done! */
    return WP;
}

/* Scratch block in which MkWIBH() and MkWIB() assemble an index entry.
 * MkWIBH() points WordInfo->DataBlock at this buffer, so its contents are
 * overwritten by the next call.
 */
static char PairBuffer[WIDBLOCKSIZE + 5]; /* the +5 allows for overrun... */

/* MkWIBH -- Make WordInfo Block Header.
 *
 * Writes the header of an index entry into the static PairBuffer: the
 * word length, the bytes of the word, the chain start divided by
 * BLOCKSIZE (or zero when pblock is a null pointer), and the number of
 * word places.
 *
 * On return WordInfo->DataBlock points at PairBuffer and
 * WordInfo->WordPlaceStart at the first byte after the header.  Since
 * PairBuffer is static, the block is only valid until the next call.
 *
 * pblock may be a null pointer.
 */
void
MkWIBH(WordInfo, pblock)
    t_WordInfo *WordInfo;
    t_pblock *pblock;
{
    char *q = PairBuffer;

#ifdef ASCIITRACE
    if (AsciiTrace > 15) {
	/* pblock is allowed to be null, so it must not be dereferenced
	 * unconditionally; report a chain start of zero in that case.
	 */
	fprintf(stderr, "\tMake info block header for %s, Offset %lu==%lu\n",
	    WordInfo->Word,
	    pblock ? pblock->ChainStart : 0L,
	    WordInfo->Offset);
    }
#endif

    sWriteNumber(&q, WordInfo->Length);
    (void) strncpy(q, WordInfo->Word, WordInfo->Length);
    q += WordInfo->Length;
    if (pblock) sWriteNumber(&q, (pblock->ChainStart / BLOCKSIZE) );
    else sWriteNumber(&q, 0L);
    sWriteNumber(&q, WordInfo->NumberOfWordPlaces);

    WordInfo->WordPlaceStart = q;
    WordInfo->DataBlock = PairBuffer;
}

/* MkWIB -- Make WordInfo Block.
 *
 * Builds a complete index entry for WordInfo in PairBuffer: the header is
 * written by MkWIBH(), and then as many word places from pblock as
 * PutWordPlaces() stores in the remainder of the block.
 *
 * Returns the value stored in WordInfo->WordPlacesInHere, which is zero
 * when pblock is a null pointer and otherwise the result of
 * PutWordPlaces().  A warning is issued if pblock->ChainStart is non-zero.
 */
int
MkWIB(WordInfo, pblock)
    t_WordInfo *WordInfo;
    t_pblock *pblock;
{
    extern unsigned int PutWordPlaces();

    /* See how many pairs from the given pblock fit into WordInfo,
     * and leave them in PairBuffer...
     */

#ifdef ASCIITRACE
    if (AsciiTrace > 3) {
	/* The casts make the arguments match the %ld conversions
	 * whatever the underlying types of WID and Offset are.
	 */
	fprintf(stderr, "MkWIB Make info block for %s/%ld at %ld\n",
			    WordInfo->Word,
			    (long) WordInfo->WID,
			    (long) WordInfo->Offset);
    }
#endif

    MkWIBH(WordInfo, pblock);

    if (pblock == (t_pblock *) 0) {
	/* No WordPlaces to put in! */
	WordInfo->WordPlacesInHere = 0;
	return 0;
    }

    if (pblock->ChainStart != 0L) {
	Error(E_WARN,
	    "liblqtext/WordInfo.c::MkWIB() pblock->ChainStart %ld != 0",
	    pblock->ChainStart
	);
    }

    /* The space available is whatever MkWIBH() left after the header. */
    return WordInfo->WordPlacesInHere = PutWordPlaces(
	pblock->WordPlaces,
	WordInfo->WID,
	(unsigned char *) WordInfo->WordPlaceStart,
	WIDBLOCKSIZE - (WordInfo->WordPlaceStart - PairBuffer),
	0L, /* start */
	0L, /* blocksize */
	pblock->NumberOfWordPlaces
    );
}

t_WID
Word2WID(Word, Length)
    char *Word;
    unsigned int Length;
{
    DBM *db;
    datum key, data;
    char *q;
    t_WID WID;
 
    if (Length > MaxWordLength) {
	Length = MaxWordLength; /* NOTE: no trailing \0 required. */
    }

    /* contact database server */
    if ((db = startdb(WordIndex)) == (DBM *) 0) {
	Error(E_FATAL|E_SYS,
	    "Word2WID: Couldn't open Word Index (dbm) database \"%s\"",
	    WordIndex
	);
    }

    key.dptr = Word;
    key.dsize = Length;

    data = dbm_fetch(db, key);

    if (data.dptr == (char *) 0 || data.dsize == 0) {
	enddb(db);
	return (t_WID) 0;
    }

    q = data.dptr;
    WID = sReadNumber(&q);
    if (q - data.dptr != data.dsize) {
	Error(E_BUG, "Word2Wid \"%*s\" failed... got %lu", Length, Word, WID);
    }
    if (WID > LastNextWIDVal) {
	(void) GetMaxWID();
    }

    if (WID > LastNextWIDVal) {
	int i;
	int j;

	i *= 3;
	j += 2;
	Error(E_BUG, "Word2WID(%*s) value is %ld, but max WID is %ld! [garbage: %d]",
	    Length,
	    Word,
	    WID,
	    LastNextWIDVal,
	    i + j
	);
    }
    enddb(db);
    return WID;
}
    
/* WID2Word -- return the word that a WID stands for.
 *
 * Returns a null pointer if WID is zero or if WID2WordInfo() finds no
 * entry.  Otherwise the result is the string that MakeWordInfo()
 * allocated with emalloc() for the entry; it is detached from the
 * t_WordInfo before that is destroyed, so the string now belongs to the
 * caller.
 */
char *
WID2Word(WID)
    t_WID WID;
{
    t_WordInfo *Info;
    char *Result;

    if (WID == (t_WID) 0) {
	return (char *) 0;
    }

    Info = WID2WordInfo(WID);
    if (Info == (t_WordInfo *) 0) {
	return (char *) 0;
    }

    /* Take the string out of the structure, so that SlayWordInfo()
     * does not free it along with everything else.
     */
    Result = Info->Word;
    Info->Word = (char *) 0;
    SlayWordInfo(Info);

    return Result;
}

/* AllocateWID -- record in the dbm Word Index that Word has the given WID.
 *
 * Word need not be nul-terminated; Length gives the number of bytes.
 * The WID is encoded with sWriteNumber() and stored under the word with
 * DBM_REPLACE, so any existing entry for the word is overwritten.
 * Failure to open the database or to store the entry is reported as
 * fatal.
 *
 * Returns WID.
 */
t_WID
AllocateWID(Word, Length, WID)
    char *Word;
    int Length;
    t_WID WID;
{
    DBM *db;
    char NumBuf[sizeof(t_WID) * 8/7 + 1]; /* room for 7 bits per byte */
    char *q = NumBuf;
    datum key, data;

    key.dptr = Word;
    key.dsize = Length;

    sWriteNumber(&q, WID);

    data.dptr = NumBuf;
    data.dsize = q - NumBuf;

    /* contact database server */
    if ((db = startdb(WordIndex)) == (DBM *) 0) {
	Error(E_FATAL|E_SYS,
	    "AllocateWID: Couldn't open dbm Word Index \"%s\"",
	    WordIndex
	);
    }

    if (dbm_store(db, key, data, DBM_REPLACE) < 0) {
	/* %.*s, not %*s: Length limits how much of Word is printed,
	 * since Word is not necessarily nul-terminated.
	 */
	Error(E_FATAL|E_SYS,
	    "WID %ld: dbm_store of %.*s failed",
	    WID,
	    Length,
	    Word
	);
    }

    enddb(db);

#ifdef ASCIITRACE
    if (AsciiTrace > 10) {
	/* Read the entry back to check that it was stored. */
	t_WID W;

	if ((W = Word2WID(Word, Length)) != WID) {
	    Error(E_BUG,
		"AllocateWID: stored %ld, but retrieved %ld",
		WID,
		W
	    );
	}
    }
#endif

    return WID;
}

/* PutWordInfoIntoIndex -- write a word's block into the WID index file.
 *
 * If WordInfo has no DataBlock, one is built with MkWIB() and no pblock;
 * in that case a non-zero Offset draws a warning first.  The block is
 * then written at offset WID * WIDBLOCKSIZE, seeking only when the file
 * is not already positioned there.  Seek and write failures are reported
 * as fatal.
 *
 * The dbm entry for the word is not touched here: the caller is expected
 * to have called AllocateWID().
 *
 * Always returns 0.
 */
int
PutWordInfoIntoIndex(WordInfo, Offset)
    t_WordInfo *WordInfo;
    unsigned long Offset;
{
    long Target;

    /** Ensure that we have a physical block for WordInfo.  If
     ** we don't, there is something very wrong in our caller.
     **/
    if (WordInfo->DataBlock == (char *) 0) {
	if (Offset != 0) {
	    Error(E_WARN|E_INTERNAL, "WordInfo corrupt for \"%s\"",
			    WordInfo->Word);
	}
	(void) MkWIB(WordInfo, (t_pblock *) 0);
    }

    /** Now write the physical entry... */

    if (Widfd < 0) {
	OpenWordIndexFile();
    }

    /* how can this happen? */
    if (WordInfo->WID > LastNextWIDVal) {
	LastNextWIDVal = WordInfo->WID;
    }

    Target = (long) (WordInfo->WID * WIDBLOCKSIZE);
    if (WidPos != Target) {
	WidPos = Target;
	if (lseek(Widfd, WidPos, 0) < 0) {
	    Error(E_SYS|E_FATAL,
		"Index \"%s\": PutWordInfoIntoIndex(%s) lseek to %ld failed",
		WidIndexFile,
		WordInfo->Word,
		WidPos
	    );
	}
    }

    if (write(Widfd, WordInfo->DataBlock, WIDBLOCKSIZE) != WIDBLOCKSIZE) {
	Error(E_SYS|E_FATAL,
	    "Index \"%s\": PutWordInfoIntoIndex(%s) write failed",
	    WidIndexFile,
	    WordInfo->Word
	);
    }
    WidPos += WIDBLOCKSIZE;

#ifdef ASCIITRACE
    if (AsciiTrace > 3) {
	/* Check that the dbm index still maps the word to the same WID */
	t_WID Stored = Word2WID(WordInfo->Word, WordInfo->Length);

	if (Stored != WordInfo->WID) {
	    Error(E_BUG, "Word %*s Wid changed from %ld to %ld!",
		WordInfo->Length,
		WordInfo->Word,
		WordInfo->WID,
		Stored
	    );
	}
    }
#endif

    return 0;
}

int
DeleteWord(Word)
    char *Word;
{
    extern t_pblock *Getpblock();

    t_WID WID;
    t_WordInfo *WordInfo;
    t_pblock *tmp;

    if ((WID = Word2WID(Word, strlen(Word))) == (t_WID) 0) {
	return -1; /* not there */
    }

    /* get info from the list */
    if ((WordInfo = WID2WordInfo(WID)) == (t_WordInfo *) 0) {
	return -1;
    }

    if ((tmp = Getpblock(WordInfo)) != (t_pblock *) NULL) {
	Deletepblock(tmp);
	(void) efree((char *)tmp);
    }

    /* delete the offset from the database, but retain the WID: */
    WordInfo->Offset = 0L;
    WordInfo->NumberOfWordPlaces = 0L;
    WordInfo->WordPlacesInHere = 0;
    PutWordInfoIntoIndex(WordInfo, 0L);
    SlayWordInfo(WordInfo);

    return 0;
}

/* An all-zero t_WordInfo.  MakeWordInfo() copies it into each structure
 * it allocates, so that every field starts out as zero or a null pointer.
 */
static t_WordInfo ZeroWordinfo = {
    0,
};

/* Routines to create and destroy WordInfo structures */
/* MakeWordInfo -- allocate and initialise a t_WordInfo.
 *
 * Every field starts out zeroed (copied from ZeroWordinfo); then WID,
 * Length and Word are filled in.  Word is a private, nul-terminated copy
 * of the first Length bytes of the argument, held in Length + 1 bytes
 * obtained from emalloc().
 *
 * The result is destroyed with SlayWordInfo().
 */
INLINE t_WordInfo *
MakeWordInfo(WID, Length, Word)
    t_WID WID;
    int Length;
    char *Word; /* the word, which might not be nul-terminated */
{
    register t_WordInfo *Info;
    char *Text;

    Info = (t_WordInfo *) emalloc(sizeof(t_WordInfo));
    *Info = ZeroWordinfo; /* structure copy */
    Info->WID = WID;

    Text = emalloc(Length + 1);
    (void) strncpy(Text, Word, Length);
    Info->Word = Text;

    /* strncpy does not add a null, so terminate the copy by hand */
    Info->Length = Length;
    Text[Info->Length] = '\0';

    return Info;
}

void
SlayWordInfo(WP)
    t_WordInfo *WP;
{
    if (!WP) return;
    if (WP->Word) efree(WP->Word);
    if (WP->WordPlaces) efree((char *)WP-> WordPlaces);
    efree((char *) WP);
}

#ifdef ASCIITRACE
/* fprintWordInfo -- print a t_WordInfo in readable form, for debugging.
 *
 * stream is where the output goes, W is the structure to print (it may
 * be a null pointer, in which case only the opening line is printed),
 * and Caller is a label that is printed on the first and last lines.
 *
 * Pointers are printed with %p, whose exact appearance depends on the C
 * library.  The stream is flushed before returning.
 */
void
fprintWordInfo(stream, W, Caller)
    FILE *stream;
    t_WordInfo *W;
    char *Caller;
{
    fprintf(stream, "%s: WordInfo %p: {\n", Caller, (void *) W);
    (void) fflush(stream);
    if (W) {
	fprintf(stream, "\tWID: %ld (%s, len %u)\n",
			W->WID, W->Word, (unsigned int) W->Length);
	fprintf(stream, "\tNumberOfWordPlaces: %lu In here: %d\n",
			W->NumberOfWordPlaces, W->WordPlacesInHere);
	fprintf(stream, "\tFID: %ld; Offset: %lu\n", W->FID, W->Offset);
	if (W->DataBlock) {
	    fprintf(stream, "\tDataBlock: %p\n", (void *) W->DataBlock);
	}
	if (W->WordPlaceStart) {
	    fprintf(stream, "\tWordPlaceStart: %p\n",
					    (void *) W->WordPlaceStart);
	}
	if (W->WordPlaces) {
	    fprintf(stream, "\tWordPlaces: %p\n", (void *) W->WordPlaces);
	}
	/* Only mention the FID of the WordPlace if it disagrees */
	if (W->WordPlace.FID && W->WordPlace.FID != W->FID) {
	    fprintf(stream, "\tWordPlace->FID: %ld != FID\n", W->WordPlace.FID);
	}
	fprintf(stream, "\tWordPlace: (Block: %lu; Word %u",
			W->WordPlace.BlockInFile, W->WordPlace.WordInBlock);
	if (W->WordPlace.Flags || W->WordPlace.StuffBefore) {
	    fprintf(stream, "; Flags %u", W->WordPlace.Flags);
	    fprintf(stream, "; StuffBefore: %u", (unsigned int)
					    W->WordPlace.StuffBefore);
	}
	fprintf(stream, ")\n");
	fprintf(stream, "} %s: WordInfo %p\n", Caller, (void *) W);
    }
    fflush(stream);
}
#endif /*ASCIITRACE*/
