/* wpblock.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* Revision-control identification string for this file (it carries the
 * "@(#)" marker and an RCS "$Id$" keyword).  It is compiled out when LINT
 * is defined; nothing in this file reads it.
 */
#ifndef LINT
static char *RcsId = "@(#) $Id: wpblock.c,v 1.6 92/08/24 00:22:03 lee Exp $";
#endif

/* This file defines
 * unsigned long Putpblock(t_WordInfo *WordInfo, t_pblock *pblock);
 *
 * unsigned long PutWordPlaces(
 *    t_WordPlace *WordPlaces,
 *    t_WID WID,
 *    unsigned char *Block,
 *    unsigned BlockLength,
 *    unsigned long NextOffset,
 *    unsigned long NextSize,
 *    unsigned long NumberToWrite
 * );
 *
 */

#include "globals.h" /* defines and declarations for database filenames */
#include "error.h"

#include <stdio.h> /* stderr, also for fileinfo.h */
#include <sys/types.h>
#include "fileinfo.h" /* for wordinfo.h */
#include "wordinfo.h"
#include "wordrules.h"
#include "pblock.h"
#include "putbyte.h"

/** Unix system calls that need to be declared: **/

/** C library functions that need to be declared: **/

/** lqtext library functions that need to be declared: **/
/* All of these are old-style (unprototyped) declarations, so the compiler
 * does not check argument types at the call sites in this file.
 */
extern void MkWIBH();		/* called from Putpblock as MkWIBH(WordInfo, pblock) */
extern void SortWordPlaces();	/* called only when ALWAYSSORT is defined */
extern void FlushBlock();	/* called at the end of PutWordPlaces */
/* Not called by name in this file -- presumably these back the
 * PutByte()/PutLong() calls via putbyte.h; confirm against that header.
 */
extern int _PutByte(), _PutLong();
extern unsigned long FindFreeBlock(); /* called as FindFreeBlock(WID, &BlockLength) */

#ifdef ASCIITRACE
extern void fprintWordInfo(/*stream, W, Caller*/);
#endif

/** Functions within this file that need to be declared: **/
/* Forward declaration: Putpblock calls PutWordPlaces before its definition. */
unsigned long PutWordPlaces();
/** **/


#ifdef ASCIITRACE
/* Declared for tracing builds; not referenced in the code visible here. */
extern int AsciiTrace;
#endif

/* Write out an entire (presumably new) data entry, and
 * return a disk pointer to the start of the chain.
 *
 * WordInfo -- header for the word being written.  Its DataBlock pointer is
 *             cleared, its Offset is set to the block returned by
 *             FindFreeBlock(), and it is then handed to MkWIBH().
 * pblock   -- the places to store.  Its ChainStart is set to the same value
 *             as WordInfo->Offset; WordPlaces and NumberOfWordPlaces are
 *             passed on to PutWordPlaces().
 *
 * Returns WordInfo->Offset.
 *
 * NOTE(review): the count returned by PutWordPlaces() is discarded, so a
 * short write is not reported to the caller from here.
 */
unsigned long
Putpblock(WordInfo, pblock)
    t_WordInfo *WordInfo;
    t_pblock *pblock;
{
    unsigned long BlockLength = 0L; /* filled in through FindFreeBlock() */

    /* Assume that we can discard the PairBlock in WordInfo --
     * it was a pointer to a static buffer anyway.
     */
    WordInfo->DataBlock = (char *) 0;

    WordInfo->Offset = pblock->ChainStart =
				FindFreeBlock(WordInfo->WID, &BlockLength);

    /* Presumably MkWIBH sets WordInfo->DataBlock and
     * WordInfo->WordPlaceStart, since both are used below after
     * DataBlock was cleared above -- confirm against MkWIBH.
     */
    (void) MkWIBH(WordInfo, pblock);

    /* PutWordPlaces is declared without a prototype and takes an
     * `unsigned' BlockLength, so the whole remaining-space expression
     * must be converted.  Casting only WIDBLOCKSIZE would leave the
     * argument with the type of (unsigned - ptrdiff_t), which is wider
     * than unsigned on LP64 systems.
     */
    (void) PutWordPlaces(
	pblock->WordPlaces,
	WordInfo->WID,
	(unsigned char *) WordInfo->WordPlaceStart,
	(unsigned) (WIDBLOCKSIZE -
		    (WordInfo->WordPlaceStart - WordInfo->DataBlock)),
	WordInfo->Offset,
	BlockLength,
	pblock->NumberOfWordPlaces
    );

    return WordInfo->Offset;
}

/** CAUTION - this comment might be out of date CAUTION *********************
 ** WordPlaces are now stored as sequences, as follows:
 **  FID*2 -- 1, 2, 3 (usually, for FID) or 4 bytes			1-5
 **  (very, very occasionally a variable-size number may be 5 bytes long.)
 **   . the bottom bit in the stored number determines whether there
 **     is more than one FID to follow
 ** Number of following places (only if prev. bit was 1) -- 1 byte	0-1
 ** For each following entry:-
 **   . for each of the following places:
 **     Block In File (long, 1-5 bytes, usually 1)			1-5
 **     Word In Block -- always 1 byte					1-1
 **		the bottom bit of this says if there are flags
 **     Flags -- always 1 byte, if present				0-1
 **	    (flags stored only if different from previous entry)
 **	    Stuff Before -- 1 byte					0-1
 **	    (if there are no flags, there's no Stuff Before byte, and
 **     we use the default value of 1)
 **
 ** Hence:	each sub-place takes from 2 to 9 bytes;
 **		each Place sequence takes from 3
 **		to (4 + 1) + 255 * (2..7) bytes.
 **		In most (I guess > 7/10) cases, flags will be 0, and
 **		StuffBefore will be the default of 1.
 **
 ** In practice, though, we store the difference since the last block-in-file,
 ** and the difference since the last FID, so that the numbers are usually
 ** on the small side.
 **
 **	I am hoping, of course, that the extra information stored is
 ** worth while!
 **	It might be possible to coalesce WordInBlock and BlockInFile using
 ** delta modulation -- i.e., storing the increment from the previous.  In
 ** this case, a flag bit could mean that those two values each occupy a
 ** nibble in a single byte.  Or, I could use a single byte, like this:
 **	[a b c d e f g h]
 **	a == 1 --> (efgh) is word in block inc., (bcd is block in file inc)
 ** but I need to do some real measurements to figure out how best to save
 ** space.  It really is worth while keeping the format as simple as I can,
 ** as this speeds retrieval.
 **
 **/

/* Encode NumberToWrite entries of WordPlaces[] for the word WID and append
 * them to the byte stream that starts at Block.
 *
 * WordPlaces    -- the places to write.  Entries may be modified: the
 *                  WPF_HASSTUFFBEFORE flag is set on any entry whose
 *                  StuffBefore is not 1.  With ALWAYSSORT defined the
 *                  array is also sorted in place.
 * WID           -- the word's identifier; passed to the byte writers and
 *                  used in error messages.
 * Block         -- start of the output buffer.
 * BlockLength, NextOffset, NextSize
 *               -- passed by address to PutByte()/PutLong(), together with
 *                  the local LastStart; their exact meaning is defined by
 *                  putbyte.h, which is not shown here.
 * NumberToWrite -- number of entries in WordPlaces[].
 *
 * For each run of up to 255 consecutive entries with the same FID this
 * writes: (FID - previous FID) * 2, with the low bit set if the run has
 * more than one entry; the run length as a byte (only if that bit is set);
 * then for each entry the block number as a difference from the previous
 * entry in the run, a byte holding WordInBlock * 2 with the low bit set if
 * the flags differ from the last flags written, the flags byte (only if
 * that bit is set), and the StuffBefore byte (only if WPF_HASSTUFFBEFORE is
 * set in the entry's flags).
 *
 * Returns NumberToWrite on success.  If PutByte()/PutLong() returns a
 * negative value, returns the index of the entry being written at the time.
 */
unsigned long
PutWordPlaces(WordPlaces, WID, Block, BlockLength, NextOffset, NextSize, NumberToWrite)
    t_WordPlace *WordPlaces;
    t_WID WID;
    unsigned char *Block;
    unsigned BlockLength;
    unsigned long NextOffset;
    unsigned long NextSize;
    unsigned long NumberToWrite;
{
    unsigned char *q = Block;		/* current write position */
    unsigned long L;
    /* The indices are unsigned long, like NumberToWrite and the return
     * type, so comparisons and the returned count need no conversion.
     */
    unsigned long CurrentPlace = 0L;
    unsigned long LastStart = 0L;
    t_FID LastFID = 0;
    unsigned long LastBlock = 0L;
    unsigned char LastFlags = 0;

#ifdef ALWAYSSORT
    /* Sort the pblock to simplify subsequent accesses,
     * and also to allow more space-efficient encoding, recording the change
     * (increment) since the previous FID or Block in the list, instead of
     * the actual number.  The WriteNumber package works much better if
     * most numbers are small.
     */
    if (NumberToWrite > 1) {
	SortWordPlaces(NumberToWrite, WordPlaces);
    }
#endif

    while (CurrentPlace < NumberToWrite) {
	unsigned short NumberOfRepeats;
	unsigned char U;
	unsigned long WideU;	/* WordInBlock << 1 before truncation to a byte */
	t_FID FID = WordPlaces[CurrentPlace].FID;
	unsigned long LastPlace;

	if (FID == 0) {
	    /* The second value reported is the index of the offending
	     * entry (the FID itself is known to be zero here).
	     */
	    Error(E_BUG, "PutWordPlaces WID %ld, FID %ld is Zero!",
		(long) WID, (long) CurrentPlace
	    );
	}

	/* Determine the number of Places in the same file;
	 * note that we can write at most 255 in the same place, so
	 * longer stretches are broken up into clumps of 255.
	 * This is a reasonable tradeoff, I think.  The alternative would
	 * be to write NumberOfRepeats as a long, and lose if there were
	 * (say) between 64 (old, 127 new) and 255 of them.  This case only
	 * occurs once in the New Testament anyway, and presumably is
	 * generally quite rare.
	 */
	NumberOfRepeats = 0;
	LastPlace = CurrentPlace;
	while (NumberOfRepeats < 255) {
	    if (LastPlace >= NumberToWrite) {
		break;
	    } else if (WordPlaces[LastPlace].FID != FID) {
		break;
	    }
	    ++NumberOfRepeats;
	    ++LastPlace;
	}

	/* FID is stored as a difference, doubled; the low bit says
	 * whether a repeat count follows.
	 */
	L = (FID - LastFID) << 1;
	LastFID = FID;
	if (NumberOfRepeats > 1) L |= 01L;
	if (PutLong(L, WID, &q, &Block, &BlockLength,
				    &LastStart, &NextOffset, &NextSize) < 0) {
	    return CurrentPlace;
	}
	if (L & 01L) {
	    if (PutByte(NumberOfRepeats, WID, &q, &Block, &BlockLength,
				    &LastStart, &NextOffset, &NextSize)  < 0) {
		return CurrentPlace;
	    }
	}

	/* Block numbers are differences within one file's run only. */
	LastBlock = 0;

	for (; NumberOfRepeats != 0; --NumberOfRepeats) {
	    /* WordPlaces[CurrentPlace] is about to be read, so an index
	     * equal to NumberToWrite is already out of range.
	     */
	    if (CurrentPlace >= NumberToWrite) {
		Error(E_BUG,
		"Word %ld: Entry for file %lu has more matches than expected",
					(long) WID, (unsigned long) FID);
	    }
	    /* block number */
#ifdef ASCIITRACE
	    if (WordPlaces[CurrentPlace].BlockInFile < LastBlock) {
		Error(E_BUG,
		    "PutWordPlaces Sort WID %ld failed, backwards blocks",
		    (long) WID
		);
	    } else if (CurrentPlace &&
		    (WordPlaces[CurrentPlace].FID ==
			    WordPlaces[CurrentPlace - 1].FID) &&
		    (WordPlaces[CurrentPlace].BlockInFile == LastBlock) &&
		    (WordPlaces[CurrentPlace].WordInBlock <=
			    WordPlaces[CurrentPlace - 1].WordInBlock)) {
		Error(E_BUG,
"PutWordPlaces Sort WID %ld failed, FID %ld: Blk %lu: WIB %d <= %d",
		    (long) WID, (long) FID, LastBlock,
		    (int) WordPlaces[CurrentPlace].WordInBlock,
		    (int) WordPlaces[CurrentPlace - 1].WordInBlock
		);
	    }
#endif /* ASCIITRACE */
	    L = WordPlaces[CurrentPlace].BlockInFile - LastBlock;
	    LastBlock += L;

	    if (PutLong(L, WID, &q, &Block, &BlockLength,
				&LastStart, &NextOffset, &NextSize) < 0) {
		return CurrentPlace;
	    }

	    /* Keep the shifted value at full width so that the range
	     * check below can see an overflow; U itself is only a byte
	     * and could never compare greater than 255.
	     */
	    WideU = ((unsigned long) WordPlaces[CurrentPlace].WordInBlock) << 1;
	    U = (unsigned char) WideU;
	    if (WordPlaces[CurrentPlace].StuffBefore != 1) {
		WordPlaces[CurrentPlace].Flags |= WPF_HASSTUFFBEFORE;
	    }
	    if (WordPlaces[CurrentPlace].Flags != LastFlags) {
		U |= 01;
	    }

	    if (WideU > 255) {
		Error(E_BUG, "WID %lu: WordInBlock (0%o) from FID %lu too big",
			(unsigned long) WID, (unsigned int) WideU,
			(unsigned long) FID);
	    }

	    if (PutByte(U, WID, &q, &Block, &BlockLength,
				&LastStart, &NextOffset, &NextSize) < 0) {
		return CurrentPlace;
	    }
	    if (U & 01) {
		LastFlags = WordPlaces[CurrentPlace].Flags;
		if (PutByte(LastFlags, WID, &q, &Block, &BlockLength,
				&LastStart, &NextOffset, &NextSize) < 0) {
		    return CurrentPlace;
		}
	    }

	    /* Even if there are flags, there still might not be a separate
	     * entry for the number of preceding skipped bytes.
	     */
	    if (WordPlaces[CurrentPlace].Flags & WPF_HASSTUFFBEFORE) {
		if (PutByte(WordPlaces[CurrentPlace].StuffBefore, WID, &q,
			&Block, &BlockLength,
				&LastStart, &NextOffset, &NextSize) < 0) {
		    return CurrentPlace;
		}
	    }
	    ++CurrentPlace;
	}
	if (CurrentPlace > LastPlace) {
	    Error(E_BUG, "PutWordPlaces: CurrentPlace %ld > LastPlace %ld",
				    (long) CurrentPlace, (long) LastPlace);
	}
    }
    if (LastStart) {
	/* The original note here says the next-block pointer had better
	 * be zero at this point, but that FlushBlock will take care of it
	 * if not (a block has been wasted in that case), and that
	 * LastStart is zero if everything fitted inside the WordInfo
	 * block.
	 *
	 * NOTE(review): q - Block has type ptrdiff_t and FlushBlock is
	 * declared without a prototype; confirm that this matches the
	 * type of FlushBlock's length parameter.
	 */
	FlushBlock(
	    (char *) Block,
	    q - Block,
	    &NextOffset,
	    &LastStart,
	    WID
	);
    }
    return NumberToWrite;
}
