/* block.c -- Copyright 1991 Liam R. Quin.  All Rights Reserved.
 * This code is NOT in the public domain.
 * See the file COPYRIGHT for full details.
 */

/* The low-level physical Word Database for lq-text.
 *
 * $Id: block.c,v 1.7 92/08/24 00:21:33 lee Exp $
 */

/* TODO
 * split this up into individual functions, or at any rate put
 * _GetByte and _PutByte separately, same for _Longs.
 */

#include <stdio.h> /* stderr, also for fileinfo.h */
#include <fcntl.h>
#include <sys/types.h>

#include "globals.h" /* defines and declarations for database filenames */
#include "error.h"

#include "fileinfo.h" /* for wordinfo.h */
#include "wordinfo.h"
#include "pblock.h"
#include "numbers.h"
#include "wordrules.h"
#include "putbyte.h"

extern t_WordInfo *WID2WordInfo();
extern t_WID Word2WID();

/** C library functions that need to be declared: **/

/** lqtext library functions that need to be declared: **/
extern unsigned int FindFreeBlock();

/** Functions within this file that need to be declared: **/

/** **/

#ifdef ASCIITRACE
extern int AsciiTrace;
#endif

extern char *ReadBlock();
extern void WriteBlock();

/* Layout of the physical index database
 * =====================================
 *
 * This file is the only interface to the database of FID/Offset pairs.
 *
 * The db is organised in blocks arranged in Tagliatelli format: a linked
 * list of blocks for each WID; there is a list for each WID in the Word
 * Index.  The Word Index contains the block number of the start of the
 * chain.
 *
 * A separate file, FreeListFile (by default "freefile") contains one bit for
 * every block in the data file -- if the bit is set, the corresponding block
 * is in use.
 * Block 0 of data is never used to store wordplaces; it's intended to use it
 * for concurrency/locking in the future.  As a result, the first bit of the
 * first byte of the freelist is always set...
 *
 * block 1... first data block:
 * +---------------------------
 * | bytes 0...3: Offset of next block in this chain
 * | The (FID, Offset) pairs follow, in compressed format.
 * |
 * block 2... next data block (either the start of a new chain, or a
 * continuation of some other chain.  Or maybe unused, especially if files
 * have been deleted).
 *
 * The block header is described by t_BlockHeader.  It's awfully simple,
 * but it used to be more complex.
 *
 * If lq-text was compiled with -DWIDINBLOCK, each block also contains the
 * WID for which it was written, for better checking.  This only really
 * makes sense with -DASCIITRACE, as otherwise the debugging and tracing
 * code gets compiled out!
 *
 */

#include "blkheader.h"

/* Working buffer private to this file.  When _PutByte() or _PutLong()
 * runs off the end of the caller's current block, it points *Blockp at
 * this buffer and builds the following block here.  There is only one
 * such buffer, so the previous block is written out before it is reused.
 */
static unsigned char pblockBuffer[8192 * 2]; /*TODO use CACHELEN */
/* TODO: malloc() pblockBuffer or (better) eliminate it altogether */

/* FlushBlock -- write out the block being built for WID, and reset the
 * caller's chain state.
 *
 * Block	the in-memory block, starting with its t_BlockHeader; if
 *		this is a null pointer nothing is written;
 * ByteCount	the number of bytes of Block that are in use, counted from
 *		the start of Block (so Block[ByteCount] is the first
 *		unused byte);
 * NextOffset	on entry, the value to store in the header's NextOffset
 *		field; set to zero on return;
 * LastStart	on entry, the block number passed to WriteBlock() for
 *		Block (zero means there is nothing to write); set to zero
 *		on return;
 * WID		stored in the block header only if compiled with
 *		-DWIDINBLOCK.
 *
 * The unused tail of the last BLOCKSIZE unit is filled with 0xff bytes
 * before the block is written.
 */
void
FlushBlock(Block, ByteCount, NextOffset, LastStart, WID)
    char *Block;
    int ByteCount;
    unsigned long *NextOffset, *LastStart;
    t_WID WID;
{
    if (*LastStart && Block) {
	/*NOSTRICT*/
	t_BlockHeader *BH = (t_BlockHeader *) Block;
	int BlocksToWrite = (ByteCount + BLOCKSIZE - 1) / BLOCKSIZE;
							/* round up! */
	int PadEnd = BLOCKSIZE * BlocksToWrite;
	int i;

	BH->NextOffset = (*NextOffset);
#ifdef WIDINBLOCK
	BH->WID = WID;
#endif
	/* Pad with -1 for future compatibility.  The padding starts at
	 * Block[ByteCount], the first byte not in use, and runs to the
	 * end of the last BLOCKSIZE unit.  Indexing upwards (rather than
	 * walking a pointer down from the end) also avoids forming
	 * &Block[-1] when ByteCount is zero.
	 */
	for (i = ByteCount; i < PadEnd; i++) {
	    ((unsigned char *) Block)[i] = (unsigned char) 0xff;
	}
	WriteBlock(*LastStart, Block, BlocksToWrite);
    }
    if (*NextOffset) {
	/* A next block was reserved but never used.  Complain, and then
	 * write the current buffer there too, with its chain terminated.
	 * NOTE(review): the message prints *NextOffset twice; the "from"
	 * value was perhaps meant to be *LastStart -- confirm.
	 */
	Error(E_INTERNAL|E_WARN, "FlushBlock %ld != 0 (from %ld, WID %ld)",
					*NextOffset, *NextOffset, WID);
	if (Block) {
	    /* Same guard as above: without a buffer there is no header
	     * to fill in and nothing to write.
	     */
	    /*NOSTRICT*/
	    t_BlockHeader *BH = (t_BlockHeader *) Block;

	    BH->NextOffset = 0L;
#ifdef WIDINBLOCK
	    BH->WID = WID;
#endif
	    WriteBlock(*NextOffset, Block, BH->NumberOfBlocks);
	}
    }
    *LastStart = *NextOffset = 0L;
}

/* Shorthand for "unsigned char *", used in casts throughout this file.
 * This is simply to help keep the source lines from getting too long!
 */
typedef unsigned char *UCP;

int
_PutByte(Byte, WID, sp, Blockp, BlockLength, LastStart, NextBlock, NextLength)
    unsigned char Byte;
    t_WID WID;
    unsigned char **sp;
    unsigned char **Blockp;
    unsigned int* BlockLength;
    unsigned long *NextBlock;
    unsigned long *NextLength;
    unsigned long *LastStart; /* for writing the linked list */
{
    t_BlockHeader *BH;

    if (*sp - (*Blockp) >= (*BlockLength)) {
	int NumberOfBlocks = (*BlockLength + BLOCKSIZE - 1) / BLOCKSIZE;

	if (!*NextBlock && !*LastStart) return -1; /* only do the 1st block */
	if (*NextBlock == (unsigned long) 0) {
	    *NextBlock = FindFreeBlock(WID, BlockLength);
	} else {
	    if (NextLength && *NextLength) {
		*BlockLength = (*NextLength);
		*NextLength = 0;
	    } else {
		Error(E_BUG,
		    "_PutByte: NextLength is %s",
		    NextLength ? "zero" : "a null pointer"
		);
	    }
	}

	/* Complete the information in the previous block, if required */
	if (*LastStart) {
	    BH = (t_BlockHeader *) (*Blockp);
#ifdef WIDINBLOCK
	    BH->WID = WID;
#endif
	    BH->NextOffset = (*NextBlock);
	    /* Write the old block */
	    WriteBlock(*LastStart, *Blockp, NumberOfBlocks);
	    *LastStart = 0L;
	}
	*LastStart = (*NextBlock);
	(*NextBlock) = 0L; /* after this point NextLength is ignored */
	*Blockp = pblockBuffer; /* Use static (to this file) data buffer */
	/*NOSTRICT*/
	BH = (t_BlockHeader *) (*Blockp);
	BH->NumberOfBlocks = (unsigned char) (*BlockLength/BLOCKSIZE); /*exact*/
	(*sp) = (UCP) BH->Data;
    }
    **sp = Byte;
    (*sp)++;
    return 0;
}

/* _GetByte -- return the byte at *sp and advance *sp.
 *
 * If *sp has reached the end of the current block (*Blockp, which is
 * *BlockLength bytes long), the block numbered *NextBlock is fetched
 * with ReadBlock() first; *Blockp, *BlockLength and *NextBlock are then
 * updated from the new block's header, and *sp is set to the start of
 * its data.
 *
 * If there is no next block, or ReadBlock() returns a null pointer, the
 * problem is reported with Error(E_BUG, ...).  Should Error() return,
 * this function returns 0 with *sp and *Blockp null rather than going
 * on to dereference the null block; the caller must not read further.
 *
 * NOTE(review): BlockLength is "unsigned long *" here, but "unsigned *"
 * in _PutByte, _PutLong and _GetLong -- confirm that callers really do
 * pass a pointer to an unsigned long.
 */
unsigned char
_GetByte(WID, sp, Blockp, BlockLength, NextBlock)
    t_WID WID;
    unsigned char **sp;
    unsigned char **Blockp;
    unsigned long *BlockLength;
    unsigned long *NextBlock;
{
    t_BlockHeader *BH;

    if (*sp - (*Blockp) >= (*BlockLength)) {
	if (*NextBlock == (unsigned long) 0) {
	    (*Blockp) = (*sp) = (UCP) 0;
	    Error(E_BUG,
		"_GetByte: WID %ld: database is corrupt, Next is zero", WID);
	} else {
	    (*sp) = (*Blockp) = (UCP) ReadBlock(*NextBlock);
	}
	/* Check the new block */
	if ((*Blockp) == (UCP) 0) {
	    Error(E_BUG, "_GetByte: Database corrupt, %lu, sigh.", *NextBlock);
	    /* Only reached if Error() returns: there is no block header
	     * to look at, so give up instead of dereferencing null.
	     */
	    return (unsigned char) 0;
	}
	/*NOSTRICT*/
	BH = (t_BlockHeader *) (*Blockp);
#ifdef WIDINBLOCK
	if (BH->WID != WID) {
	    Error(E_BUG, "GetByte: Block %ld has WID %ld, not WID %ld",
		*NextBlock, BH->WID, WID);
	}
#endif
	*BlockLength = BLOCKSIZE * BH->NumberOfBlocks;
	*NextBlock = BH->NextOffset;
	(*sp) = (UCP) BH->Data;
    }
    return *((*sp)++);
}

/* PutLong -- write a long number in compressed/abbreviated form into a
 * string.  If this moves the string pointer beyond the block, write out
 * the block and start a new one.  In that case, the number written may well
 * span the gap between the blocks.  We use an overflow buffer to copy
 * the bytes (if any) that overflowed into it.
 * Then we write them at the start of the next block.
 *
 * This routine returns -1 and writes a partial number (no allocated block)
 * if *LastBlock and *NextBlock are zero.  This allows PutwOrdPlaces to be
 * called to put the WordPlaces into the WIDFILE block without writing out
 * an entire chain.
 */
int
_PutLong(Long, WID, sp, Blockp, BlockLength, LastStart, NextBlock, NextLength)
    unsigned long Long;
    t_WID WID;
    unsigned char **sp;
    unsigned char **Blockp;
    unsigned *BlockLength;
    unsigned long *NextBlock;
    unsigned long *NextLength;
    unsigned long *LastStart; /* for writing the linked list */
{
    t_BlockHeader *BH;
    unsigned char Buffer[sizeof(unsigned long) + 2];
    unsigned char *Bufp = Buffer;
    unsigned char *p;
 
    sWriteNumber((char **) sp, Long);
 
    if (*sp - *Blockp > *BlockLength) { /* gone too far! */
        int NumberOfBlocks;
 
        if (!*NextBlock && !*LastStart) return -1;
 
        NumberOfBlocks = (*BlockLength + BLOCKSIZE - 1) / BLOCKSIZE;
 
        /* Save the overflow in Buffer:
         * the 1st 1 or more characters will fitted into the old block,
         * but we need them all in a lump for readnumber().
         * When we write the next block, we need to put the overflow
         * characters into the start of the next block.
         */
        for (p = &(*Blockp)[*BlockLength]; p < (*sp); p++) {
            *Bufp++ = *p;
        }
        if (*NextBlock == (unsigned long) 0) {
            *NextBlock = FindFreeBlock(WID, BlockLength);
        } else {
            if (NextLength && *NextLength) {
                *BlockLength = (*NextLength);
                *NextLength = 0;
            } else {
                Error(E_BUG,
                    "_PutLong: NextLength is %s",
                    NextLength ? "zero" : "a null pointer"
                );
            }
        }
        /* Complete the information in the previous block, if required */
        if (*LastStart) {
            BH = (t_BlockHeader *) (*Blockp);
            BH->NextOffset = *NextBlock;
#ifdef WIDINBLOCK
            BH->WID = WID;
#endif
            /* Write the old block */
            WriteBlock(*LastStart, *Blockp, NumberOfBlocks);
        }
        *LastStart = (*NextBlock);
        (*NextBlock) = 0L;
        *Blockp = pblockBuffer;
        BH = (t_BlockHeader *) (*Blockp);
        (*sp) = (UCP) BH->Data;
        BH->NumberOfBlocks = *BlockLength / BLOCKSIZE; /* always exact */
        /* Now write the stuff from Buffer into the new block */
        if (Bufp > Buffer) {
            for (p = Buffer; p < Bufp; p++) {
                *((*sp)++) = (*p);
            }
        }
    }
    return 0;
}


/* _GetLong -- the reverse of _PutLong: read a compressed number at *sp
 * with sReadNumber(), advance *sp, and return the number.
 *
 * Things are slightly complicated by the need to provide sReadNumber
 * with a contiguous copy of all of the bytes in a number that spanned
 * a gap between data blocks.  When the first read runs off the end of
 * the current block, the bytes of the number that were in the old block
 * are copied into a small buffer, the block numbered *NextBlock is
 * fetched with ReadBlock(), the buffer is topped up from the start of
 * the new block's data, and the number is read again from the buffer.
 * *Blockp, *BlockLength and *NextBlock are updated from the new block's
 * header, and *sp is left just after the number in the new block.
 *
 * Returns 0 if the number ran off the end of the block and *NextBlock
 * is zero.  A number with too many bytes in the old block to fit the
 * buffer, or a null pointer from ReadBlock(), is reported with
 * Error(E_BUG, ...); should Error() return, 0 is returned.
 */
unsigned long
_GetLong(WID, sp, Blockp, BlockLength, NextBlock)
    t_WID WID;
    unsigned char **sp;
    unsigned char **Blockp;
    unsigned *BlockLength;
    unsigned long *NextBlock;
{
    unsigned char Buffer[sizeof(unsigned long) + 2];
    long Result;
    t_BlockHeader *BH;
    unsigned char *NumberStart = (*sp);
    unsigned char *p;

    Result = sReadNumber(sp);

    /* Now, have we fallen off the end of the block? */
    if ((*sp) - (*Blockp) > (*BlockLength)) {
        unsigned char *bp = Buffer;
        unsigned char *BlockEnd = &(*Blockp)[*BlockLength];

        if (*NextBlock == (unsigned long) 0) {
            return 0L;
        }

        /* The refill loop below stops at sizeof(Buffer) - 1 bytes, so
         * the part of the number in the old block must be shorter than
         * that or there is no room for even one byte from the new
         * block.  Anything longer cannot be a valid number, and copying
         * it would run off the end of Buffer.
         */
        if (NumberStart < BlockEnd &&
            (unsigned long) (BlockEnd - NumberStart) >= sizeof(Buffer) - 1) {
            Error(E_BUG,
                "GetLong: WID %ld: Database corrupt, number too long", WID);
            return 0L;
        }

        /* Copy the first half of the number into the overflow buffer.
         * This has to happen before ReadBlock() is called, since after
         * that the old block's bytes are no longer relied upon.
         */
        for (p = NumberStart; p < BlockEnd; p++) {
            *bp++ = *p;
        }

        /** Now:
         ** . sp is garbage, as is NumberStart, as they point at the old
         **   data block
         ** . Buffer contains the first few bytes of the number
         ** . we need some more bytes, but don't yet know how many, as
         **   this depends on the number representation
         ** . bp points 1 beyond the end of the 1st half of the number.
         **/

        (*sp) = *Blockp = (UCP) ReadBlock(*NextBlock);
        /* Check the new block */
        if ((*Blockp) == (UCP) 0) {
            Error(E_BUG,
                "GetLong: WID %ld: Database corrupt, *NextBlock zero", WID);
            /* Only reached if Error() returns: there is no block header
             * to look at, so give up instead of dereferencing null.
             */
            return 0L;
        }
        BH = (t_BlockHeader *) *Blockp;
#ifdef WIDINBLOCK
        if (BH->WID != WID) {
            Error(E_BUG, "GetLong: Block %ld has WID %ld, not WID %ld",
                *NextBlock, BH->WID, WID);
        }
#endif
        *BlockLength = BLOCKSIZE * BH->NumberOfBlocks;
        *NextBlock = BH->NextOffset;
        (*sp) = (UCP) BH->Data;
        /* Fill up the buffer from the new block */
        for (p = bp; p - Buffer < sizeof(Buffer) - 1; p++) {
            *p = *(*sp)++;
        }
        /* read the number from the buffer */
        (*sp) = Buffer;
        /* Try that number again... */
        Result = sReadNumber(sp);
        /* Now put sp where it should be.  Part of the buffer was
         * from the old block, so only the bytes read from bp onwards
         * count as an offset into the new block's data.
         */
        (*sp) = (UCP) BH->Data + ((*sp) - bp);
    }
    return Result;
}

