/*
 *
 * $Id: shortpack_io.c,v 2.4 1993/09/23 19:43:56 johans Exp $
 *
 */

#include <stdio.h> 
#include <stdlib.h>
#include <sys/types.h>
#include <netinet/in.h>
#ifdef _AIX
#include <net/nh.h>
#endif _AIX
#include <sys/stat.h>
#include <fcntl.h>
#include <math.h>
#include <speech.h>

/* Boolean constants. */
#define		FALSE		0
#define		TRUE		1

/* Index mask with every bit set.  The memory-based entry points pass it as
   the "mask" argument so that (index & mask) leaves the index unchanged;
   the file-to-file routines pass CBUF_MASK instead so that indices wrap
   around their circular buffer. */
#define 	ALL_ONES	0xffffffff

/* log2s[n] holds 2^n for n = 0..15.  The packing and unpacking code uses it
   to test and set single bits of a 16-bit word.
   NOTE(review): 32768 does not fit in a signed 16-bit short, so the last
   entry relies on the implementation's out-of-range conversion (presumably
   giving the 0x8000 bit pattern) - confirm on the target platforms. */
static short log2s[] = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024,
			  2048, 4096, 8192, 16384, 32768};

/*keep*/
/* Host byte-order flag, filled in lazily by the routines below with the
   result of (htons(1) == 1): nonzero when the host already stores shorts in
   network (big-endian) order, zero when bytes must be swapped.
   Declared int rather than plain char: where plain char is unsigned the
   sentinel -1 would be stored as 255, the "== -1" tests would never be true
   and the byte order would never be detected. */
static int ieee_order = -1; /*-1 means not set yet*/

/* SCCS identification string (the %W% / %D% keywords are expanded by SCCS);
   it is not referenced by the code in this file. */
static char *SccsId = "@(#) %W% %D% MIT/LCS/SLS";

/* external variable declaration */
/* Used below to turn a failed stdio call into a message stored in
   ErrorString: sys_errlist[errno] when errno < sys_nerr, a fixed string
   otherwise. */
extern char *sys_errlist[];
extern int sys_nerr;
extern int errno;

/* local module header declarations */
/* number of magnitude bits needed for one sample */
static int find_bits_needed(short num);
/* length of the next chunk to pack; bits per sample returned through pbits */
static int find_next_chunk_to_compress (short *buffer, int start, 
					int num_elements, int *pbits, 
					int mask);
/* heuristic: end the chunk rather than widen it for a larger sample? */
static int better_to_chop_here_vs_using_more_bits (int bits_for_element, 
						   int bits_for_chunk, 
						   int num_in_chunk);
/* heuristic: end the chunk because the following samples need fewer bits? */
static int  better_to_chop_here_and_use_fewer_bits_for_future(
		int bits_for_element, int bits_for_chunk, short *buffer, 
		short *bits_for_element_cache,  int cache_start, 
	        int start, int end, int mask);
/* pack one chunk and write its two-byte header plus packed words to fp */
static int pack_and_write_chunk_to_file (short *buffer, int start, 
					 int num, int bits, FILE *fp,
					 int mask);
/* read one chunk (header plus packed words) from fp and unpack it */
static int read_shortpack_chunk_from_file (FILE *fp, short *buffer, int start,
					  int max_elements, int mask);
/* bit-pack num samples of buffer into compressed_data */
static int pack_short_array_into_buffer(short *buffer, int start, 
					int num, int bits, int mask, 
					short *compressed_data);
/* inverse of pack_short_array_into_buffer */
static int  unpack_short_array_into_buffer(short *buffer, int start, 
		   int max_elements,  int bits, int mask, 
		   short *compressed_data, int num);


/***************************************************************************
       
       shortpack_io.c
       
       Peter Daly			pdaly@goldilocks.lcs.mit.edu
       Spoken Language Systems Group
       MIT Laboratory for Computer Science
       
       Creation Date: 	12-JUL-1990
       
       This program compresses utterance files made up of short integers.  
       Refer to the manpage for more information.
       This segment comprises the "guts" of the shortpack algorithm.
       
       Revision History:
       
       08-OCT-1990	Modified to not compress files which have an odd 
       number of bytes, since they are obviously not made
       up of short integers (pdaly).
       
       28-AUG-1991	Rewrote many routines to make implementation clearer
       (mp)
       
       Compression algorithm by Mike Phillips, MIT/LCS/SLS
       
       Copyright (C) 1990, 1991	Massachusetts Institute of Technology
       All Rights Reserved
       
**************************************************************************/
     
     /*ok, here's how shortpack works:
       
       The idea is to pack short integers into as few bits as required for
       sequences of speech samples.  For each sequence, a 16 bit header is
       written that says "here comes n words using m bits per word".  The
       compression code needs to decide where to break up the sequences to
       optimize the tradeoff between transmitting fewer bits for the sequence
       vs the cost of transmitting another header for another sequence break.
       The present algorithm does an OK job, but if you went to more work in
       the compression algorithm, you could probably save a percent or two of
       the file size.
       
       If you just want to read and write shortpacked data from your
       programs, use the following two functions:
       
       ReadShortpackedData(array, num_elements, fp) - use this for reading
       shortpacked data into an array (works like fread())
       WriteShortpackedData(array, num_elements, fp) - use this for writing
       shortpacked data from an array (works like fwrite())
       
       If you want to do file-to-file compression (like the shortpack utility
       does), use WriteShortpackReadingFromFile() and
       ReadShortpackWritingToFile()
       
       These routines use a circular buffer to hold data between input and
       output of the shortpacked data.
       
       The file-to-file and file-to-memory routines use the same low level
       routines (so the low level routines support the use of circular
       buffers (they use a mask before indexing the buffer array - the
       memory-to-file routines just use ALL_ONES as a mask))
       
       
       All of these routines should work on machines with either byte order
       (the routines load and dump shortpack file format data to and from the
       machine's short representation).
       
       If you have any problems, let me know - mike (phillips@goldilocks.lcs.mit.edu)
       
       */
       
#define MAX_SEQUENCE_SIZE 255
       
     /*this goes through the input array, looks for chunks to pack into some
       number of bits, compresses those into a temporary array, and writes a
       header followed by the chunk of data
       */
     
/*
 * WriteShortpackedData: shortpack num_elements shorts taken from array and
 * write them to fp.
 *
 * Returns the number of elements consumed from array, or -1 as soon as
 * pack_and_write_chunk_to_file() reports a failure.
 */
int WriteShortpackedData (short *array, int num_elements, FILE *fp)
{
  int done;       /* elements of array already written */
  int chunk_len;  /* size of the chunk chosen for this round */
  int bits;       /* bits per sample chosen for that chunk */

  for(done = 0; done < num_elements; done += chunk_len)
    {
      /* array is linear, so ALL_ONES is used as the index mask */
      chunk_len = find_next_chunk_to_compress(array, done, num_elements,
					      &bits, ALL_ONES);
      if(pack_and_write_chunk_to_file(array, done, chunk_len, bits, fp,
				      ALL_ONES) < 0)
	{
	  return -1;
	}
    }
  return done;
}


/*
 * ReadShortpackedData: read shortpacked chunks from fp and unpack them into
 * array until num_elements shorts have been produced.
 *
 * Returns the number of elements stored, or -1 on failure.  A chunk read
 * that yields 0 elements before the array is full is treated as a failure
 * and recorded in ErrorString.
 */
int ReadShortpackedData(short *array, int num_elements, FILE *fp)
{
  int filled = 0;  /* elements of array already produced */
  int got;         /* elements delivered by the latest chunk */

  while(filled < num_elements)
    {
      /* array is linear, so ALL_ONES is used as the index mask */
      got = read_shortpack_chunk_from_file(fp, array, filled, num_elements,
					   ALL_ONES);
      if(got < 0)
	{
	  return -1;
	}
      if(got == 0)
	{
	  ErrorString = "read_shortpack_chunk_from_file read 0";
	  return -1;
	}
      filled += got;
    }
  return filled;
}

#define CBUF_SIZE 512
#define CBUF_MASK 511

/* Exchange the two bytes of each of the count shorts starting at data
   (like the rest of this file, this assumes a short is two bytes). */
static void byte_swap_shorts_in_place (short *data, int count)
{
  char *p = (char *) data;
  char tmp;
  int i;

  for(i = 0; i < count; i++)
    {
      tmp = p[0];
      p[0] = p[1];
      p[1] = tmp;
      p += 2;
    }
}

/*
 * WriteShortpackReadingFromFile: read raw shorts from ifp, shortpack them
 * and write the packed chunks to ofp.
 *
 * Input is staged in a circular buffer of CBUF_SIZE shorts: each round
 * refills the free part of the buffer from ifp, then packs and writes one
 * chunk starting at the read position.  The bytes of every short read are
 * exchanged when (swap ^ ieee_order) is zero.
 *
 * Returns 1 once the input is exhausted and the buffer is empty, -1 on a
 * read error on ifp or when writing a chunk fails.
 *
 * fread() returns an unsigned count, so a read error is detected with
 * ferror() rather than by looking for a negative count.
 */
int WriteShortpackReadingFromFile (FILE *ifp, FILE *ofp, int swap)
{
  short cbuf[CBUF_SIZE];
  int next_write,next_read; /*pointers_to_cbuf*/
  int num_read;
  int room;
  short test_short = 1;
  int num_to_read;
  int num_in_chunk;
  int bits;
  
  if(ieee_order == -1) /*set this if not set already*/
    {
      ieee_order = (htons(test_short) == test_short);
    }
  
  next_read = next_write = 0;
  while(1)
    { /*first read data from file into cbuf, swapping bytes if needed*/
      if(next_read <= next_write)
	{
	  /* free space runs from next_write to the end of the array */
	  room = CBUF_SIZE - next_write;
	  num_read = (int) fread(&(cbuf[next_write]), sizeof(short), room, ifp);
	  if(ferror(ifp))
	    {
	      if( errno < sys_nerr ) {
		ErrorString = sys_errlist[errno];
	      } else {
		ErrorString = "fread failed";
	      }
	      return -1;
	    }
	  /* nothing new and nothing pending: all input has been packed */
	  if((num_read == 0) && (next_read == next_write)) break;
	  
	  if(!(swap ^ ieee_order))
	    {
	      byte_swap_shorts_in_place(&(cbuf[next_write]), num_read);
	    }
	  if(num_read < room)
	    {
	      next_write += num_read;
	    }
	  else
	    {
	      next_write = 0; /* filled up to the end: wrap around */
	    }
	}
      if(next_read > next_write)
	{
	  /* free space runs from next_write up to the read position */
	  room = next_read - next_write;
	  num_read = (int) fread(&(cbuf[next_write]), sizeof(short), room, ifp);
	  if(ferror(ifp))
	    {
	      if( errno < sys_nerr ) {
		ErrorString = sys_errlist[errno];
	      } else {
		ErrorString = "fread failed";
	      }
	      return -1;
	    }
	  if(!(swap ^ ieee_order))
	    {
	      byte_swap_shorts_in_place(&(cbuf[next_write]), num_read);
	    }
	  next_write += num_read;
	}
      /*now compress and write next chunk*/
      if(next_write == next_read)
	{
	  /* equal pointers at this point mean the buffer is full */
	  num_to_read = CBUF_SIZE;
	}
      else if(next_write > next_read)
	{
	  num_to_read = next_write - next_read;
	}
      else
	{
	  num_to_read = CBUF_SIZE - (next_read - next_write);
	}
      num_in_chunk = find_next_chunk_to_compress(cbuf, next_read, 
						 next_read + num_to_read, &bits,
						 CBUF_MASK);
      if(pack_and_write_chunk_to_file(cbuf, next_read, num_in_chunk, bits, ofp,
				      CBUF_MASK) < 0)
	return -1;
      next_read += num_in_chunk;
      next_read &= CBUF_MASK;
    }
  return 1;
}


/*
 * ReadShortpackWritingToFile: read shortpacked chunks from ifp, unpack them
 * and write the resulting raw shorts to ofp.
 *
 * Each chunk is unpacked into a circular buffer of CBUF_SIZE shorts and
 * then flushed to ofp, in two pieces when the data wraps past the end of
 * the buffer.  The bytes of every short are exchanged before writing when
 * (swap ^ ieee_order) is zero.
 *
 * Returns 1 when read_shortpack_chunk_from_file() delivers 0 elements,
 * -1 when it fails or when fwrite() writes fewer shorts than requested.
 */
int ReadShortpackWritingToFile(FILE *ifp, FILE *ofp, int swap)
{
  short ring[CBUF_SIZE];
  int head = 0;      /* next slot to fill with unpacked samples */
  int tail = 0;      /* next slot to flush to ofp */
  short probe = 1;
  int space;         /* capacity offered to the unpacker */
  int got;           /* samples delivered by the latest chunk */
  int count;         /* shorts to flush in the current piece */
  int sent;
  int k;
  char *bytes;
  char hold;

  if(ieee_order == -1) /*set this if not set already*/
    ieee_order = (htons(probe) == probe);

  for(;;)
    {
      if(head == tail)
	space = CBUF_SIZE;
      else if(head > tail)
	space = head - tail;
      else
	space = CBUF_SIZE - (tail - head);

      got = read_shortpack_chunk_from_file(ifp, ring, head, head + space,
					   CBUF_MASK);
      if(got == 0)
	break;
      if(got < 0)
	return -1;

      head = (head + got) & CBUF_MASK;

      /* write data from the ring to the file, swapping bytes if needed */
      if(tail >= head)
	{
	  /* pending data wraps: flush the piece up to the end of the array */
	  count = CBUF_SIZE - tail;
	  if(!(swap ^ ieee_order))
	    {
	      bytes = (char*)&(ring[tail]);
	      for(k = 0; k < count; k++)
		{
		  hold = bytes[2*k];
		  bytes[2*k] = bytes[2*k + 1];
		  bytes[2*k + 1] = hold;
		}
	    }
	  sent = fwrite(&(ring[tail]), sizeof(short), count, ofp);
	  if(sent < count)
	    {
	      ErrorString = "read_shortpack_writing_to_file: HAD_TROUBLE_WRITING!!\n";
	      return -1;
	    }
	  tail = 0;
	}
      if(tail < head)
	{
	  /* flush the contiguous piece between tail and head */
	  count = head - tail;
	  if(!(swap ^ ieee_order))
	    {
	      bytes = (char*)&(ring[tail]);
	      for(k = 0; k < count; k++)
		{
		  hold = bytes[2*k];
		  bytes[2*k] = bytes[2*k + 1];
		  bytes[2*k + 1] = hold;
		}
	    }
	  sent = fwrite(&(ring[tail]), sizeof(short), count, ofp);
	  if(sent < count)
	    {
	      ErrorString = "read_shortpack_writing_to_file: HAD_TROUBLE_WRITING!!\n";
	      return -1;
	    }
	  tail += count;
	}
    }
  return 1;
}


/*
 * find_bits_needed(): determine the minimum number of bits needed to
 * represent the magnitude of a number (the sign is stored separately by
 * the packer).
 *
 * Returns 0 for 0, otherwise the position of the highest set bit among
 * bits 0..14 of |num|, plus one (so 1..15).
 *
 * -32768 deliberately yields 0: its magnitude has no bit set below bit 15,
 * and the unpacker maps "sign set, magnitude zero" back to that value.
 *
 * The magnitude is computed in unsigned arithmetic so that -32768 does not
 * have to be narrowed back into a short.
 */

static int find_bits_needed(short num)
{
  unsigned int magnitude;
  int i;
  
  if (num < 0)
    magnitude = 0u - (unsigned int) num;
  else
    magnitude = (unsigned int) num;
  
  for (i = 14; i >= 0; i--)
    if ((magnitude & (1u << i)) != 0)
      return (i + 1);
  
  return (0);
}


/*
 * find_next_chunk_to_compress: (compression) look ahead in the uncompressed
 * data, starting at index start, and decide how many samples the next chunk
 * should hold and how many bits per sample it needs.
 *
 * At most MAX_SEQUENCE_SIZE samples, and none at or beyond num_elements,
 * are considered.  buffer is indexed as buffer[i & mask].  The chunk is
 * ended early when one of the two better_to_chop_* heuristics says so.
 *
 * The bit width for the chunk is stored in *pbits; the return value is the
 * number of samples in the chunk.
 */

static int find_next_chunk_to_compress (short *buffer, int start, 
					int num_elements, int *pbits, 
					int mask)
{
  short width_cache[MAX_SEQUENCE_SIZE]; /* per-sample widths, -1 = unknown */
  int stop;         /* one past the last index that may join the chunk */
  int span;
  int pos;
  int slot;
  int width;        /* bits needed by the sample at pos */
  int chunk_width;  /* bits needed by the chunk so far */
  short magnitude;

  stop = ((num_elements - start) > MAX_SEQUENCE_SIZE)
    ? start + MAX_SEQUENCE_SIZE : num_elements;
  span = stop - start;
  for(slot = 0; slot < span; slot++)
    width_cache[slot] = -1;

  chunk_width = 0;

  for(pos = start; pos < stop; pos++)
    {
      slot = pos - start;
      if(width_cache[slot] < 0)
	{
	  magnitude = buffer[pos & mask];
	  if(magnitude < 0) magnitude *= -1;
	  width_cache[slot] = find_bits_needed(magnitude);
	}
      width = width_cache[slot];

      if(width > chunk_width)
	{
	  /* this sample needs more bits: chop before it or widen the chunk */
	  if((pos != start)
	     && better_to_chop_here_vs_using_more_bits(width, chunk_width,
						       (pos - start)))
	    break;
	  chunk_width = width;
	}
      else if((width < chunk_width)
	      && better_to_chop_here_and_use_fewer_bits_for_future(
			width, chunk_width, buffer, width_cache, start,
			pos, stop, mask))
	{
	  /* the samples ahead are small enough to pay for a new header */
	  break;
	}
    }

  *pbits = chunk_width;
  return (pos - start);
}


/* Compare the number of output shorts needed if the chunk is ended now
 * (num_in_chunk samples at bits_for_chunk bits each, plus one short for the
 * header of the chunk that would follow) with the number needed if the same
 * samples were stored at the wider bits_for_element.
 *
 * Returns 1 when chopping now needs fewer shorts, 0 otherwise.
 */

static int better_to_chop_here_vs_using_more_bits (int bits_for_element, 
						   int bits_for_chunk, 
						   int num_in_chunk)
{
  int bits_if_chopped = bits_for_chunk * num_in_chunk;
  int bits_if_widened = bits_for_element * num_in_chunk;
  int shorts_if_chopped;
  int shorts_if_widened;

  /* round each bit count up to whole 16-bit words */
  shorts_if_chopped = bits_if_chopped / 16;
  if ((bits_if_chopped % 16) != 0)
    shorts_if_chopped++;
  shorts_if_chopped++; /* add one for the header */

  shorts_if_widened = bits_if_widened / 16;
  if ((bits_if_widened % 16) != 0)
    shorts_if_widened++;

  return (shorts_if_chopped < shorts_if_widened) ? 1 : 0;
}

/*
 * Decide whether to end the chunk at index start because the samples that
 * follow need fewer bits than bits_for_chunk.
 *
 * Up to 15 samples after start (never at or beyond end) are examined.
 * While the widest sample seen so far is still narrower than the chunk,
 * the bits saved per sample times the distance looked ahead is compared
 * with the 16-bit header cost.  Once the saving exceeds the header, every
 * position from start+1 up to that point is checked recursively: if an
 * even narrower run would trigger a chop there anyway (steadily decreasing
 * amplitude, for example), the answer is "do not chop here".
 *
 * bits_for_element_cache holds per-sample bit widths (-1 = not computed
 * yet), indexed relative to cache_start; it is filled in on demand.
 *
 * Returns 1 to chop at start, 0 otherwise.
 */

static int  better_to_chop_here_and_use_fewer_bits_for_future(
		int bits_for_element, int bits_for_chunk, short *buffer, 
		short *bits_for_element_cache,  int cache_start, 
	        int start, int end, int mask)
{
  int widest = bits_for_element; /* widest sample seen from start on */
  int horizon;                   /* one past the last index examined */
  int saving;                    /* bits saved per sample by chopping */
  int ahead;
  int probe;
  int slot;
  int width;
  short magnitude;

  horizon = (end > start + 16) ? start + 16 : end;

  for(ahead = start + 1; ahead < horizon; ahead++)
    {
      slot = ahead - cache_start;
      if(bits_for_element_cache[slot] < 0)
	{
	  magnitude = buffer[ahead & mask];
	  if(magnitude < 0) magnitude *= -1;
	  bits_for_element_cache[slot] = find_bits_needed(magnitude);
	}
      width = bits_for_element_cache[slot];
      if(width > widest)
	widest = width;

      saving = bits_for_chunk - widest;
      if(saving <= 0)
	return 0; /* no hope for saving any bits */

      /* keep looking until the total saving exceeds the header */
      if((saving * (ahead - start)) <= 16)
	continue;

      /* check if we are going to reduce bits in the near future
	 (before the current saving is realized) */
      for(probe = start + 1; probe <= ahead; probe++)
	{
	  slot = probe - cache_start;
	  if(bits_for_element_cache[slot] < 0)
	    {
	      magnitude = buffer[probe & mask];
	      if(magnitude < 0) magnitude *= -1;
	      bits_for_element_cache[slot] = find_bits_needed(magnitude);
	    }
	  width = bits_for_element_cache[slot];

	  if(better_to_chop_here_and_use_fewer_bits_for_future(
		   width, widest, buffer, bits_for_element_cache,
		   cache_start, probe, end, mask))
	    {
	      /* going to chop in the future anyway, so don't chop here */
	      return 0;
	    }
	}
      return 1;
    }

  return 0;
}


/*
 * pack_and_write_chunk_to_file(): (compression) pack num samples of buffer,
 * starting at index start (indices are ANDed with mask), using bits
 * magnitude bits per sample, and write the chunk to fp.
 *
 * On file a chunk is a two-byte header (sample count, then bits per
 * sample) followed by the packed 16-bit words; the bytes of each word are
 * swapped before writing when the host is not in IEEE (network) order.
 *
 * Returns 1 on success, -1 on failure with ErrorString set.
 *
 * num and bits are range-checked first because the count is written as one
 * byte and the packed words must fit in the MAX_SEQUENCE_SIZE-short local
 * buffer.  The write loop stops as soon as fwrite() makes no progress, so
 * a failing stream is reported instead of being retried forever.
 */

static int pack_and_write_chunk_to_file (short *buffer, int start, 
					 int num, int bits, FILE *fp,
					 int mask)
{
  int i;
  char *char_ptr;
  char temp_char;
  char temp_char2;
  short compressed_data[MAX_SEQUENCE_SIZE];
  int els;
  int   samples_write, samples_need;
  short test_short = 1;
  
  if(num < 0 || num > MAX_SEQUENCE_SIZE || bits < 0 || bits > 15)
    {
      ErrorString = "pack_and_write_chunk_to_file: chunk size or bit width out of range";
      return -1;
    }
  
  els = pack_short_array_into_buffer(buffer, start, num, bits, mask, 
				     compressed_data);
  
  /* header byte 1: number of samples in the chunk */
  if(fputc((unsigned char) num, fp) == EOF)
    {
      if( errno < sys_nerr ) {
	ErrorString = sys_errlist[errno];
      } else {
	ErrorString = "fputc failed";
      }
      return -1;
    }

  /* header byte 2: magnitude bits per sample */
  if(fputc((unsigned char) bits, fp) == EOF)
    {
      if( errno < sys_nerr ) {
	ErrorString = sys_errlist[errno];
      } else {
	ErrorString = "fputc failed";
      }

      return -1;
    }
  
  if(ieee_order == -1) /*set this if not set already*/
    ieee_order = (htons(test_short) == test_short);
  
  /* if the machine is NOT IEEE order, swap bytes of data before writing.*/
  if (!ieee_order)
    {
      char_ptr = (char*)compressed_data;
      for(i=0;i<els;i++)
	{
	  temp_char = *char_ptr;
	  temp_char2 = *(char_ptr+1);
	  *(char_ptr++) = temp_char2;
	  *(char_ptr++) = temp_char;
	}
    }
  /* write the "shortpacked" data to file */
  samples_need = 0;

  while (samples_need < els) {
    samples_write = (int) fwrite(compressed_data + samples_need, sizeof(short), 
				 els-samples_need, fp);
    if (samples_write <= 0)
      break; /* no progress: give up and report the failure below */
    samples_need += samples_write;
  }

  if(samples_need != els)
    {
      if( errno < sys_nerr ) {
	ErrorString = sys_errlist[errno];
      } else {
	ErrorString = "fwrite failed";
      }

      return -1;
    }
  return 1;
}


/*
 * read_shortpack_chunk_from_file(): (decompression) read one chunk from fp
 * and unpack it into buffer starting at index start (indices are ANDed
 * with mask); no sample is stored at or beyond index max_elements.
 *
 * A chunk is a two-byte header (sample count, then bits per sample)
 * followed by the packed 16-bit words.
 *
 * return number of shorts unpacked
 * 0 if the header could not be read (end of file)
 * -1 on error, with ErrorString set
 *
 * The bits-per-sample byte comes straight from the file, so it is checked
 * before use: a value above 15 would make the packed data larger than the
 * MAX_SEQUENCE_SIZE-short local buffer.  The read loop stops as soon as
 * fread() makes no progress, so a truncated file is reported as an error
 * instead of being retried forever.
 */

static int read_shortpack_chunk_from_file (FILE *fp, short *buffer, int start,
					  int max_elements, int mask)
{
  int i;
  int num_read, samples_read;
  int num, bits_used, els;
  char *char_ptr;
  char temp_char;
  char temp_char2;
  int num_unpacked;
  short compressed_data[MAX_SEQUENCE_SIZE];
  short test_short = 1;
  
  num = fgetc(fp);
  bits_used = fgetc(fp);
  
  if (num < 0 || bits_used < 0)
    return 0;
  
  if (bits_used > 15)
    {
      ErrorString = "read_shortpack_chunk_from_file: bad bits-per-sample value in chunk header";
      return -1;
    }
  
  /* each sample takes bits_used magnitude bits plus one sign bit;
     round the total up to whole 16-bit words */
  els = (num * (bits_used + 1)) / 16;
  if ((num * (bits_used + 1)) % 16 != 0)
    els++;

  num_read = 0;
  while (num_read<els) {
    samples_read = (int) fread(compressed_data + num_read, sizeof(short), 
			       els-num_read, fp);
    if (samples_read <= 0)
      break; /* end of file or error: reported below */
    num_read += samples_read;
  }

  if(num_read != els)
    {
      if( !ferror(fp) ) {
	ErrorString = "read_shortpack_chunk_from_file: unexpected end of file inside chunk";
      } else if( errno < sys_nerr ) {
	ErrorString = sys_errlist[errno];
      } else {
	ErrorString = "fread failed";
      }
      return -1;
    }
  if(ieee_order == -1) /* set this if not set already */
    ieee_order = (htons(test_short) == test_short);
  
  /*
   * If this machine does not follow IEEE byte ordering,
   * swap the input bytes here
   */
  if (!ieee_order)
    {
      char_ptr = (char*)compressed_data;
      for(i=0;i<els;i++)
	{
	  temp_char = *char_ptr;
	  temp_char2 = *(char_ptr+1);
	  *(char_ptr++) = temp_char2;
	  *(char_ptr++) = temp_char;
	}
    }
  num_unpacked = unpack_short_array_into_buffer(buffer, start, max_elements,
						bits_used, mask, 
						compressed_data, num);
  
  return num_unpacked;
}


/*
 * pack_short_array_into_buffer(): bit-pack num samples of buffer, starting
 * at index start (indices are ANDed with mask), into compressed_data.
 *
 * Each sample is written as one sign bit followed by bits magnitude bits,
 * most significant bit first; output words are filled from their most
 * significant bit downwards.  The words that will be used are cleared
 * first.
 *
 * Returns the number of 16-bit words written to compressed_data.
 */
static int pack_short_array_into_buffer(short *buffer, int start, 
					int num, int bits, int mask, 
					short *compressed_data)
{
  int total_bits = num * (bits + 1);
  int words;      /* output words needed for total_bits */
  int out = 0;    /* output word currently being filled */
  int used = 0;   /* bits already consumed in that word */
  int n;
  int bit;
  int magnitude;
  short sample;

  words = total_bits / 16;
  if ((total_bits % 16) != 0)
    words++;
  for (n = 0; n < words; n++)
    compressed_data[n] = 0;

  for (n = 0; n < num; n++)
    {
      sample = buffer[(start + n) & mask];

      /* sign bit first */
      used++;
      if (sample < 0)
	{
	  compressed_data[out] |= log2s[16 - used];
	  magnitude = -sample;
	}
      else
	{
	  magnitude = sample;
	}
      if (used == 16)
	{
	  used = 0;
	  out++;
	}

      /* then the magnitude, most significant bit first */
      for (bit = bits; bit-- > 0; )
	{
	  used++;
	  if ((magnitude & log2s[bit]) != 0)
	    compressed_data[out] |= log2s[16 - used];
	  if (used == 16)
	    {
	      used = 0;
	      out++;
	    }
	}
    }

  /* a partly filled last word still counts */
  return (used == 0) ? out : (out + 1);
}


/*
 * unpack_short_array_into_buffer(): decode num samples from compressed_data
 * into buffer, starting at index start (indices are ANDed with mask).
 *
 * Each sample is stored as one sign bit followed by bits magnitude bits,
 * most significant bit first, with input words read from their most
 * significant bit downwards.  A set sign bit with a zero magnitude decodes
 * to -32768.
 *
 * Returns num on success.  Returns -1 with ErrorString set when bits is
 * outside 0..15, or when a sample would land at or beyond index
 * max_elements (samples decoded before that point have already been
 * stored).
 *
 * Packed words are fetched only when one of their bits is needed, so
 * nothing beyond the words that hold the chunk is read.
 */
static int  unpack_short_array_into_buffer(short *buffer, int start, 
		   int max_elements,  int bits, int mask, 
		   short *compressed_data, int num)
{
  int i, k;
  int word_index = 0;   /* packed word currently being read */
  int bit_index = 15;   /* next bit to read in that word, 15 = MSB */
  int negative;
  int buffer_ind;
  unsigned int word;
  unsigned int magnitude;
  
  if (bits < 0 || bits > 15)
    {
      ErrorString = "unpack_short_array_into_buffer: bits per sample out of range";
      return -1;
    }
  
  for (i = 0; i < num; i++)
    {
      if((start + i) >= max_elements)
	{
	  ErrorString = "unpack_short_array_into_buffer:HEY! something seems wrong - ran out of space in buffer!! (just truncating)\n";
	  return -1;
	}
      buffer_ind = (start+i) & mask;
      
      /* sign bit */
      word = (unsigned short) compressed_data[word_index];
      negative = (int) ((word >> bit_index) & 1u);
      if (--bit_index < 0)
	{
	  bit_index = 15;
	  word_index++;
	}
      
      /* magnitude bits, most significant first */
      magnitude = 0;
      for (k = bits - 1; k >= 0; k--)
	{
	  word = (unsigned short) compressed_data[word_index];
	  if (((word >> bit_index) & 1u) != 0)
	    magnitude |= (1u << k);
	  if (--bit_index < 0)
	    {
	      bit_index = 15;
	      word_index++;
	    }
	}
      
      if (negative)
	{
	  if (magnitude != 0)
	    buffer[buffer_ind] = (short) -(int) magnitude;
	  else
	    buffer[buffer_ind] = (short) (-32767 - 1); /* "negative zero" */
	}
      else
	{
	  buffer[buffer_ind] = (short) magnitude;
	}
    }
  return num;
}





