/* 
           merge.c -- mix linear 16-bit sound files

   merge sound files -- this is equivalent to a very simple add-sound call 
   the assumption here is that we're being called from open-input and can  
   take advantage of the fact that the two files are compatible and no 
   processing at all is going on.  We get the main file, the start point   
   in that file, the number of samples to merge, and the merged-in file.   
   This all takes place when all other files are assumed to be closed.     
   On the 68040 it can read/merge/write about .5 million samples per second

   this code called from fasmix in sound.lisp, tied into clm in next-io.lisp,
   but there's nothing here that depends on lisp or clm, so it could easily
   be turned into a stand-alone C program.

   (ff:defforeign 'c-mix-compatible-sounds :entry-point "_mix_compatible_sounds" 
                               :arguments '(string fixnum string fixnum fixnum fixnum fixnum array) 
			       :return-type :integer) 

   (ff:defforeign 'c-mix-mono-to-stereo-sounds :entry-point "_mix_mono_to_stereo_sounds" 
                               :arguments '(string fixnum string fixnum fixnum fixnum fixnum fixnum fixnum array) 
			       :return-type :integer) 

   (ff:defforeign 'c-mix-stereo-to-mono-sounds :entry-point "_mix_stereo_to_mono_sounds" 
                               :arguments '(string fixnum string fixnum fixnum fixnum fixnum fixnum fixnum fixnum array) 
			       :return-type :integer) 

   (ff:defforeign 'c-mix-stereo-to-stereo-sounds :entry-point "_mix_stereo_to_stereo_sounds" 
                               :arguments '(string fixnum string fixnum fixnum fixnum fixnum fixnum fixnum) 
			       :return-type :integer) 

   (ff:defforeign 'c-mix-quad-to-quad-sounds :entry-point "_mix_quad_to_quad_sounds" 
                               :arguments '(string fixnum string fixnum fixnum fixnum fixnum fixnum fixnum fixnum) 
			       :return-type :integer) 

   the final array is an array of integers, grouped by twos as pass-value (32 bit integer) | rate (24 bit fraction)

   c-mix-compatible-sounds takes the names (strings) of the output and input files, assumed to
      be "compatible" (that is, same sampling rate, same number of channels, same type),
      and mixes the input into the output with an optional scale factor, and an optional envelope.

   c-mix-mono-to-stereo-sounds treats the same cases, but assumes the input is 1 channel and
      the output is stereo.  The input can be scaled/enveloped, and can be directed to either
      stereo channel, or both channels.

   c-mix-stereo-to-mono-sounds is the obvious reverse case, mixing stereo input to mono output.  Here
      either or both channels can be included, scaled, enveloped, etc.

   c-mix-stereo-to-stereo-sounds handles the one special case of scaling by four
      factors (this provides channel swapping and so on).

   More complicated cases can be handled with clm instruments like add-sound.

*/


#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>

#ifndef AKCL

/* from io.c*/
extern int clm_read(int fd, char *buf, int n);
extern int clm_write(int fd, char *buf, int n);
extern short clm_short(short n);
extern int clm_int(int n);
extern long clm_seek(int fd, long offset, int origin);
extern void open_clm_file_descriptors (int tfd, int df, int ds, int dl);
extern void clm_close(int tfd);
extern int clm_reopen_write(char *arg);
extern int clm_open_read(char *arg); 

/* from headers.c */
extern int c_read_header_with_fd (int chan);
extern int c_snd_header_data_size (void);
extern int c_snd_header_datum_size (void);
extern int c_snd_header_data_location (void);
extern int c_snd_header_chans (void);
extern int c_snd_header_type (void);
extern int c_snd_header_format (void);
extern void c_update_header_with_fd(int chan, int type, int siz);

#endif

/* Read the sound-file header on fd, then pass the format, datum size and data
   location reported by the header module to open_clm_file_descriptors. */
void merge_read_header(int fd)
{
  int format,datum_size,data_location;

  c_read_header_with_fd(fd);

  format = c_snd_header_format();
  datum_size = c_snd_header_datum_size();
  data_location = c_snd_header_data_location();

  open_clm_file_descriptors(fd,format,datum_size,data_location);
}


#define SHIFT 10
#define ENV_SHIFT 14
/*
   since we're trying to use short integer arithmetic in some places here, there
   are 16 bits in our scalers (worst case).  The SHIFT amount here sets an upper
   limit on the integer part of that scale factor -- i.e. SHIFT=15 means all scalers
   have to be between -1..1.  merge.c and sound.lisp (fasmix) have to be in sync
   on this -- also envelopes have an additional ENV_SHIFT bit shift for the post-scaling
   fractional part of the ramp.
*/

/*
   run_env -- advance the envelope by one step.

   Operates on the caller's locals: scaler, rate, curenvtime, pass, curenvloc and env.
   Adds rate to scaler and bumps curenvtime; once curenvtime exceeds pass, the next
   (rate, pass) pair is fetched from env (rate at env[curenvloc+1], pass at
   env[curenvloc+2]) and curenvloc moves on by 2.

   Wrapped in do { } while (0) so that "run_env;" is a single statement and stays
   correct inside an unbraced if/else.
*/
#define run_env  do {                               \
                   scaler += rate;                  \
                   curenvtime += 1;                 \
                   if (curenvtime > pass)           \
                     {                              \
                       rate = env[curenvloc+1];     \
                       curenvloc += 2;              \
                       pass = env[curenvloc];       \
                     }                              \
                 } while (0)

#ifndef MAC
/* sizes in bytes; parenthesized so they expand safely inside larger expressions */
#define MERGEBUFLIM (64*1024)
#define MONOBUFLIM (32*1024)
static char *mergebuf,*mainbuf;
static int mergebuf_ok = -1;   /* -1 = buffers not yet allocated, 0 = buffers ready */

/*
   check_mergebuf -- allocate the two MERGEBUFLIM-byte work buffers on first use.

   Returns 0 when mergebuf and mainbuf are available, -1 if allocation failed.
   On failure nothing is kept and mergebuf_ok stays -1, so a later call tries again.
*/
int check_mergebuf(void)
{
  if (mergebuf_ok == -1)
    {
      mergebuf = (char *)calloc((MERGEBUFLIM),sizeof(char));
      mainbuf = (char *)calloc((MERGEBUFLIM),sizeof(char));
      if ((mergebuf == NULL) || (mainbuf == NULL))
        {
          free(mergebuf);      /* free(NULL) is a no-op */
          free(mainbuf);
          mergebuf = NULL;
          mainbuf = NULL;
          return(-1);
        }
      mergebuf_ok = 0;
    }
  return(0);
}

#else

/* on the Mac the buffers are small static arrays, so there is nothing to allocate */
#define MERGEBUFLIM 1024
#define MONOBUFLIM 512
static char mergebuf[MERGEBUFLIM];
static char mainbuf[MERGEBUFLIM];

/* check_mergebuf -- buffers are static here; always returns 0 */
int check_mergebuf(void)
{
  return(0);
}
#endif

/* Report MERGEBUFLIM, the size (in chars) of each of the two mixing buffers. */
int fasmix_buffer_size (void)
{
  int size_in_bytes;

  size_in_bytes = (MERGEBUFLIM);
  return size_in_bytes;
}


/*    this is not yet implemented

   more than half the actual time spent mixing is going into file IO -- to reduce
   this, the caller has an added parameter (called IO_op here, safe in lisp) that
   has the following values -- 
      
       0=unsafe (all IO ops performed)
       1=no output read
       2=no output write
       3=no output read or write
   
       4 on if no input read (so 5..7 are no read+output decision from lower 2 bits)

   The point of all this is that the in-core buffers (mainbuf and mergebuf) are large
   enough that it might often be the case that we already have the needed data in-core.
   Of course, on the Mac there's no chance we'll ever get a hit.
*/


/* Step 1. mix mono to mono or stereo to stereo */

/*
   mix_compatible_sounds -- add mergefile into mainfile in place; both files are
   treated as 16-bit data with the channel count read from mainfile's header.

   mainfile, main_sample    output file and the frame at which mixing starts
   mergefile, merge_sample  input file and the frame at which reading starts
   samples                  frames to mix, or -1 to use the data size from mergefile's header
   scaler                   use_env == 0: fixed-point gain with SHIFT fractional bits
                            (0 means mix unscaled); use_env != 0: starting envelope value,
                            which is shifted right by ENV_SHIFT before the multiply
   use_env                  non-zero to ramp the gain with env
   env                      envelope data: env[1] is the first rate, env[2] the first pass
                            count, then (rate, pass) pairs as consumed by run_env

   Returns -1 if mainfile cannot be opened, -2 if mergefile cannot be opened, 0 if the
   seek in mergefile falls short, otherwise samples_merged >> (chans-1).

   If mainfile is shorter than the start point it is padded with zeros; data read from
   mergefile beyond the end of mainfile replaces (rather than adds to) the buffer contents.
*/
int mix_compatible_sounds(char *mainfile, int main_sample, char *mergefile, 
                          int merge_sample, int samples, 
                          int scaler, int use_env, int *env)
{

  int mainfd,mergefd,main_loc,merge_loc,file_type;
  int rate=0,pass=0,curenvtime=0,curenvloc=0;
  char *i,*j;
  short *ii,*jj;
  int k,bytes,rtn_main,rtn_merge,lim,n,bufnum,left,curloc,curbytes,bytes_merged;
  short scl=0;
  int maindataloc,mergedataloc,chans,maindatasize,mainloc,mergeloc,samples_merged,newdatasize;
  int gap,chunk;

  /* open the two files, read the header info (assumed to be same srate and channels) */

  if ((mainfd=clm_reopen_write(mainfile)) == -1) return(-1);
  if ((mergefd=clm_open_read(mergefile)) == -1) {close(mainfd); return(-2);}

  check_mergebuf();

  merge_read_header(mainfd);
  maindataloc = c_snd_header_data_location();
  chans = c_snd_header_chans();
  maindatasize = c_snd_header_data_size();
  mainloc = maindataloc+(chans*2*main_sample);
  file_type = c_snd_header_type();

  merge_read_header(mergefd);
  mergedataloc = c_snd_header_data_location();
  mergeloc=mergedataloc+(chans*2*merge_sample);
  if (samples == -1) 
    {
      samples = c_snd_header_data_size();
    }
  else samples = (samples*chans);

  main_loc=clm_seek(mainfd,mainloc,0);

  if (main_loc<mainloc)   /* output not long enough, so add zeros until main_byte_location */
    {
      /* the gap can be larger than mainbuf, so zero one buffer's worth and write it in pieces */
      gap = mainloc-main_loc;
      chunk = (gap<(MERGEBUFLIM)) ? gap : (MERGEBUFLIM);
      for (n=0,i=mainbuf;n<chunk;n++,i++)
        {
          *i=0;
        }
      while (gap>0)
        {
          chunk = (gap<(MERGEBUFLIM)) ? gap : (MERGEBUFLIM);
          clm_write(mainfd,mainbuf,chunk);
          gap -= chunk;
        }
    }

  merge_loc=clm_seek(mergefd,mergeloc,0);
  if (merge_loc<mergeloc) 
    {
      /* nothing to merge -- release both files before giving up */
      clm_close(mainfd);
      clm_close(mergefd);
      return 0;
    }
  
  /* now loop through both files, reading a bufferfull, merging and writing it back out */

  bufnum = (MERGEBUFLIM);
  lim=samples*2;
  left=lim;
  curloc=mainloc;
  bytes_merged = 0;

  if (use_env != 0)
    {
      curenvtime = 0;
      curenvloc = 2;
      rate = env[1];
      pass = env[2];
    }
  else scl = (short)scaler;

  for (n=0;n<lim;n+=bufnum)
    {
      if (left<bufnum)
        curbytes=left;
      else curbytes=bufnum;
      
      rtn_merge=clm_read(mergefd,mergebuf,curbytes);
      rtn_main=clm_read(mainfd,mainbuf,curbytes);

      if (rtn_merge<=0) break;        /* input exhausted (or read failed): nothing more to mix */
      if (rtn_main<0) rtn_main=0;     /* a failed output read is treated as "no existing data" */

      /* bytes = portion present in both files; the rest of the input (if any) is copied, not added */
      if (rtn_merge<rtn_main)
        bytes=rtn_merge;
      else bytes=rtn_main;

      if (use_env == 0)
        {
          if (scaler == 0)
            {
              for (k=0,ii=(short *)mainbuf,jj=(short *)mergebuf;k<bytes;k+=2,ii++,jj++) 
                {
                  (*ii)+=(*jj);
                }
              if (rtn_main<rtn_merge)
                {
                  for (i=mainbuf+bytes,j=mergebuf+bytes,k=bytes;k<rtn_merge;k++,j++,i++)
                    *i = *j;
                }
            }
          else
            {
              for (ii=(short *)mainbuf,jj=(short *)mergebuf,k=0;k<bytes;k+=2,ii++,jj++)
                {
                  (*ii)+=(short) (((*jj)*scl)>>SHIFT);
                }
              if (rtn_main<rtn_merge)
                {
                  for (ii=(short *)(mainbuf+bytes),jj=(short *)(mergebuf+bytes),k=bytes;k<rtn_merge;k+=2,jj++,ii++)
                    {
                      (*ii)=(short) (((*jj)*scl)>>SHIFT);
                    }
                }
            }
        }
      else
        {
          for (ii=(short *)mainbuf,jj=(short *)mergebuf,k=0;k<bytes;k+=2,ii++,jj++)
            {
              (*ii)+=(short) ((((int)(*jj))*(scaler>>ENV_SHIFT)) >> SHIFT);
              /* the outer >> SHIFT gives us a short result with sign and so on, the inner >> ENV_SHIFT keeps the */
              /* multiply from overflowing while giving us effectively ENV_SHIFT bits of fraction -- this is   */
              /* really needed in long envelopes, since 15 bits is not even a second.                  */
              run_env;
            }
          if (rtn_main<rtn_merge)
            {
              for (ii=(short *)(mainbuf+bytes),jj=(short *)(mergebuf+bytes),k=bytes;k<rtn_merge;k+=2,ii++,jj++)
                {
                  (*ii)=(short) ((((int)(*jj))*(scaler>>ENV_SHIFT)) >> SHIFT);
                  run_env;
                }
            }
        }

      /* rewind the output to where this buffer was read from and write the mixed data over it */
      bytes_merged += rtn_merge;
      clm_seek(mainfd,curloc,0);
      clm_write(mainfd,mainbuf,rtn_merge);
      curloc += rtn_merge;
      left -= bufnum;
    }
  samples_merged=(bytes_merged>>1);
  
  /* only touch the header if the mix extended the output */
  newdatasize = (2*(samples_merged+(chans*main_sample)));
  if (newdatasize > maindatasize)
    c_update_header_with_fd(mainfd,file_type,newdatasize);
  clm_close(mainfd);
  clm_close(mergefd);
  return (samples_merged>>(chans-1));
}





/* 
    step 2.  mix mono->stereo

    mono->stereo chan A|B|AB
    mono * scl -> stereo chan A|B|AB
    mono * env -> stereo chan A|B|AB

    anything more complicated can use a clm instrument like add-sound.
*/


/*
   mix_mono_to_stereo_sounds -- add a mono 16-bit mergefile into a stereo 16-bit mainfile.

   mainfile, main_sample    stereo output file and the frame at which mixing starts
   mergefile, merge_sample  mono input file and the sample at which reading starts
   samples                  input samples to mix, or -1 to use the data size from mergefile's header
   scalerA, scalerB         fixed-point gains (SHIFT fractional bits) for channels A and B
   chan                     0 = mix into channel A only (gain scalerA), 1 = channel B only
                            (gain scalerB), -1 = both channels; in the single-channel,
                            no-envelope case a gain of 0 means mix unscaled
   use_env                  non-zero to ramp the gain with env; the running envelope value
                            starts from the selected scaler (scalerA when chan is -1) and,
                            when chan is -1, channel B gets the enveloped value scaled by scalerB
   env                      envelope data: env[1] is the first rate, env[2] the first pass
                            count, then (rate, pass) pairs as consumed by run_env

   Returns -1 if mainfile cannot be opened, -2 if mergefile cannot be opened, 0 if the
   seek in mergefile falls short, otherwise the number of input bytes merged >> 1.
*/
int mix_mono_to_stereo_sounds(char *mainfile, int main_sample, char *mergefile, 
                              int merge_sample, int samples,
                              int scalerA, int scalerB, int chan, int use_env, int* env)
{
  int mainfd,mergefd,main_loc,merge_loc,scaler,file_type;
  int rate=0,pass=0,curenvtime=0,curenvloc=0,otherchan=0;
  char *i;
  short *ii,*jj,*kk;
  int k,bytes,rtn_main,rtn_merge,lim,n,mono_bufnum,left,curloc,curbytes,bytes_merged;
  short scl=0,sclA=0,sclB=0,temp;
  int maindataloc,mergedataloc,maindatasize,mainloc,mergeloc,samples_merged,newdatasize;
  int gap,chunk;

  /* open the two files, read the header info (assumed to be same srate) */
  if ((mainfd=clm_reopen_write(mainfile)) == -1) return(-1);
  if ((mergefd=clm_open_read(mergefile)) == -1) {close(mainfd); return(-2);}

  check_mergebuf();
  merge_read_header(mainfd);
  maindataloc = c_snd_header_data_location();
  maindatasize = c_snd_header_data_size();
  mainloc = maindataloc+(2*2*main_sample);
  file_type = c_snd_header_type();

  merge_read_header(mergefd);
  mergedataloc = c_snd_header_data_location();
  mergeloc=mergedataloc+(2*merge_sample);
  if (samples == -1) 
    samples = c_snd_header_data_size();

  main_loc=clm_seek(mainfd,mainloc,0);

  if (main_loc<mainloc)   /* output not long enough, so add zeros until main_byte_location */
    {
      /* the gap can be larger than mainbuf, so zero one buffer's worth and write it in pieces */
      gap = mainloc-main_loc;
      chunk = (gap<(MERGEBUFLIM)) ? gap : (MERGEBUFLIM);
      for (n=0,i=mainbuf;n<chunk;n++,i++)
        {
          *i=0;
        }
      while (gap>0)
        {
          chunk = (gap<(MERGEBUFLIM)) ? gap : (MERGEBUFLIM);
          clm_write(mainfd,mainbuf,chunk);
          gap -= chunk;
        }
    }

  merge_loc=clm_seek(mergefd,mergeloc,0);
  if (merge_loc<mergeloc)
    {
      /* nothing to merge -- release both files before giving up */
      clm_close(mainfd);
      clm_close(mergefd);
      return 0;
    }
  mono_bufnum = (MONOBUFLIM);     /* mono input per pass; the stereo output needs twice this */
  lim=samples*2;
  left=lim;
  curloc=mainloc;
  bytes_merged = 0;
  if (chan == 0)
    {
      scl = (short)scalerA;
      scaler = scalerA;
      otherchan = 1;
    }
  else
    if (chan == 1)
      {
        scl = (short)scalerB;
        scaler = scalerB;
        otherchan = 0;
      }
  else
    {
      sclA = (short)scalerA;
      sclB = (short)scalerB;
      scaler = scalerA;
    }

  if (use_env != 0)
    {
      curenvtime = 0;
      curenvloc = 2;
      rate = env[1];
      pass = env[2];
    }


  for (n=0;n<lim;n+=mono_bufnum)
    {
      if (left<mono_bufnum)
        curbytes=left;
      else curbytes=mono_bufnum;
      
      rtn_merge=clm_read(mergefd,mergebuf,curbytes);
      rtn_main=clm_read(mainfd,mainbuf,(curbytes*2));

      if (rtn_merge<=0) break;        /* input exhausted (or read failed): nothing more to mix */
      if (rtn_main<0) rtn_main=0;     /* a failed output read is treated as "no existing data" */

      /* bytes = mono bytes that have matching stereo data already in the output */
      if ((rtn_merge*2)<rtn_main)
        bytes=rtn_merge;
      else bytes=(rtn_main>>1);

      if (use_env == 0)
        {
          if (chan != -1)
            {
              if (scl == 0)
                {                  /* mono -> A|B */
                  for (k=0,ii=(short *)(mainbuf+(chan*2)),jj=(short *)mergebuf;k<bytes;k+=2,ii+=2,jj++)
                    {
                      (*ii)+=(*jj);
                    }
                  if (rtn_main<(rtn_merge*2))
                    {
                      for (ii=(short *)(mainbuf+(bytes*2)+(chan*2)),
                           kk=(short *)(mainbuf+(bytes*2)+(otherchan*2)),
                           jj=(short *)(mergebuf+bytes),
                           k=bytes;
                           k<rtn_merge;
                           k+=2,jj++,ii+=2,kk+=2)
                        {
                          *ii = *jj;
                          *kk = 0;        /* needed because previous call might have left garbage in mainbuf */
                        }
                    }
                }
              else
                {                  /* mono * scl -> A|B */
                  for (ii=(short *)(mainbuf+(chan*2)),jj=(short *)mergebuf,k=0;k<bytes;k+=2,ii+=2,jj++)
                    {
                      (*ii)+=(short)(((*jj)*scl)>>SHIFT);
                    }
                  if (rtn_main<(rtn_merge*2))
                    {
                      for (ii=(short *)(mainbuf+(bytes*2)+(chan*2)),
                           kk=(short *)(mainbuf+(bytes*2)+(otherchan*2)),
                           jj=(short *)(mergebuf+bytes),
                           k=bytes;
                           k<rtn_merge;
                           k+=2,jj++,ii+=2,kk+=2)
                        {
                          (*ii)=(short)(((*jj)*scl)>>SHIFT);
                          *kk = 0;
                        }
                    }
                }
            }
          else
            {                      /* mono * sclA -> A and mono * sclB -> B */
              for (ii=(short *)mainbuf,kk=(short *)(mainbuf+2),jj=(short *)mergebuf,k=0;k<bytes;k+=2,ii+=2,kk+=2,jj++)
                {
                  (*ii)+=(short)(((*jj)*sclA)>>SHIFT);
                  (*kk)+=(short)(((*jj)*sclB)>>SHIFT);
                }
              if (rtn_main<(rtn_merge*2))
                {
                  for (ii=(short *)(mainbuf+(bytes*2)),
                       kk=(short *)(mainbuf+(bytes*2)+2),
                       jj=(short *)(mergebuf+bytes),
                       k=bytes;
                       k<rtn_merge;
                       k+=2,jj++,ii+=2,kk+=2)
                    {
                      (*ii)=(short)(((*jj)*sclA)>>SHIFT);
                      (*kk)=(short)(((*jj)*sclB)>>SHIFT);
                    }
                }
            }
        }
      else
        {                          /* same cases, but now the scalers include the envelope value */
          if (chan != -1)
            {                      /* mono * env -> A|B */
              for (ii=(short *)(mainbuf+(chan*2)),jj=(short *)mergebuf,k=0;k<bytes;k+=2,ii+=2,jj++)
                {
                  (*ii)+=(short) ((((int)(*jj))*(scaler>>ENV_SHIFT))>>SHIFT);
                  run_env;
                }
              if (rtn_main<(rtn_merge*2))
                {
                  for (ii=(short *)(mainbuf+(bytes*2)+(chan*2)),
                       kk=(short *)(mainbuf+(bytes*2)+(otherchan*2)),
                       jj=(short *)(mergebuf+bytes),
                       k=bytes;
                       k<rtn_merge;
                       k+=2,jj++,ii+=2,kk+=2)
                    {
                      (*ii)=(short) ((((int)(*jj))*(scaler>>ENV_SHIFT))>>SHIFT);
                      *kk = 0;
                      run_env;
                    }
                }
            }
          else
            {                      /* mono * env -> A and mono * env * sclB -> B */
                                   /* assume sclA subsumed into env and sclB is inverse thereof (i.e. sclB/sclA from caller) */
              for (ii=(short *)mainbuf,kk=(short *)(mainbuf+2),jj=(short *)mergebuf,k=0;k<bytes;k+=2,ii+=2,kk+=2,jj++)
                {
                  temp=(short)((((int)(*jj))*(scaler>>ENV_SHIFT))>>SHIFT);
                  (*ii)+=temp;
                  (*kk)+=(short)((temp*sclB)>>SHIFT);
                  run_env;
                }
              if (rtn_main<(rtn_merge*2))
                {
                  for (ii=(short *)(mainbuf+(bytes*2)),
                       kk=(short *)(mainbuf+(bytes*2)+2),
                       jj=(short *)(mergebuf+bytes),
                       k=bytes;
                       k<rtn_merge;
                       k+=2,jj++,ii+=2,kk+=2)
                    {
                      temp=(short)((((int)(*jj))*(scaler>>ENV_SHIFT))>>SHIFT);
                      (*ii)=temp;
                      (*kk)=(short)((temp*sclB)>>SHIFT);
                      run_env;
                    }
                }
            }
        }
      /* rewind the output to where this buffer was read from and write the mixed data over it */
      bytes_merged += rtn_merge;
      clm_seek(mainfd,curloc,0);
      clm_write(mainfd,mainbuf,(rtn_merge*2));
      curloc += (rtn_merge*2);
      left -= mono_bufnum;
    }

  samples_merged=bytes_merged;
  
  /* only touch the header if the mix extended the output */
  newdatasize = 2*2*(samples+main_sample);
  if (newdatasize > maindatasize)
    c_update_header_with_fd(mainfd,file_type,newdatasize);
  clm_close(mainfd);
  clm_close(mergefd);
  return (samples_merged>>1);
}



/* 
    step 3.  stereo to mono:

    stereo A|B|AB -> mono
    stereo A|B|AB * scl -> mono
    stereo A|B|AB * env -> mono
    stereo (A * sclA + B * sclB) -> mono

*/

/*
   mix_stereo_to_mono_sounds -- add a stereo 16-bit mergefile into a mono 16-bit mainfile.

   mainfile, main_sample    mono output file and the sample at which mixing starts
   mergefile, merge_sample  stereo input file and the frame at which reading starts
   samples                  frames to mix, or -1 to use half the data size from mergefile's header
   scalerA, scalerB         fixed-point gains (SHIFT fractional bits) for input channels A and B
   chan                     0 = take channel A only (gain scalerA), 1 = channel B only
                            (gain scalerB), -1 = both channels; in the single-channel,
                            no-envelope case a gain of 0 means mix unscaled; with both
                            channels a gain of 0 is replaced by (1<<SHIFT)-1
   use_env                  non-zero to ramp the gain with env
   initial_value            starting value of the running envelope scaler
   env                      envelope data: env[1] is the first rate, env[2] the first pass
                            count, then (rate, pass) pairs as consumed by run_env

   Returns -1 if mainfile cannot be opened, -2 if mergefile cannot be opened, 0 if the
   seek in mergefile falls short, otherwise the number of output bytes merged >> 1.

   NOTE(review): in the envelope case with chan == -1 the sclA/sclB products are not
   shifted down by SHIFT before the envelope multiply, unlike the non-envelope case --
   presumably the caller compensates; confirm against fasmix in sound.lisp.
*/
int mix_stereo_to_mono_sounds(char *mainfile, int main_sample, char *mergefile, 
                              int merge_sample, int samples,
                              int scalerA, int scalerB, int chan, int use_env, int initial_value, int* env)
{
  /* essentially the same as mono-to-stereo, but all pointers/counters are swapped */
  int mainfd,mergefd,main_loc,merge_loc,scaler,file_type;
  int rate=0,pass=0,curenvtime=0,curenvloc=0;
  char *i;
  short *ii,*jj,*kk;
  int k,bytes,rtn_main,rtn_merge,lim,n,mono_bufnum,left,curloc,curbytes,bytes_merged;
  short scl=0,sclA=0,sclB=0;
  int maindataloc,mergedataloc,maindatasize,mainloc,mergeloc,samples_merged,newdatasize;
  int gap,chunk;

  /* open the two files, read the header info (assumed to be same srate) */
  if ((mainfd=clm_reopen_write(mainfile)) == -1) return(-1);
  if ((mergefd=clm_open_read(mergefile)) == -1) {close(mainfd); return(-2);}

  check_mergebuf();

  merge_read_header(mainfd);
  maindataloc = c_snd_header_data_location();
  maindatasize = c_snd_header_data_size();
  mainloc = maindataloc+(2*main_sample);
  file_type = c_snd_header_type();

  merge_read_header(mergefd);
  mergedataloc = c_snd_header_data_location();
  mergeloc=mergedataloc+(2*2*merge_sample);
  if (samples == -1) 
      samples = (c_snd_header_data_size() >> 1);

  main_loc=clm_seek(mainfd,mainloc,0);

  if (main_loc<mainloc)   /* output not long enough, so add zeros until main_byte_location */
    {
      /* the gap can be larger than mainbuf, so zero one buffer's worth and write it in pieces */
      gap = mainloc-main_loc;
      chunk = (gap<(MERGEBUFLIM)) ? gap : (MERGEBUFLIM);
      for (n=0,i=mainbuf;n<chunk;n++,i++)
        {
          *i=0;
        }
      while (gap>0)
        {
          chunk = (gap<(MERGEBUFLIM)) ? gap : (MERGEBUFLIM);
          clm_write(mainfd,mainbuf,chunk);
          gap -= chunk;
        }
    }

  merge_loc=clm_seek(mergefd,mergeloc,0);
  if (merge_loc<mergeloc)
    {
      /* nothing to merge -- release both files before giving up */
      clm_close(mainfd);
      clm_close(mergefd);
      return 0;
    }
  mono_bufnum = (MONOBUFLIM);     /* mono output per pass; the stereo input needs twice this */
  lim=samples*2;
  left=lim;
  curloc=mainloc;
  bytes_merged = 0;
  scaler = initial_value;
  if (chan == 0)
    {
      scl = (short)scalerA;
    }
  else
    if (chan == 1)
      {
        scl = (short)scalerB;
      }
  else
    {
      sclA = (short)scalerA;
      if (sclA == 0) sclA = ((1<<SHIFT)-1);
      sclB = (short)scalerB;
      if (sclB == 0) sclB = ((1<<SHIFT)-1);
    }

  if (use_env != 0)
    {
      curenvtime = 0;
      curenvloc = 2;
      rate = env[1];
      pass = env[2];
    }


  for (n=0;n<lim;n+=mono_bufnum)
    {
      if (left<mono_bufnum)
        curbytes=left;
      else curbytes=mono_bufnum;
      
      rtn_merge=clm_read(mergefd,mergebuf,(curbytes*2));
      rtn_main=clm_read(mainfd,mainbuf,curbytes);

      if (rtn_merge<=0) break;        /* input exhausted (or read failed): nothing more to mix */
      if (rtn_main<0) rtn_main=0;     /* a failed output read is treated as "no existing data" */

      /* bytes = mono output bytes that have matching stereo input */
      if (rtn_merge<(rtn_main*2))
        bytes=(rtn_merge>>1);
      else bytes=rtn_main;

      if (use_env == 0)
        {
          if (chan != -1)
            {
              if (scl == 0)
                {                  /* A|B -> mono */
                  for (k=0,ii=(short *)mainbuf,jj=(short *)(mergebuf+(chan*2));k<bytes;k+=2,ii++,jj+=2)
                    {
                      (*ii)+=(*jj);
                    }
                  if (rtn_main<(rtn_merge>>1))
                    {
                      for (ii=(short *)(mainbuf+bytes),
                           jj=(short *)(mergebuf+(bytes*2)+(chan*2)),
                           k=bytes;
                           k<(rtn_merge>>1);
                           k+=2,jj+=2,ii++)
                        {
                          *ii = *jj;
                        }
                    }
                }
              else
                {                  /* A|B * scl -> mono */
                  for (ii=(short *)mainbuf,jj=(short *)(mergebuf+(chan*2)),k=0;k<bytes;k+=2,ii++,jj+=2)
                    {
                      (*ii)+=(short)(((*jj)*scl)>>SHIFT);
                    }
                  if (rtn_main<(rtn_merge>>1))
                    {
                      for (ii=(short *)(mainbuf+bytes),
                           jj=(short *)(mergebuf+(bytes*2)+(chan*2)),
                           k=bytes;
                           k<(rtn_merge>>1);
                           k+=2,jj+=2,ii++)
                        {
                          (*ii)=(short)(((*jj)*scl)>>SHIFT);
                        }
                    }
                }
            }
          else
            {                      /*  (A * sclA) + (B * sclB) -> mono */
              for (ii=(short *)mainbuf,kk=(short *)(mergebuf+2),jj=(short *)mergebuf,k=0;k<bytes;k+=2,ii++,kk+=2,jj+=2)
                {
                  (*ii)+=(short)((short)(((*jj)*sclA)>>SHIFT) + (short)(((*kk)*sclB)>>SHIFT));
                }
              if (rtn_main<(rtn_merge>>1))
                {
                  for (ii=(short *)(mainbuf+bytes),
                       kk=(short *)(mergebuf+(bytes*2)+2),
                       jj=(short *)(mergebuf+(bytes*2)),
                       k=bytes;
                       k<(rtn_merge>>1);
                       k+=2,jj+=2,ii++,kk+=2)
                    {
                      (*ii)=(short)((short)(((*jj)*sclA)>>SHIFT) + (short)(((*kk)*sclB)>>SHIFT));
                    }
                }
            }
        }
      else
        {                          /* same cases, but now the scalers include the envelope value */
          if (chan != -1)
            {                      /* A|B * env -> mono */
              for (ii=(short *)mainbuf,jj=(short *)(mergebuf+(chan*2)),k=0;k<bytes;k+=2,ii++,jj+=2)
                {
                  (*ii)+=(short)((((int)(*jj))*(scaler>>ENV_SHIFT))>>SHIFT);
                  run_env;
                }
              if (rtn_main<(rtn_merge>>1))
                {
                  for (ii=(short *)(mainbuf+bytes),
                       jj=(short *)(mergebuf+(bytes*2)+(chan*2)),
                       k=bytes;
                       k<(rtn_merge>>1);
                       k+=2,jj+=2,ii++)
                    {
                      (*ii)=(short)((((int)(*jj))*(scaler>>ENV_SHIFT))>>SHIFT);
                      run_env;
                    }
                }
            }
          else
            {                      /* (A * sclA + B * sclB) * env -> mono */
              for (ii=(short *)mainbuf,
                   kk=(short *)(mergebuf+2),
                   jj=(short *)mergebuf,
                   k=0;
                   k<bytes;
                   k+=2,ii++,kk+=2,jj+=2)
                {
                  (*ii)+=(short)((((int)(((*jj)*sclA)+((*kk)*sclB)))*(scaler>>ENV_SHIFT))>>SHIFT);
                  run_env;
                }
              if (rtn_main<(rtn_merge>>1))
                {
                  for (ii=(short *)(mainbuf+bytes),
                       kk=(short *)(mergebuf+(bytes*2)+2),
                       jj=(short *)(mergebuf+(bytes*2)),
                       k=bytes;
                       k<(rtn_merge>>1);
                       k+=2,jj+=2,ii++,kk+=2)
                    {
                      (*ii)=(short)((((int)(((*jj)*sclA)+((*kk)*sclB)))*(scaler>>ENV_SHIFT))>>SHIFT);
                      run_env;
                    }
                }
            }
        }
      /* rewind the output to where this buffer was read from and write the mixed data over it */
      bytes_merged += (rtn_merge>>1);
      clm_seek(mainfd,curloc,0);
      clm_write(mainfd,mainbuf,(rtn_merge>>1));
      curloc += (rtn_merge>>1);
      left -= mono_bufnum;
    }

  samples_merged=bytes_merged;
  
  /* only touch the header if the mix extended the output */
  newdatasize = 2*(samples+main_sample);
  if (newdatasize > maindatasize)
    c_update_header_with_fd(mainfd,file_type,newdatasize);
  clm_close(mainfd);
  clm_close(mergefd);
  return (samples_merged>>1);
}



/*

    step 4. stereo to stereo with four scale factors.

    stereo A * sclAA -> A
           A * sclAB -> B
           B * sclBA -> A
           B * sclBB -> B

*/

/*
   mix_stereo_to_stereo_sounds -- add a stereo 16-bit mergefile into a stereo 16-bit
   mainfile through a 2x2 matrix of fixed-point gains (SHIFT fractional bits each).

   mainfile, main_sample    output file and the frame at which mixing starts
   mergefile, merge_sample  input file and the frame at which reading starts
   samples                  frames to mix, or -1 to use the data size from mergefile's header
   scalerAA                 input A -> output A
   scalerBB                 input B -> output B
   scalerAB                 input A -> output B
   scalerBA                 input B -> output A
                            (note the parameter order: AA, BB, AB, BA)

   Returns -1 if mainfile cannot be opened, -2 if mergefile cannot be opened, 0 if the
   seek in mergefile falls short, otherwise the number of shorts merged >> 1.
*/
int mix_stereo_to_stereo_sounds(char *mainfile, int main_sample, char *mergefile, 
                                int merge_sample, int samples, 
                                int scalerAA, int scalerBB, int scalerAB, int scalerBA)
{

  int mainfd,mergefd,main_loc,merge_loc,file_type;
  char *i;
  short *ii,*jj,*kk,*mm;
  int k,bytes,rtn_main,rtn_merge,lim,n,bufnum,left,curloc,curbytes,bytes_merged;
  short sclAA,sclAB,sclBB,sclBA;
  int maindataloc,mergedataloc,maindatasize,mainloc,mergeloc,samples_merged,newdatasize;
  int gap,chunk;

  if ((mainfd=clm_reopen_write(mainfile)) == -1) return(-1);
  if ((mergefd=clm_open_read(mergefile)) == -1) {close(mainfd); return(-2);}

  check_mergebuf();
  merge_read_header(mainfd);
  maindataloc = c_snd_header_data_location();
  maindatasize = c_snd_header_data_size();
  mainloc = maindataloc+(2*2*main_sample);
  file_type = c_snd_header_type();

  merge_read_header(mergefd);
  mergedataloc = c_snd_header_data_location();
  mergeloc=mergedataloc+(2*2*merge_sample);
  if (samples == -1) 
    {
      samples = c_snd_header_data_size();
    }
  else samples = (samples*2);

  main_loc=clm_seek(mainfd,mainloc,0);

  if (main_loc<mainloc)   /* output not long enough, so add zeros until main_byte_location */
    {
      /* the gap can be larger than mainbuf, so zero one buffer's worth and write it in pieces */
      gap = mainloc-main_loc;
      chunk = (gap<(MERGEBUFLIM)) ? gap : (MERGEBUFLIM);
      for (n=0,i=mainbuf;n<chunk;n++,i++)
        {
          *i=0;
        }
      while (gap>0)
        {
          chunk = (gap<(MERGEBUFLIM)) ? gap : (MERGEBUFLIM);
          clm_write(mainfd,mainbuf,chunk);
          gap -= chunk;
        }
    }

  merge_loc=clm_seek(mergefd,mergeloc,0);
  if (merge_loc<mergeloc)
    {
      /* nothing to merge -- release both files before giving up */
      clm_close(mainfd);
      clm_close(mergefd);
      return 0;
    }
  
  /* now loop through both files, reading a bufferfull, merging and writing it back out */

  bufnum = (MERGEBUFLIM);
  lim=samples*2;
  left=lim;
  curloc=mainloc;
  bytes_merged = 0;

  sclAA = (short)scalerAA;
  sclAB = (short)scalerAB;
  sclBA = (short)scalerBA;
  sclBB = (short)scalerBB;

  for (n=0;n<lim;n+=bufnum)
    {
      if (left<bufnum)
        curbytes=left;
      else curbytes=bufnum;
      
      rtn_merge=clm_read(mergefd,mergebuf,curbytes);
      rtn_main=clm_read(mainfd,mainbuf,curbytes);

      if (rtn_merge<=0) break;        /* input exhausted (or read failed): nothing more to mix */
      if (rtn_main<0) rtn_main=0;     /* a failed output read is treated as "no existing data" */

      /* bytes = portion present in both files; the rest of the input (if any) replaces the buffer */
      if (rtn_merge<rtn_main)
        bytes=rtn_merge;
      else bytes=rtn_main;

      /* ii/jj walk output channels A/B, kk/mm walk input channels A/B; one frame is 4 bytes */
      for (k=0,ii=(short *)mainbuf,jj=(short *)(mainbuf+2),kk=(short *)mergebuf,mm=(short *)(mergebuf+2);
           k<bytes;
           k+=4,ii+=2,jj+=2,kk+=2,mm+=2) 
        {
          (*ii)+=(short) ((((*kk)*sclAA)+((*mm)*sclBA))>>SHIFT);
          (*jj)+=(short) ((((*kk)*sclAB)+((*mm)*sclBB))>>SHIFT);
        }
      if (rtn_main<rtn_merge)
        {
          for (ii=(short *)(mainbuf+bytes),jj=(short *)(mainbuf+bytes+2),kk=(short *)(mergebuf+bytes),mm=(short *)(mergebuf+bytes+2),k=bytes;
               k<rtn_merge;
               k+=4,jj+=2,ii+=2,kk+=2,mm+=2)
            {
              (*ii)=(short) ((((*kk)*sclAA)+((*mm)*sclBA))>>SHIFT);
              (*jj)=(short) ((((*kk)*sclAB)+((*mm)*sclBB))>>SHIFT);
            }
        }

      /* rewind the output to where this buffer was read from and write the mixed data over it */
      bytes_merged += rtn_merge;
      clm_seek(mainfd,curloc,0);
      clm_write(mainfd,mainbuf,rtn_merge);
      curloc += rtn_merge;
      left -= bufnum;
    }
  samples_merged=(bytes_merged>>1);
  
  /* only touch the header if the mix extended the output */
  newdatasize = (2*(samples_merged+(2*main_sample)));
  if (newdatasize > maindatasize)
    c_update_header_with_fd(mainfd,file_type,newdatasize);
  clm_close(mainfd);
  clm_close(mergefd);
  return (samples_merged>>1);
}


/*

    step 5. quad to quad with optional scale factors

*/

/*
   mix_quad_to_quad_sounds -- add a 4-channel 16-bit mergefile into a 4-channel 16-bit
   mainfile, channel for channel.

   mainfile, main_sample    output file and the frame at which mixing starts
   mergefile, merge_sample  input file and the frame at which reading starts
   samples                  frames to mix, or -1 to use the data size from mergefile's header
   scalerA..scalerD         per-channel fixed-point gains (SHIFT fractional bits),
                            used when straight_mix == 1
   straight_mix             0 = add unscaled, 1 = apply scalerA..scalerD,
                            greater than 1 = apply the envelope to all four channels
   initial_value            starting value of the running envelope scaler
   env                      envelope data: env[1] is the first rate, env[2] the first pass
                            count, then (rate, pass) pairs as consumed by run_env;
                            the envelope advances once per frame

   Returns -1 if mainfile cannot be opened, -2 if mergefile cannot be opened, 0 if the
   seek in mergefile falls short, otherwise the number of shorts merged >> 2.

   NOTE(review): the defforeign shown in the file's header comment lists 10 arguments
   for this entry point while this function takes 12 -- confirm against sound.lisp.
*/
int mix_quad_to_quad_sounds(char *mainfile, int main_sample, char *mergefile, 
                            int merge_sample, int samples, 
                            int scalerA, int scalerB, int scalerC, int scalerD, int straight_mix, int initial_value, int *env)
{

  int mainfd,mergefd,main_loc,merge_loc,temp,file_type;
  int rate=0,pass=0,curenvtime=0,curenvloc=0,scaler=0;
  char *i;
  short *ii,*jj,*kk,*mm,*ii0,*jj0,*kk0,*mm0;
  int k,bytes,rtn_main,rtn_merge,lim,n,bufnum,left,curloc,curbytes,bytes_merged;
  short sclA=0,sclB=0,sclC=0,sclD=0;
  int maindataloc,mergedataloc,maindatasize,mainloc,mergeloc,samples_merged,newdatasize;
  int gap,chunk;

  if ((mainfd=clm_reopen_write(mainfile)) == -1) return(-1);
  if ((mergefd=clm_open_read(mergefile)) == -1) {close(mainfd); return(-2);}

  check_mergebuf();
  merge_read_header(mainfd);
  maindataloc = c_snd_header_data_location();
  maindatasize = c_snd_header_data_size();
  mainloc = maindataloc+(4*2*main_sample);
  file_type = c_snd_header_type();

  merge_read_header(mergefd);
  mergedataloc = c_snd_header_data_location();
  mergeloc=mergedataloc+(4*2*merge_sample);
  if (samples == -1) 
    {
      samples = c_snd_header_data_size();
    }
  else samples = (samples*4);

  main_loc=clm_seek(mainfd,mainloc,0);

  if (main_loc<mainloc)   /* output not long enough, so add zeros until main_byte_location */
    {
      /* the gap can be larger than mainbuf, so zero one buffer's worth and write it in pieces */
      gap = mainloc-main_loc;
      chunk = (gap<(MERGEBUFLIM)) ? gap : (MERGEBUFLIM);
      for (n=0,i=mainbuf;n<chunk;n++,i++)
        {
          *i=0;
        }
      while (gap>0)
        {
          chunk = (gap<(MERGEBUFLIM)) ? gap : (MERGEBUFLIM);
          clm_write(mainfd,mainbuf,chunk);
          gap -= chunk;
        }
    }

  merge_loc=clm_seek(mergefd,mergeloc,0);
  if (merge_loc<mergeloc)
    {
      /* nothing to merge -- release both files before giving up */
      clm_close(mainfd);
      clm_close(mergefd);
      return 0;
    }
  
  /* now loop through both files, reading a bufferfull, merging and writing it back out */

  bufnum = (MERGEBUFLIM);
  lim=samples*2;
  left=lim;
  curloc=mainloc;
  bytes_merged = 0;

  if (straight_mix > 1)
    {
      scaler = initial_value;
      curenvtime = 0;
      curenvloc = 2;
      rate = env[1];
      pass = env[2];
    }
  else
    {
      sclA = (short)scalerA;
      sclB = (short)scalerB;
      sclC = (short)scalerC;
      sclD = (short)scalerD;
    }

  for (n=0;n<lim;n+=bufnum)
    {
      if (left<bufnum)
        curbytes=left;
      else curbytes=bufnum;
      
      rtn_merge=clm_read(mergefd,mergebuf,curbytes);
      rtn_main=clm_read(mainfd,mainbuf,curbytes);

      if (rtn_merge<=0) break;        /* input exhausted (or read failed): nothing more to mix */
      if (rtn_main<0) rtn_main=0;     /* a failed output read is treated as "no existing data" */

      /* bytes = portion present in both files; the rest of the input (if any) replaces the buffer */
      if (rtn_merge<rtn_main)
        bytes=rtn_merge;
      else bytes=rtn_main;

      /* ii/jj/kk/mm walk the four output channels, ii0/jj0/kk0/mm0 the four input channels; */
      /* one frame is 8 bytes */
      if (straight_mix == 0)
        {
          for (k=0,ii=(short *)mainbuf,jj=(short *)(mainbuf+2),kk=(short *)(mainbuf+4),mm=(short *)(mainbuf+6),
               ii0=(short *)mergebuf,jj0=(short *)(mergebuf+2),kk0=(short *)(mergebuf+4),mm0=(short *)(mergebuf+6);
               k<bytes;
               k+=8,ii+=4,jj+=4,kk+=4,mm+=4,ii0+=4,jj0+=4,kk0+=4,mm0+=4)
            {
              (*ii)+=(short) (*ii0);
              (*jj)+=(short) (*jj0);
              (*kk)+=(short) (*kk0);
              (*mm)+=(short) (*mm0);
            }
          if (rtn_main<rtn_merge)
            {
              for (ii=(short *)(mainbuf+bytes),jj=(short *)(mainbuf+bytes+2),
                   kk=(short *)(mainbuf+bytes+4),mm=(short *)(mainbuf+bytes+6),
                   ii0=(short *)(mergebuf+bytes),jj0=(short *)(mergebuf+bytes+2),
                   kk0=(short *)(mergebuf+bytes+4),mm0=(short *)(mergebuf+bytes+6),
                   k=bytes;
                   k<rtn_merge;
                   k+=8,ii+=4,jj+=4,kk+=4,mm+=4,ii0+=4,jj0+=4,kk0+=4,mm0+=4)
                {
                  (*ii)=(short) (*ii0);
                  (*jj)=(short) (*jj0);
                  (*kk)=(short) (*kk0);
                  (*mm)=(short) (*mm0);
                }
            }
        }
      else
        {
          if (straight_mix == 1)
            {
              for (k=0,ii=(short *)mainbuf,jj=(short *)(mainbuf+2),kk=(short *)(mainbuf+4),mm=(short *)(mainbuf+6),
                   ii0=(short *)mergebuf,jj0=(short *)(mergebuf+2),kk0=(short *)(mergebuf+4),mm0=(short *)(mergebuf+6);
                   k<bytes;
                   k+=8,ii+=4,jj+=4,kk+=4,mm+=4,ii0+=4,jj0+=4,kk0+=4,mm0+=4)
                {
                  (*ii)+=(short) (((*ii0)*sclA)>>SHIFT);
                  (*jj)+=(short) (((*jj0)*sclB)>>SHIFT);
                  (*kk)+=(short) (((*kk0)*sclC)>>SHIFT);
                  (*mm)+=(short) (((*mm0)*sclD)>>SHIFT);
                }
              if (rtn_main<rtn_merge)
                {
                  for (ii=(short *)(mainbuf+bytes),jj=(short *)(mainbuf+bytes+2),
                       kk=(short *)(mainbuf+bytes+4),mm=(short *)(mainbuf+bytes+6),
                       ii0=(short *)(mergebuf+bytes),jj0=(short *)(mergebuf+bytes+2),
                       kk0=(short *)(mergebuf+bytes+4),mm0=(short *)(mergebuf+bytes+6),
                       k=bytes;
                       k<rtn_merge;
                       k+=8,ii+=4,jj+=4,kk+=4,mm+=4,ii0+=4,jj0+=4,kk0+=4,mm0+=4)
                    {
                      (*ii)=(short) (((*ii0)*sclA)>>SHIFT);
                      (*jj)=(short) (((*jj0)*sclB)>>SHIFT);
                      (*kk)=(short) (((*kk0)*sclC)>>SHIFT);
                      (*mm)=(short) (((*mm0)*sclD)>>SHIFT);
                    }
                }
            }
          else  /* amp-env given */
            {
              for (k=0,ii=(short *)mainbuf,jj=(short *)(mainbuf+2),kk=(short *)(mainbuf+4),mm=(short *)(mainbuf+6),
                   ii0=(short *)mergebuf,jj0=(short *)(mergebuf+2),kk0=(short *)(mergebuf+4),mm0=(short *)(mergebuf+6);
                   k<bytes;
                   k+=8,ii+=4,jj+=4,kk+=4,mm+=4,ii0+=4,jj0+=4,kk0+=4,mm0+=4)
                {
                  temp = (int) (scaler >> ENV_SHIFT);
                  (*ii)+=(short) ((((int)(*ii0))*temp) >> SHIFT);
                  (*jj)+=(short) ((((int)(*jj0))*temp) >> SHIFT);
                  (*kk)+=(short) ((((int)(*kk0))*temp) >> SHIFT);
                  (*mm)+=(short) ((((int)(*mm0))*temp) >> SHIFT);
                  run_env;
                }
              if (rtn_main<rtn_merge)
                {
                  for (ii=(short *)(mainbuf+bytes),jj=(short *)(mainbuf+bytes+2),
                       kk=(short *)(mainbuf+bytes+4),mm=(short *)(mainbuf+bytes+6),
                       ii0=(short *)(mergebuf+bytes),jj0=(short *)(mergebuf+bytes+2),
                       kk0=(short *)(mergebuf+bytes+4),mm0=(short *)(mergebuf+bytes+6),
                       k=bytes;
                       k<rtn_merge;
                       k+=8,ii+=4,jj+=4,kk+=4,mm+=4,ii0+=4,jj0+=4,kk0+=4,mm0+=4)
                    {
                      temp = (int) (scaler >> ENV_SHIFT);
                      (*ii)=(short) ((((int)(*ii0))*temp) >> SHIFT);
                      (*jj)=(short) ((((int)(*jj0))*temp) >> SHIFT);
                      (*kk)=(short) ((((int)(*kk0))*temp) >> SHIFT);
                      (*mm)=(short) ((((int)(*mm0))*temp) >> SHIFT);
                      run_env;
                    }
                }
            }
        }

      /* rewind the output to where this buffer was read from and write the mixed data over it */
      bytes_merged += rtn_merge;
      clm_seek(mainfd,curloc,0);
      clm_write(mainfd,mainbuf,rtn_merge);
      curloc += rtn_merge;
      left -= bufnum;
    }
  samples_merged=(bytes_merged>>1);
  
  /* only touch the header if the mix extended the output */
  newdatasize = (2*(samples_merged+(4*main_sample)));
  if (newdatasize > maindatasize)
    c_update_header_with_fd(mainfd,file_type,newdatasize);
  clm_close(mainfd);
  clm_close(mergefd);
  return (samples_merged>>2);
}
