/******************************************************************************
 FILE ALPHA.C :
   This file contains the routines for alphabetizing a list of strings
 via a fast radix-insertion sort mongrel algorithm.

 Copyright (c) 1988 by Brian L. Donnell
 ******************************************************************************/

/******************************************************************************
 ==============================================================================
                                  HEADER FILES
 ==============================================================================
 ******************************************************************************/
#include "common.h"    /* Standard header files and useful constants/macros  */

/******************************************************************************
 ==============================================================================
                                   CONSTANTS
 ==============================================================================
 ******************************************************************************/
#define MAX_INSERTION_SORT_SIZE 75  /* Length-buckets holding fewer strings
                                       than this are insertion-sorted, all
                                       others are radix-sorted (Insertion-Sort
                                       is typically faster on short lists)  */

#define SPEC_BUCKETS    2    /* Case buckets : UPPERCASE and LOWERCASE */
#define ALPHA_BUCKETS   68   /* Numbers (10), Letters (26),
                                and other printable-characters (32) */
#define ALPHA_MAPSIZE   (ALPHA_BUCKETS+26)  /* Entries in alpha_map ('!'-'~') :
                                               both cases of a letter share one
                                               bucket, hence 26 more entries
                                               than buckets                   */
#define ALPHA_MAPOFFSET 33   /* '!' is the first ASCII printable-char */

#define UPPERCASE       0    /* Bucket indices for intermediate-sort on */
#define LOWERCASE       1    /*    length buckets                       */

/******************************************************************************
 ==============================================================================
                                    MACROS
 ==============================================================================
 ******************************************************************************/
/* spec_bcktno : yields LOWERCASE for 'a'-'z' and UPPERCASE for every other
   character.  The argument is parenthesized so that an expression argument
   groups as intended; it is still evaluated twice, so it must be free of
   side effects.                                                           */
#define spec_bcktno(ch) \
   ((((ch) >= 'a') && ((ch) <= 'z')) ? LOWERCASE : UPPERCASE)

/******************************************************************************
 ==============================================================================
                      EXTERNALLY VISIBLE FUNCTION PROTOTYPES
 ==============================================================================
 ******************************************************************************/
/* Sorts the list headed by listtop through the three node-access callbacks
   and returns the new head of the list, or NULL on error.                  */
void *alpha_sort(
   void *listtop,
   void *(*next)(void *node),
   void (*set_next)(void *node,void *successor),
   char *(*word)(void *node),
   int maxlen);

/******************************************************************************
 ==============================================================================
                      INTERNALLY VISIBLE FUNCTION PROTOTYPES
 ==============================================================================
 ******************************************************************************/
/* Distributes the list into the length-buckets and orders each bucket */
static void length_sort(void *(*next)(void *node),
                        void (*set_next)(void *node,void *successor),
                        char *(*word)(void *node),
                        int maxlen);

/* Orders one length-bucket by case with a radix sort */
static void lngthbckt_radix_sort(void *(*next)(void *node),
                                 void (*set_next)(void *node,void *successor),
                                 char *(*word)(void *node),
                                 int len);

/* Orders one length-bucket with an insertion sort */
static void lngthbckt_insertion_sort(void *(*next)(void *node),
                                     void (*set_next)(void *node,void *successor),
                                     char *(*word)(void *node),
                                     int len);

/* Alphabetizes the contents of the length-buckets into one list */
static void main_sort(void *(*next)(void *node),
                      void (*set_next)(void *node,void *successor),
                      char *(*word)(void *node),
                      int maxlen);

/******************************************************************************
 ==============================================================================
                      INTERNALLY VISIBLE GLOBAL VARIABLES
 ==============================================================================
 ******************************************************************************/
/* Top of the list being sorted */
static void *top = NULL;

/* Length-sort buckets : heads of the per-length lists and their tails */
static void **lngthbckts = NULL;
static void **lngthbcktbots = NULL;

/* Intermediate-sort (case) buckets used while ordering a length-bucket :
   heads and tails                                                       */
static void **specbckts = NULL;
static void **specbcktbots = NULL;

/* Alphabetizing-sort buckets : heads and tails */
static void **alphabckts = NULL;
static void **alphabcktbots = NULL;

/* The number of entries in each length bucket (assumes each will be
   < 32767) - used to determine at run-time which sort algorithm to use
   on a length bucket                                                   */
static int *lngthbcktcnts = NULL;

/* ---------------------------------------------------------------------------
   Library Alphabetic Ordering for the 94 printable ASCII Characters
   '!' (Dec 33) through '~' (Dec 126) :
      (This ordering ignores the difference between Upper and Lower Case)

   This table ignores the first 32 non-printable characters, Blank (Dec 32),
   and DELETE (Dec 127).  The index in the array corresponds to the
   ASCII code of the character - 33 (ALPHA_MAPOFFSET), and the value of the
   array element corresponds to the character's position in the alphabetic
   ordering, i.e. its sort-bucket number (0 to ALPHA_BUCKETS-1).

   The alphabetic ordering is :
      (uppercase and lowercase letters are considered equivalent)

      0-9a-z!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

   The table is lookup data only, so it is declared const.
   -------------------------------------------------------------------------- */
static const unsigned char alpha_map[ALPHA_MAPSIZE] =
  {
   /* '!'-'/' */
   36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,

   /* '0'-'9' */
   0,1,2,3,4,5,6,7,8,9,

   /* ':' - '@' */
   51,52,53,54,55,56,57,

   /* 'A'-'Z' */
   10,11,12,13,14,15,16,17,18,19,20,21,22,
   23,24,25,26,27,28,29,30,31,32,33,34,35,

   /* '['-'`' */
   58,59,60,61,62,63,

   /* 'a'-'z' (same buckets as 'A'-'Z') */
   10,11,12,13,14,15,16,17,18,19,20,21,22,
   23,24,25,26,27,28,29,30,31,32,33,34,35,

   /* '{'-'~' */
   64,65,66,67
  };

/******************************************************************************
 ==============================================================================
                          EXTERNALLY VISIBLE FUNCTIONS
 ==============================================================================
 ******************************************************************************/

/*******************************************************************************
 NAME        : alpha_sort
 PURPOSE     : Radix sorts a list of items using the "atomic" element of
               those items as the radix, e.g. radix-sorting a list of strings
               using a character as the radix
 DESCRIPTION : 
 INPUTS      : 1) The head of the list
               2) A function which when passed the valid address of a node
                  in the list returns the address of the following node
                  The function should return NULL if there are no more nodes.
               3) A function which when passed two addresses of nodes in the
                  list will set the list such that the second node is the
                  next in the list after the first node
               4) A function which when passed the valid address of a node
                  will return the address of the string corresponding to that
                  node
               5) The length of the longest string in the list
 RETURNS     : 1) NULL if there was an error
               2) The new top of the list - the caller's internal list has
                  actually been modified
 NOTES       : 1) Assumes the data-structure access 
                  function addresses are valid.
               2) The function tries to allocate all memory it will need
                  up front, so that if there are any errors, the caller's
                  list will be left in an uncorrupted state.  If you are
                  completely confident of your memory, however, you can
                  conserve some dynamic memory by only allocating some of
                  the buckets when they're needed and throwing them away after
                  you're finished with them.  The length-buckets should be
                  immediately allocated and should stick around for the whole
                  process.  The length-bucket-count and special-buckets can
                  be thrown away right after the call to length_sort().
                  Finally you need not allocate the alpha_buckets until right
                  before the call to main_sort().  Such methods could save you
                  anywhere from 0.5-2.0K.

               3) Radix Sort Algorithm

               For every possible value of the radix in sequential order do
                 {
                  Empty the buckets
                  Place all items in the list in the buckets according to
                     the current radix
                  Rebuild the list by linking the lists in the buckets together
                     in the sequential order of the buckets
                 }

               Alphabetizing a List of Strings

               Radix-Sort the list by length (longest first), leaving the
                 strings in the length-buckets
               Insertion/Radix-Sort each length-bucket by case
                 (These steps are fairly quick and allow the main alphabetizing
                  sort to optimize on length and disregard special cases -
                    Insertion-Sort performs better than Radix-Sort
                  for small-lists - so a choice is made at run-time
                  as to which algorithm sorts the length bucket,
                  according to the number of strings in it)
               Radix-Sort the list by number/alphabetic value/special chars
                 SortedList = Empty
                 for each possible length do
                   Empty the sort-buckets
                   put strings in that length-bucket in the sort-buckets
                   put strings already in SortedList in the sort-buckets
                   Reform the SortedList from the sort-buckets

               The method described above ensures the minimum number of
                 radix comparisons for any string in the list.  This algorithm
                 really shows its power over others when the list has a lot
                 of strings of greatly diverging lengths.
               The list is now ordered according to the specified radix.
 ******************************************************************************/
void *alpha_sort(void *listtop,
                 void *(*next)(void*),void (*set_next)(void *,void *),
                 char *(*word)(void *),int maxlen)
  {
   /* ==============================================================
      Record the list head, then refuse an empty list or a longest
      string length that is not positive
      ============================================================== */
   top = listtop;
   if ((top == NULL) || (maxlen <= 0))
     return(NULL);

   /* ==============================================================
      Acquire every bucket array before touching the list.  Each
      head array is allocated double-size : its second half holds
      the matching bucket tails.  A failed allocation unwinds the
      earlier ones through the labels at the bottom of the routine.
      ============================================================== */
   lngthbckts = balloc(maxlen*2,void *);
   if (lngthbckts == NULL)
     return(NULL);
   lngthbcktbots = lngthbckts+maxlen;

   lngthbcktcnts = balloc(maxlen,int);
   if (lngthbcktcnts == NULL)
     goto drop_length_buckets;

   specbckts = balloc(SPEC_BUCKETS*2,void *);
   if (specbckts == NULL)
     goto drop_length_counts;
   specbcktbots = specbckts+SPEC_BUCKETS;

   alphabckts = balloc(ALPHA_BUCKETS*2,void *);
   if (alphabckts == NULL)
     goto drop_special_buckets;
   alphabcktbots = alphabckts+ALPHA_BUCKETS;

   /* ==============================================================
      Pass 1 : distribute the list into the length-buckets and order
      each of them.  The counts and the case buckets are only needed
      by this pass, so they are given back straight afterwards.
      ============================================================== */
   length_sort(next,set_next,word,maxlen);

   release(maxlen,int,lngthbcktcnts);
   lngthbcktcnts = NULL;
   release(SPEC_BUCKETS*2,void *,specbckts);
   specbckts = NULL;
   specbcktbots = NULL;

   /* ==============================================================
      Pass 2 : alphabetize the length-buckets into one list, which
      main_sort() leaves in 'top'
      ============================================================== */
   main_sort(next,set_next,word,maxlen);

   /* ==============================================================
      Give back the remaining buckets and hand the sorted list back
      ============================================================== */
   release(maxlen*2,void *,lngthbckts);
   lngthbckts = NULL;
   lngthbcktbots = NULL;
   release(ALPHA_BUCKETS*2,void *,alphabckts);
   alphabckts = NULL;
   alphabcktbots = NULL;

   return(top);

   /* ==============================================================
      Allocation-failure exits : each label releases one array and
      falls through to release the arrays acquired before it
      ============================================================== */
drop_special_buckets:
   release(SPEC_BUCKETS*2,void *,specbckts);
   specbckts = NULL;
   specbcktbots = NULL;

drop_length_counts:
   release(maxlen,int,lngthbcktcnts);
   lngthbcktcnts = NULL;

drop_length_buckets:
   release(maxlen*2,void *,lngthbckts);
   lngthbckts = NULL;
   lngthbcktbots = NULL;
   return(NULL);
  }

/******************************************************************************
 ==============================================================================
                          INTERNALLY VISIBLE FUNCTIONS
 ==============================================================================
 ******************************************************************************/

/*******************************************************************************
 NAME        : length_sort
 PURPOSE     : Distributes the list into buckets by string length (bucket i
               holds the strings of length i+1), then orders each non-empty
               bucket with lngthbckt_insertion_sort() or
               lngthbckt_radix_sort(), chosen from the bucket's string count
 DESCRIPTION : The list in 'top' is consumed node by node; every node is
               appended to the tail of its length-bucket, so the original
               relative order is kept within a bucket until it is sorted.
 INPUTS      : 1-3) The data structure access functions
               4) The length of the longest string on the list
 RETURNS     : Nothing useful
 NOTES       : 1) On return 'top' is NULL and all the strings sit in the
                  length-buckets.
               2) The bucket index is clamped to 0..maxlen-1 : an empty string
                  is filed with the length-1 strings and a string longer than
                  maxlen with the length-maxlen strings.  Without the clamp
                  those strings index outside the bucket arrays.
                  NOTE(review): such strings violate the contract documented
                  for alpha_sort(); the clamp only keeps the bucket index in
                  range, it does not make them sort meaningfully.
 ******************************************************************************/
static void length_sort(void *(*next)(void*),void (*set_next)(void *,void *),
                        char *(*word)(void *),int maxlen)
  {

   void *curr = NULL;
   unsigned long wordlen;
   int bcktno;
   register int i;

   /* ========================
      Empty the length buckets
      ======================== */
   for (i = 0 ; i < maxlen ; i++)
     {
      lngthbckts[i] = lngthbcktbots[i] = NULL;
      lngthbcktcnts[i] = 0;
     }

   /* ==============================================================
      Put everything in the list into the appropriate length buckets
      ============================================================== */
   while (top != NULL)
     {
      curr = top;

      /* Bucket number is length-1, held inside the allocated range */
      wordlen = (unsigned long) strlen((*word)(curr));
      if (wordlen == 0)
        bcktno = 0;
      else if (wordlen > (unsigned long) maxlen)
        bcktno = maxlen-1;
      else
        bcktno = ((int) wordlen)-1;

      if (lngthbckts[bcktno] == NULL)
        lngthbckts[bcktno] = curr;
      else
        (*set_next)(lngthbcktbots[bcktno],curr);
      lngthbcktbots[bcktno] = curr;

      /* Advance before unlinking the node just filed */
      top = (*next)(curr);
      (*set_next)(curr,NULL);
      lngthbcktcnts[bcktno]++;
     }

   /* ===================================================================
      Order each non-empty length bucket : short buckets by
      Insertion-Sort, the others by Radix-Sort.  Both routines take the
      string length (bucket number + 1).
      =================================================================== */
   for (i = 0 ; i < maxlen ; i++)
     {
      if (lngthbckts[i] != NULL)
        {
         if (lngthbcktcnts[i] < MAX_INSERTION_SORT_SIZE)
           lngthbckt_insertion_sort(next,set_next,word,i+1);
         else
           lngthbckt_radix_sort(next,set_next,word,i+1);
        }
     }

   /* ==============================================================
      Leave all the strings in the length-buckets for the time being
      ============================================================== */
  }

/*******************************************************************************
 NAME        : lngthbckt_radix_sort
 PURPOSE     : Orders one length-bucket (strings all of the same length) so
               that, at each character position, non-lowercase characters
               come before lowercase letters
 DESCRIPTION : length_sort() chooses between this routine and
               lngthbckt_insertion_sort() from the number of strings in the
               bucket :
                 1) Small-lists (< MAX_INSERTION_SORT_SIZE) --> Insertion-Sort
                 2) otherwise --> this Radix-Sort
               One pass is made per character position, from the last
               position down to the first.  A pass splits the bucket into
               the two case buckets (appending at the tails, so the order
               from earlier passes is kept) and then relinks them,
               UPPERCASE bucket first.
 INPUTS      : 1-3) The data structure access functions
               4) The length of the strings in the bucket
 RETURNS     : Nothing useful
 NOTES       : The bucket is read from and written back to
               lngthbckts[len-1] and lngthbcktbots[len-1]
 ******************************************************************************/
static void lngthbckt_radix_sort(void *(*next)(void*),
                                 void (*set_next)(void *,void *),
                                 char *(*word)(void *),int len)
  {
   const int slot = len-1;      /* Index of the length-bucket being sorted */
   void *pending,*node;
   int posn,which;
   char letter;

   for (posn = slot ; posn >= 0 ; posn--)
     {
      /* ===========================================
         Detach the bucket's list and clear the case
         buckets for this character position
         =========================================== */
      pending = lngthbckts[slot];
      lngthbckts[slot] = NULL;
      lngthbcktbots[slot] = NULL;

      specbckts[UPPERCASE] = NULL;
      specbcktbots[UPPERCASE] = NULL;
      specbckts[LOWERCASE] = NULL;
      specbcktbots[LOWERCASE] = NULL;

      /* =================================================
         Move every node to the tail of the case bucket
         selected by its character at the current position
         ================================================= */
      while (pending != NULL)
        {
         node = pending;
         pending = (*next)(pending);
         (*set_next)(node,NULL);
         letter = ((*word)(node))[posn];
         which = spec_bcktno(letter);
         if (specbckts[which] != NULL)
           (*set_next)(specbcktbots[which],node);
         else
           specbckts[which] = node;
         specbcktbots[which] = node;
        }

      /* =================================================
         Relink the length bucket : UPPERCASE bucket first
         (when it has entries), then the LOWERCASE bucket
         ================================================= */
      if (specbckts[UPPERCASE] == NULL)
        {
         lngthbckts[slot] = specbckts[LOWERCASE];
         lngthbcktbots[slot] = specbcktbots[LOWERCASE];
        }
      else
        {
         lngthbckts[slot] = specbckts[UPPERCASE];
         (*set_next)(specbcktbots[UPPERCASE],specbckts[LOWERCASE]);
         lngthbcktbots[slot] = (specbcktbots[LOWERCASE] != NULL) ?
                                  specbcktbots[LOWERCASE] :
                                  specbcktbots[UPPERCASE];
        }
     }
  }

/*******************************************************************************
 NAME        : lngthbckt_insertion_sort
 PURPOSE     : Orders one length-bucket (strings all of the same length) into
               ascending strcmp() order
 DESCRIPTION : Nodes are unlinked one at a time from the front of the bucket
               and inserted into a new list.  A node is placed after any
               nodes already in the new list that compare equal to it.
               length_sort() calls this routine for buckets holding fewer
               than MAX_INSERTION_SORT_SIZE strings.
 INPUTS      : 1-3) The data structure access functions
               4) The length of the strings in the bucket
 RETURNS     : Nothing useful
 NOTES       : The bucket is read from and written back to
               lngthbckts[len-1]; lngthbcktbots[len-1] is kept pointing at
               the last node of the new list
 ******************************************************************************/
static void lngthbckt_insertion_sort(void *(*next)(void*),
                                 void (*set_next)(void *,void *),
                                 char *(*word)(void *),int len)
  {
   const int slot = len-1;      /* Index of the length-bucket being sorted */
   void *ordered = NULL;        /* Head of the list built so far           */
   void *node,*scan;

   lngthbcktbots[slot] = NULL;
   while (lngthbckts[slot] != NULL)
     {
      /* Unlink the front node of the bucket */
      node = lngthbckts[slot];
      lngthbckts[slot] = (*next)(lngthbckts[slot]);
      (*set_next)(node,NULL);

      /* The first node is both head and tail of the new list */
      if (ordered == NULL)
        {
         ordered = node;
         lngthbcktbots[slot] = node;
         continue;
        }

      /* A node sorting before the current head becomes the new head */
      if (strcmp((*word)(node),(*word)(ordered)) < 0)
        {
         (*set_next)(node,ordered);
         ordered = node;
         continue;
        }

      /* Otherwise walk to the last node that does not sort after the
         new one and splice the new node in behind it                 */
      scan = ordered;
      while (((*next)(scan) != NULL) &&
             (strcmp((*word)(node),(*word)((*next)(scan))) >= 0))
        scan = (*next)(scan);
      (*set_next)(node,(*next)(scan));
      (*set_next)(scan,node);

      /* Inserting behind the old tail makes the new node the tail */
      if (lngthbcktbots[slot] == scan)
        lngthbcktbots[slot] = node;
     }
   lngthbckts[slot] = ordered;
  }

/*******************************************************************************
 NAME        : main_sort
 PURPOSE     : Takes the contents of the length-buckets and radix-sorts them
               by the following order :
                 1) Numbers (10 buckets)
                 2) Alphabet (disregarding case - 26 buckets)
                 3) Other printable-characters in ASCII order (32 buckets)
 DESCRIPTION : 
 INPUTS      : 1-3) Data structure accessing functions
               4) The length of the longest string in the list
 RETURNS     : Nothing useful
 NOTES       : This routine assumes that length_sort() has already successfully
               completed and that all the strings in the list now sit in the
               length-buckets.
 ******************************************************************************/
static void main_sort(void *(*next)(void*),void (*set_next)(void *,void *),
                      char *(*word)(void *),int maxlen)
  {
   void *sorted = NULL,
        *curr = NULL;
   register int i,j,k;
   int bcktno = NULL;

   /* =======================================================================
      To sort the list completely requires a number of iterations equal to
      at most the length of the longest string in the list.  Strings
      must be tossed into the buckets based on a progression of their radices
      from right to left - this insures that the ordering progresses from
      left to right (like we want) since it will be the first characters in
      each string which determine the final ordering of the list
      ======================================================================= */
   for (i = maxlen-1 ; i >= 0 ; i--)
     {
      /* ======================
         Empty the sort buckets
         ====================== */
      for (j = 0 ; j < ALPHA_BUCKETS ; j++)
        alphabckts[j] = alphabcktbots[j] = NULL;

      /* ==================================================================
         Put all strings of exactly the length of the current radix-posn i
         in the sort buckets.  These are the strings residing in the
         appropriate length-bucket as set by the routine length_sort()
         We don't have to worry about strings shorter than this until
         another iteration
         ================================================================== */
      lngthbcktbots[i] = NULL;
      while (lngthbckts[i] != NULL)
        {
         curr = lngthbckts[i];
         lngthbckts[i] = (*next)(lngthbckts[i]);
         (*set_next)(curr,NULL);
         bcktno = (int) alpha_map[((int) (((*word)(curr))[i]))-ALPHA_MAPOFFSET];
         if (alphabckts[bcktno] == NULL)
           alphabckts[bcktno] = curr;
         else
           (*set_next)(alphabcktbots[bcktno],curr);
         alphabcktbots[bcktno] = curr;
        }

      /* ================================================================
         Put all strings currently in the sorted list back in the buckets
         These are strings that are longer than the current radix-posn i
         ================================================================ */
      while (sorted != NULL)
        {
         curr = sorted;
         sorted = (*next)(sorted);
         (*set_next)(curr,NULL);
         bcktno = (int) alpha_map[((int) (((*word)(curr))[i]))-ALPHA_MAPOFFSET];
         if (alphabckts[bcktno] == NULL)
           alphabckts[bcktno] = curr;
         else
           (*set_next)(alphabcktbots[bcktno],curr);
         alphabcktbots[bcktno] = curr;
        }

      /* ===========================================================
         Rebuild the sorted list by linking the buckets together in
         sequential order
         =========================================================== */
      for (j = 0 ; ((j < ALPHA_BUCKETS) ? (alphabckts[j] == NULL) : FALSE) 
           ; j++) ;
      sorted = alphabckts[j];
      for ( ; j < (ALPHA_BUCKETS-1) ; j = k)
        {
         for (k = j+1 ; ((k < (ALPHA_BUCKETS-1)) ? 
                         (alphabckts[k] == NULL) : FALSE) ; k++) ;
         (*set_next)(alphabcktbots[j],alphabckts[k]);
        }
     }

   /* =====================================================
      Set the top of the list to the new sorted permutation
      ===================================================== */
   top = sorted;
  }
