Main Page   Compound List   File List   Compound Members   File Members  

rr_libs/read_voc.c

Go to the documentation of this file.
00001 /* read_voc: create a vocabulary hash table and optionally a vocabulary direct-access table */
00002 /*=====================================================================
00003                 =======   COPYRIGHT NOTICE   =======
00004 Copyright (C) 1994, Carnegie Mellon University and Ronald Rosenfeld.
00005 All rights reserved.
00006 
00007 This software is made available for research purposes only.  It may be
00008 redistributed freely for this purpose, in full or in part, provided
00009 that this entire copyright notice is included on any copies of this
00010 software and applications and derivations thereof.
00011 
00012 This software is provided on an "as is" basis, without warranty of any
00013 kind, either expressed or implied, as to any matter including, but not
00014 limited to warranty of fitness of purpose, or merchantability, or
00015 results obtained from use of this software.
00016 ======================================================================*/
00017 
00026 /* Edited by Philip Clarkson, March 1997 to prevent compilation warnings */
00027 
00028 
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include "general.h"
#include "sih.h"
00033 
00034 void read_voc(char *filename, int verbosity,   
00035               sih_t *p_vocab_ht, char ***p_vocab, 
00036               unsigned short *p_vocab_size)
00037 /* p_vocab==NULL means: build only a hash table */
00038 {
00039 
00040   /*  static char rname[] = "rd_voc"; */  /* Never used anyway! */
00041   char *pperiod;
00042   int   vocab_size;
00043 
00044   pperiod = rindex(filename,'.');
00045   if (pperiod==NULL) pperiod = filename-1;
00046 
00047   if (strcmp(pperiod+1,"vocab_ht")==0) {             /* file == hash_table */
00048      FILE *fp=rr_iopen(filename);
00049      sih_val_read_from_file(p_vocab_ht, fp, filename, verbosity);
00050      rr_iclose(fp);
00051      vocab_size = p_vocab_ht->nentries;
00052      if (p_vocab!=NULL) {
00053         get_vocab_from_vocab_ht(p_vocab_ht, vocab_size, verbosity, p_vocab);
00054         *p_vocab[0] = salloc("<UNK>");
00055      }
00056   }
00057   else {                                             /* file == vocab(ascii) */
00058      read_wlist_into_siht(filename, verbosity, p_vocab_ht, &vocab_size);
00059      if (p_vocab!=NULL) {
00060         read_wlist_into_array(filename, verbosity, p_vocab, &vocab_size);
00061         *p_vocab[0] = salloc("<UNK>");
00062      }
00063   }
00064 
00065   if (p_vocab_size) {
00066     *p_vocab_size = vocab_size;
00067   }
00068   
00069 }
00070 
00071 
00074 void get_vocab_from_vocab_ht(sih_t *ht, int vocab_size, int verbosity, char ***p_vocab)
00075 {
00076   static char rname[]="get_vocab_fm_ht";
00077   char   **wlist;
00078   int    islot, wordid;
00079 
00080   wlist = (char **) rr_malloc((vocab_size+1)*sizeof(char *));
00081 
00082   for (islot=0; islot<ht->nslots; islot++) {
00083      wordid = (int) ht->slots[islot].intval;
00084      if (wordid>0) wlist[wordid] = ht->slots[islot].string;
00085   }
00086 
00087   for (wordid=1; wordid<=vocab_size; wordid++)
00088     if (wlist[wordid]==NULL)
00089       quit(-1,"%s ERROR: the hash table does not contain wordid %d\n",
00090                rname, wordid);
00091 
00092   if (verbosity) fprintf(stderr,
00093      "%s: vocabulary was constructed from the vocab hash table\n",rname);
00094   *p_vocab = wlist;
00095 }

Generated on Tue Dec 21 13:54:46 2004 by doxygen1.2.18