Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

Terms.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 
00015 
00016 
00017 // ======================================================================
00018 // Terms.hpp
00019 // Adam Berger
00020 // Stores a hash table of recognized term spellings
00021 // ======================================================================
00022 
00023 #ifndef _TERMSH_
00024 #define _TERMSH_
00025 
00026 #include "common_headers.hpp"
00027 #include "String.hpp"
00028 #include "ISet.hpp"
00029 #include "Array.hpp"
00030 
// Spelling of the stand-in entry for unrecognized terms: Terms::operator[]
// (const char *) falls back to this entry's index when a lookup yields -1.
// The pointer itself is const so that no translation unit including this
// header can accidentally re-point its private copy.
static const char *const OOV_SPELLING  = "[OOV]";

// Spelling whose index is reported by Terms::getIndexOfNullTerm().
static const char *const NULL_SPELLING = "[NULL]";

// Number of Terms objects allocated by Terms::SetOfSets().
static const int MAX_SETS_OPEN = 128;
00034 
00035 
00036 class Terms {
00037 public:
00038   static Terms *construct(const char *path_);
00039   const char *operator[](int idx) const { return terms[idx]; }
00040   int operator[](const char *t)   const 
00041     { int k=terms[t]; if (k==-1) return getIndexOfOOV(); else return k;} 
00042   int size()                      const { return terms.size(); }
00043   int getIndexOfOOV()             const { return terms[OOV_SPELLING];  }
00044   int getIndexOfNullTerm()        const { return terms[NULL_SPELLING]; }
00045   const char *getPath()           const { return path; }  
00046   static const char * getOOVSpelling() { return OOV_SPELLING; }
00047   int tokenize(const char *buff, Array<int> &tokenized) const;
00048 
00049   // for back-compatibility with 089 code 
00050   int tokenize(const char *buff, Array<unsigned short> &tokenized) const;
00051 
00052 public:
00053   void open(const char *path_);
00054 
00055 private:
00056   ISet<String> terms;
00057   String path; 
00058   
00059 private:
00060   static Terms* setOfSets;
00061   static int nSetsOpen;
00062   static Terms *SetOfSets() { 
00063     if (setOfSets==NULL) setOfSets = new Terms [MAX_SETS_OPEN]; 
00064     return setOfSets; 
00065   }
00066   static int &NSetsOpen() { return nSetsOpen; } 
00067 };
00068 
00069 class Source : public Terms {
00070 };
00071 
00072 class Target : public Terms {
00073 };
00074 
00075 
00076 #endif

Generated at Fri Jul 26 18:22:27 2002 for LEMUR by doxygen 1.2.4 written by Dimitri van Heesch, © 1997-2000