00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 #ifndef _TERMSH_
00024 #define _TERMSH_
00025 
00026 #include "common_headers.hpp"
00027 #include "String.hpp"
00028 #include "ISet.hpp"
00029 #include "Array.hpp"
00030 
// Spelling looked up by Terms::getIndexOfOOV(); Terms::operator[](const char*)
// falls back to that index when a lookup yields -1.
// The pointer itself is const (not only the characters) so that no
// translation unit including this header can rebind it.
static const char *const OOV_SPELLING  = "[OOV]";

// Spelling looked up by Terms::getIndexOfNullTerm().
static const char *const NULL_SPELLING = "[NULL]";

// Number of Terms objects allocated by Terms::SetOfSets().
static const int MAX_SETS_OPEN = 128;
00034 
00035 
00036 class Terms {
00037 public:
00038   static Terms *construct(const char *path_);
00039   const char *operator[](int idx) const { return terms[idx]; }
00040   int operator[](const char *t)   const 
00041     { int k=terms[t]; if (k==-1) return getIndexOfOOV(); else return k;} 
00042   int size()                      const { return terms.size(); }
00043   int getIndexOfOOV()             const { return terms[OOV_SPELLING];  }
00044   int getIndexOfNullTerm()        const { return terms[NULL_SPELLING]; }
00045   const char *getPath()           const { return path; }  
00046   static const char * getOOVSpelling() { return OOV_SPELLING; }
00047   int tokenize(const char *buff, Array<int> &tokenized) const;
00048 
00049   
00050   int tokenize(const char *buff, Array<unsigned short> &tokenized) const;
00051 
00052 public:
00053   void open(const char *path_);
00054 
00055 private:
00056   ISet<String> terms;
00057   String path; 
00058   
00059 private:
00060   static Terms* setOfSets;
00061   static int nSetsOpen;
00062   static Terms *SetOfSets() { 
00063     if (setOfSets==NULL) setOfSets = new Terms [MAX_SETS_OPEN]; 
00064     return setOfSets; 
00065   }
00066   static int &NSetsOpen() { return nSetsOpen; } 
00067 };
00068 
00069 class Source : public Terms {
00070 };
00071 
00072 class Target : public Terms {
00073 };
00074 
00075 
00076 #endif