00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef _TERMSH_
00024 #define _TERMSH_
00025
00026 #include "common_headers.hpp"
00027 #include "String.hpp"
00028 #include "ISet.hpp"
00029 #include "Array.hpp"
00030
/*
 * Reserved spellings and limits shared by the Terms classes.
 *
 * The pointers themselves are const (not just the characters they point to):
 * this header gives every translation unit its own copy, and a reassignable
 * copy could silently diverge from the others.
 */

/* Spelling whose index Terms::getIndexOfOOV() looks up. */
static const char *const OOV_SPELLING = "[OOV]";

/* Spelling whose index Terms::getIndexOfNullTerm() looks up. */
static const char *const NULL_SPELLING = "[NULL]";

/* Number of Terms objects allocated by Terms::SetOfSets(). */
static const int MAX_SETS_OPEN = 128;
00034
00035
00036 class Terms {
00037 public:
00038 static Terms *construct(const char *path_);
00039 const char *operator[](int idx) const { return terms[idx]; }
00040 int operator[](const char *t) const
00041 { int k=terms[t]; if (k==-1) return getIndexOfOOV(); else return k;}
00042 int size() const { return terms.size(); }
00043 int getIndexOfOOV() const { return terms[OOV_SPELLING]; }
00044 int getIndexOfNullTerm() const { return terms[NULL_SPELLING]; }
00045 const char *getPath() const { return path; }
00046 static const char * getOOVSpelling() { return OOV_SPELLING; }
00047 int tokenize(const char *buff, Array<int> &tokenized) const;
00048
00049
00050 int tokenize(const char *buff, Array<unsigned short> &tokenized) const;
00051
00052 public:
00053 void open(const char *path_);
00054
00055 private:
00056 ISet<String> terms;
00057 String path;
00058
00059 private:
00060 static Terms* setOfSets;
00061 static int nSetsOpen;
00062 static Terms *SetOfSets() {
00063 if (setOfSets==NULL) setOfSets = new Terms [MAX_SETS_OPEN];
00064 return setOfSets;
00065 }
00066 static int &NSetsOpen() { return nSetsOpen; }
00067 };
00068
00069 class Source : public Terms {
00070 };
00071
00072 class Target : public Terms {
00073 };
00074
00075
00076 #endif