Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

BasicDocStream.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.cs.cmu.edu/~lemur/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 #ifndef _BASICFILESTREAM_HPP  // NOTE(review): guard is named after "BasicFileStream" although this file is BasicDocStream.hpp; a leading underscore followed by a capital letter is also a reserved identifier form
00014 #define _BASICFILESTREAM_HPP  // include guard: the rest of the header is skipped on repeated inclusion
00015 
00017 
00055 #include "common_headers.hpp"
00056 #include <cassert>
00057 #include <cstdio>
00058 #include <cstring>
00059 #include "DocStream.hpp"
00060 #include "Exception.hpp"
00061 
00062 
00063 #define MAXLINE 65536  // not referenced anywhere in this header; presumably a maximum input line length for the implementation files -- TODO confirm
00064 
00065 
00067 class BasicTokenTerm : public TokenTerm {  // TokenTerm whose spelling is a plain C-string pointer set by BasicTokenDoc
00068  public:
00069   BasicTokenTerm() : str(NULL) {}  // start with no spelling rather than an indeterminate pointer
00070   virtual ~BasicTokenTerm() {}  // does not free str
00071   virtual const char *spelling() { return str;}  // returns the stored pointer; NULL until a friend assigns one
00072   friend class BasicTokenDoc;  // only this friend can reach the private str member
00073  private:
00074   char *str;  // spelling pointer; this class neither allocates nor releases what it points to
00075 };
00076 
00077 
00079 
00080 class BasicTokenDoc : public Document {  // Document that works on an ifstream supplied by its creator
00081  public:
00082   BasicTokenDoc(ifstream *stream): curWord(NULL), docStr(stream) { buf1[0] = buf2[0] = id[0] = '\0';  // stream pointer is stored as-is; nothing is opened here
00083   }  // buffers start empty so getID() yields a valid C string before any read
00084    void startTermIteration();  // defined out of line
00085   
00086   char *getID() const { return const_cast<char *>(id);}  // exposes the internal id buffer; the cast is needed because the return type is non-const
00087 
00088   bool hasMore() { return (curWord != NULL && strcmp(curWord, "</DOC>") != 0);}  // false when curWord is "</DOC>" or no word has been set yet
00089     
00090   TokenTerm * nextTerm();  // defined out of line
00091 
00092   friend class BasicDocStream;  // grants BasicDocStream access to the private members below
00093  private:
00094   void readID();  // defined out of line
00095   char *curWord;  // current word checked by hasMore(); presumably points into buf1/buf2 -- confirm in the .cpp
00096   char buf1[20000];  // fixed-size buffer; its use is in the out-of-line methods
00097   char buf2[20000];  // fixed-size buffer; its use is in the out-of-line methods
00098   char id[2000];  // storage behind getID()
00099   ifstream *docStr;  // stream passed to the constructor; this class declares no destructor, so it is never deleted here
00100 };
00101 
00102 
00104 class BasicDocStream : public DocStream  // DocStream holding an ifstream pointer that it deletes on destruction
00105 {
00106 public:
00107   BasicDocStream() : ifs(NULL), nextTokenRead(false) { file[0] = '\0'; buf[0] = '\0'; }  // no stream yet; ifs must be NULL so the destructor's delete is a no-op
00108   BasicDocStream (const char * inputFile);  // defined out of line; presumably opens inputFile -- confirm in the .cpp
00109 
00110   virtual ~BasicDocStream() {  delete ifs;}  // releases the stream; deleting a NULL pointer does nothing
00111 
00112 public:
00113         
00114   bool hasMore();  // defined out of line
00115 
00116   void startDocIteration();  // defined out of line
00117 
00118   Document *nextDoc();  // defined out of line; ownership of the returned Document is not visible from this header
00119 
00120 private:
00121   char file[1024];  // presumably the input file name -- confirm in the .cpp
00122   ifstream *ifs;  // deleted by the destructor. NOTE(review): the implicit copy operations copy this pointer, so copying a BasicDocStream would delete it twice
00123   char buf[2000];  // fixed-size buffer; its use is in the out-of-line methods
00124   bool nextTokenRead;  // flag maintained by the out-of-line methods; false for a default-constructed stream
00125 };
00126 
00127 
00128 
00129 
00130 #endif
00131 
00132 
00133 
00134 

Generated at Fri Jul 26 18:22:25 2002 for LEMUR by doxygen 1.2.4, written by Dimitri van Heesch, © 1997-2000