edu.cmu.cs.readweb.text
Class DocumentItem

java.lang.Object
  extended by edu.cmu.cs.readweb.text.DocumentItem

public class DocumentItem
extends java.lang.Object


Constructor Summary
DocumentItem()
           
DocumentItem(java.lang.String _documentId)
          Gets the web page directly from the internet using the given URL.
DocumentItem(java.lang.String _documentId, java.lang.String cacheDir)
          Given a URL, looks for the web page in the cache first; if it is not found there, fetches it from the internet.
DocumentItem(java.lang.String _documentId, java.lang.String _documentStream, java.lang.String _documentLinks)
           
 
Method Summary
 java.lang.String cropString(int startPos, int endPos)
          crop a string between start and end position.
 java.lang.String getDocId()
           
 java.lang.String getDocLinks()
           
 java.lang.String getDocStream()
           
 SpanItem getSpan(int pos, int direction)
          Gets the first word, and its location, found in the given direction from the document position.
 java.lang.String getWord(int pos, int direction)
          Gets the first word found in the given direction from the document position.
 int searchString(java.lang.String target, int offset)
          Searches for the first position of the target string in the doc stream, starting from the offset position.
 int searchStringInRange(java.lang.String target, int offset, int rangeSize, int direction)
          Searches for the first position of the target string in the doc stream, starting from the offset position, within the specified search range and in the specified search direction (0: left, 1: right).
 void writeLinksToFile(java.io.BufferedWriter out)
          Writes the document links to a file.
 void writeToFile(java.io.BufferedWriter out)
          Writes the document stream to a file.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

DocumentItem

public DocumentItem()

DocumentItem

public DocumentItem(java.lang.String _documentId,
                    java.lang.String _documentStream,
                    java.lang.String _documentLinks)

DocumentItem

public DocumentItem(java.lang.String _documentId)
Gets the web page directly from the internet using the given URL.


DocumentItem

public DocumentItem(java.lang.String _documentId,
                    java.lang.String cacheDir)
Given a URL, looks for the web page in the cache first; if it is not found there, fetches it from the internet.

Method Detail

getDocId

public java.lang.String getDocId()

getDocStream

public java.lang.String getDocStream()

getDocLinks

public java.lang.String getDocLinks()

searchString

public int searchString(java.lang.String target,
                        int offset)
Searches for the first position of the target string in the doc stream, starting from the offset position.


searchStringInRange

public int searchStringInRange(java.lang.String target,
                               int offset,
                               int rangeSize,
                               int direction)
Searches for the first position of the target string in the doc stream, starting from the offset position, within the specified search range and in the specified search direction (0: left, 1: right).


cropString

public java.lang.String cropString(int startPos,
                                   int endPos)
crop a string between start and end position.


getSpan

public SpanItem getSpan(int pos,
                        int direction)
Gets the first word, and its location, found in the given direction from the document position. Direction values: 0 = left, 1 = right.


getWord

public java.lang.String getWord(int pos,
                                int direction)
Gets the first word found in the given direction from the document position. Direction values: 0 = left, 1 = right.


writeToFile

public void writeToFile(java.io.BufferedWriter out)
Writes the document stream to a file.


writeLinksToFile

public void writeLinksToFile(java.io.BufferedWriter out)
Writes the document links to a file.