|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
  info.ephyra.nlp.OpenNLP
public class OpenNLP
This class provides a common interface to the OpenNLP toolkit.
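It supports the following natural language processing tools:
- Sentence detection
- Tokenization
- Part of speech tagging
- Chunking
- Full parsing
- Coreference resolution (linking)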
| Field Summary | |
|---|---|
private static java.util.regex.Pattern |
ABUNDANT_BLANKS
Pattern for abundant blanks. |
private static opennlp.tools.lang.english.TreebankChunker |
chunker
Chunker from the OpenNLP project. |
private static opennlp.tools.lang.english.TreebankLinker |
linker
Linker from the OpenNLP project. |
private static opennlp.tools.parser.ParserME |
parser
Full parser from the OpenNLP project. |
private static opennlp.tools.lang.english.SentenceDetector |
sentenceDetector
Sentence detector from the OpenNLP project. |
private static opennlp.tools.lang.english.PosTagger |
tagger
Part of speech tagger from the OpenNLP project. |
private static opennlp.tools.lang.english.Tokenizer |
tokenizer
Tokenizer from the OpenNLP project. |
private static java.util.HashSet<java.lang.String> |
unJoinablePrepositions
|
| Constructor Summary | |
|---|---|
OpenNLP()
|
|
| Method Summary | |
|---|---|
static boolean |
createChunker(java.lang.String model)
Creates the chunker from a model file. |
static boolean |
createLinker(java.lang.String dir)
Creates the linker from a directory containing models. |
static boolean |
createParser(java.lang.String dir)
Creates the parser from a directory containing models. |
static boolean |
createPosTagger(java.lang.String model,
java.lang.String tagdict)
Creates the part of speech tagger from a model file and a case sensitive tag dictionary. |
static boolean |
createSentenceDetector(java.lang.String model)
Creates the sentence detector from a model file. |
static boolean |
createTokenizer(java.lang.String model)
Creates the tokenizer from a model file. |
static java.lang.String[] |
joinNounPhrases(java.lang.String[] tokens,
java.lang.String[] chunkTags)
|
static void |
link(opennlp.tools.parser.Parse[] parses)
Identifies coreferences in an array of full parses of sentences. |
static opennlp.tools.parser.Parse |
parse(java.lang.String sentence)
Performs a full parsing on a sentence of space-delimited tokens. |
static java.lang.String[] |
sentDetect(java.lang.String text)
Splits a text into sentences. |
static java.lang.String[] |
tagChunks(java.lang.String[] tokens,
java.lang.String[] pos)
Assigns chunk tags to an array of tokens and POS tags. |
static java.lang.String |
tagPos(java.lang.String sentence)
Assigns POS tags to a sentence of space-delimited tokens. |
static java.lang.String[] |
tagPos(java.lang.String[] sentence)
Assigns POS tags to an array of tokens that form a sentence. |
static java.lang.String[] |
tokenize(java.lang.String text)
A model-based tokenizer used to prepare a sentence for POS tagging. |
static java.lang.String |
tokenizeWithSpaces(java.lang.String text)
Applies the model-based tokenizer and concatenates the tokens with spaces. |
static java.lang.String |
untokenize(java.lang.String text)
Untokenizes a text by removing abundant blanks. |
static java.lang.String |
untokenize(java.lang.String text,
java.lang.String original)
Untokenizes a text by mapping it to a string that contains the original text as a subsequence. |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
private static final java.util.regex.Pattern ABUNDANT_BLANKS
private static opennlp.tools.lang.english.SentenceDetector sentenceDetector
private static opennlp.tools.lang.english.Tokenizer tokenizer
private static opennlp.tools.lang.english.PosTagger tagger
private static opennlp.tools.lang.english.TreebankChunker chunker
private static opennlp.tools.parser.ParserME parser
private static opennlp.tools.lang.english.TreebankLinker linker
private static java.util.HashSet<java.lang.String> unJoinablePrepositions
| Constructor Detail |
|---|
public OpenNLP()
| Method Detail |
|---|
public static boolean createSentenceDetector(java.lang.String model)
model - model file
public static boolean createTokenizer(java.lang.String model)
model - model file
public static boolean createPosTagger(java.lang.String model,
java.lang.String tagdict)
model - model file
tagdict - case sensitive tag dictionary
public static boolean createChunker(java.lang.String model)
model - model file
public static boolean createParser(java.lang.String dir)
dir - model directory
public static boolean createLinker(java.lang.String dir)
dir - model directory
public static java.lang.String[] sentDetect(java.lang.String text)
text - sequence of sentences
Returns: null, if the sentence detector is not initialized
public static java.lang.String[] tokenize(java.lang.String text)
text - text to tokenize
Returns: null, if the tokenizer is not initialized
public static java.lang.String tokenizeWithSpaces(java.lang.String text)
text - text to tokenize
Returns: null, if the tokenizer is not initialized
public static java.lang.String untokenize(java.lang.String text)
Untokenizes a text by removing abundant blanks.
Note that it is not guaranteed that this method exactly reverts the
effect of tokenize().
text - text to untokenize
public static java.lang.String untokenize(java.lang.String text,
java.lang.String original)
Untokenizes a text by mapping it to a string that contains the original text as a subsequence.
Note that it is not guaranteed that this method exactly reverts the
effect of tokenize().
text - text to untokenize
original - string that contains the original text as a subsequence
public static java.lang.String tagPos(java.lang.String sentence)
sentence - sentence to be annotated with POS tags
Returns: null, if the tagger is not initialized
public static java.lang.String[] tagPos(java.lang.String[] sentence)
sentence - array of tokens to be annotated with POS tags
Returns: null, if the tagger is not initialized
public static java.lang.String[] tagChunks(java.lang.String[] tokens,
java.lang.String[] pos)
tokens - array of tokens
pos - array of corresponding POS tags
Returns: null, if the chunker is not initialized
public static opennlp.tools.parser.Parse parse(java.lang.String sentence)
sentence - the sentence
Returns: null, if the parser is not initialized or the sentence is empty
public static void link(opennlp.tools.parser.Parse[] parses)
parses - array of full parses of sentences
public static java.lang.String[] joinNounPhrases(java.lang.String[] tokens,
java.lang.String[] chunkTags)
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||