|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
info.ephyra.answerselection.filters.Filter
info.ephyra.answerselection.filters.WebTermImportanceFilter
public abstract class WebTermImportanceFilter
A web reinforcement approach that ranks answer candidates for definitional questions. Several variations of the target of the question are generated and are used to retrieve relevant text snippets from the web. The frequencies of content words in these snippets are counted and the scores of the answers are adjusted to assign higher scores to candidates that cover frequent keywords. This approach is based on the assumption that terms that often co-occur with the target provide relevant information on the target that should be covered by the answers.
Several instances of this web term importance filter have been implemented that use different sources for text snippets.
This class extends the class Filter.
| Nested Class Summary | |
|---|---|
private static class |
WebTermImportanceFilter.CacheEntry
|
private static class |
WebTermImportanceFilter.TargetGeneratorTest
|
protected class |
WebTermImportanceFilter.TermCounter
|
| Field Summary | |
|---|---|
private static java.util.HashMap<java.lang.String,WebTermImportanceFilter.CacheEntry> |
cache
|
protected static java.lang.String |
event
|
private java.util.HashSet<java.lang.String> |
extensionList
|
private static java.lang.String[] |
extensions
|
private boolean |
isCombined
|
static int |
LINEAR_LENGTH_NORMALIZATION
|
protected static java.lang.String |
location
|
static int |
LOG_10_LENGTH_NORMALIZATION
|
static int |
LOG_LENGTH_NORMALIZATION
|
static int |
NO_NORMALIZATION
|
private int |
normalizationMode
|
protected static java.lang.String |
organization
|
protected static java.lang.String |
person
|
static int |
SQUARE_ROOT_LENGTH_NORMALIZATION
|
protected static boolean |
TEST_TARGET_GENERATION
|
private int |
tfNormalizationMode
|
| Constructor Summary | |
|---|---|
protected |
WebTermImportanceFilter(int normalizationMode,
int tfNormalizationMode,
boolean isCombined)
|
| Method Summary | |
|---|---|
protected void |
addTermCounters(java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> source,
java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> target)
add all the term counters in source to target (perform a union of the key sets, summing up the counters) |
Result[] |
apply(Result[] results)
Increment the score of each result snippet for each word in it according to the number of top-100 web search engine snippets containing this particular word. |
private void |
cache(java.lang.String target,
java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> termCounters)
|
private java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> |
cacheLookup(java.lang.String target)
|
private java.lang.String |
checkType(java.lang.String target)
find the NE type of a target |
private boolean |
cutExtension(java.lang.String target,
java.util.ArrayList<java.lang.String> targets)
cut trailing words like "University", "International", "Corporation": "Microsoft Corporation" --> "Microsoft" and add the non-cut part to target list |
private void |
cutFirstNpInNpSequence(java.util.ArrayList<java.lang.String> targets)
remove first NP in a sequence of NPs: "the film 'Star Wars'" --> "'Star Wars'" |
private void |
extractAcronyms(java.util.ArrayList<java.lang.String> targets)
extract acronyms from the targets: "Basque ETA" --> "ETA" |
private void |
extractUpperCaseParts(java.util.ArrayList<java.lang.String> targets)
extract non lower case parts from the targets: "the film 'Star Wars'" --> "'Star Wars'"; "1998 indictment and trial of Susan McDougal" --> "Susan McDougal"; "Miss Universe 2000 crowned" --> "Miss Universe 2000"; "Abraham from the bible" --> "Abraham"; "Gobi desert" --> "Gobi" |
protected int |
getCountSum(java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> counters)
get the sum of a set of counters |
protected int |
getMaxCount(java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> counters)
get the maximum count out of a set of counters |
java.lang.String[] |
getTargets(java.lang.String target)
produce the target variations for a given target |
abstract java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> |
getTermCounters(java.lang.String[] targets)
fetch the term frequencies in the top X result snippets of a web search for some target |
static void |
main(java.lang.String[] args)
|
private void |
postProcess(java.util.ArrayList<java.lang.String> targets)
take care of remaining brackets |
protected int |
sumDiff(java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> counters,
java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> compare)
get the sum of a set of counters, each one minus the count in another set of counters |
| Methods inherited from class info.ephyra.answerselection.filters.Filter |
|---|
apply |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
protected static final java.lang.String person
protected static final java.lang.String organization
protected static final java.lang.String location
protected static final java.lang.String event
public static final int NO_NORMALIZATION
public static final int LINEAR_LENGTH_NORMALIZATION
public static final int SQUARE_ROOT_LENGTH_NORMALIZATION
public static final int LOG_LENGTH_NORMALIZATION
public static final int LOG_10_LENGTH_NORMALIZATION
private final int normalizationMode
private final int tfNormalizationMode
private final boolean isCombined
private java.util.HashSet<java.lang.String> extensionList
private static final java.lang.String[] extensions
private static java.util.HashMap<java.lang.String,WebTermImportanceFilter.CacheEntry> cache
protected static boolean TEST_TARGET_GENERATION
| Constructor Detail |
|---|
protected WebTermImportanceFilter(int normalizationMode,
int tfNormalizationMode,
boolean isCombined)
| Method Detail |
|---|
public abstract java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> getTermCounters(java.lang.String[] targets)
targets - an array of strings containing the targets
public java.lang.String[] getTargets(java.lang.String target)
target - the original target String
private java.lang.String checkType(java.lang.String target)
target - the target String to check
private boolean cutExtension(java.lang.String target,
java.util.ArrayList<java.lang.String> targets)
target - the target String to cut
targets - the target list to add the cut part to
private void extractUpperCaseParts(java.util.ArrayList<java.lang.String> targets)
targets - the list of targets
private void extractAcronyms(java.util.ArrayList<java.lang.String> targets)
targets - the list of targets
private void cutFirstNpInNpSequence(java.util.ArrayList<java.lang.String> targets)
targets - the list of targets
private void postProcess(java.util.ArrayList<java.lang.String> targets)
targets - the list of targets
public Result[] apply(Result[] results)
apply in class Filter
results - array of Result objects
Result objects
private void cache(java.lang.String target,
java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> termCounters)
private java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> cacheLookup(java.lang.String target)
protected void addTermCounters(java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> source,
java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> target)
source -
target -
protected int getMaxCount(java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> counters)
counters -
protected int getCountSum(java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> counters)
counters -
protected int sumDiff(java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> counters,
java.util.HashMap<java.lang.String,WebTermImportanceFilter.TermCounter> compare)
counters -
compare -
public static void main(java.lang.String[] args)
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||