#include "arabic_stemmer.h"
#include "WordSet.hpp"
Defines | |
| #define | STEM_TO_WORD 99 |
| #define | STEM_WORD 0 |
| #define | WAW "0xe6" |
| #define | CHAR_WAW 0xe6 |
Functions | |
| void | light_stem (char *, char *) |
| int | remove_diacritics (char *, char *) |
| void | remove_definite_articles (char *, char *) |
| char * | substring (const char *, int, int) |
| void | freeWordSets () |
| void | substring_copy (char dest[], const char *word, int start, int end) |
| int | Str_equals (const char *s1, const char *s2) |
| int | is_whitespace (const char c) |
| void | load_static_files (const char *path) |
| void | check_stemmer_files () |
| void | remove_all_suffixes (char *word, char *result, size_t lenlimit) |
| void | arabic_clean_up (void) |
| void | no_stem (char *word, char *result) |
| int | on_stop_list (char *word) |
| void | arabic_stop (char *word, char *result) |
| void | arabic_norm2 (char *word, char *result) |
| void | arabic_norm2_stop (char *word, char *result) |
| void | arabic_light10 (char *word, char *result) |
| void | arabic_light10_stop (char *word, char *result) |
| void | show_stemmer_options () |
| void * | set_stemmer (char *stemval) |
| char * | stem_phrase (char *phrase, int *numtoks, void(*stemmer)(char *, char *)) |
Variables | |
| char * | defarticles [] = {"ال", "وال","بال", "كال", "فال", "لل", "\0"} |
| char * | suffixes [] = {"ها","ان","ات","ون","ين","يه","ية","ه","ة","ي","\0"} |
| stem_info_t | stemtable [NUMSTEMMERS] |
| int | files_loaded = 0 |
| const int | isWhitespace [256] |
| const int | NormChar [256] |
| const int | Norm3Char [256] |
| const int | ArabicVowel [256] |
| WordSet * | stop_words_ht |
| char * | arabic_stemdir |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
||||||||||||
|
|
|
||||||||||||
|
|
|
||||||||||||
|
|
|
||||||||||||
|
|
|
||||||||||||
|
|
|
|
|
|
|
|
|
|
|
|
||||||||||||
|
|
|
|
|
|
||||||||||||
|
|
|
|
|
|
||||||||||||||||
|
|
|
||||||||||||
|
|
|
||||||||||||
|
|
|
|
|
|
|
|
|
||||||||||||||||
|
|
|
||||||||||||
|
|
|
||||||||||||||||
|
|
|
||||||||||||||||||||
|
|
|
|
|
|
|
Initial value: {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7, 0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xe6,0,0,0,0,0,0xec,0xed,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} |
|
|
|
|
|
|
|
|
Initial value: {
0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} |
|
|
Initial value: {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x81,0,0,0,0,0,0,0,0,0x8a,0,0,0x8d,0x8e,0x8f,
0x90,0,0,0,0,0,0,0,0x98,0,0x9a,0,0,0,0,0x9f,
0,0,0,0,0,0,0,0,0,0,0xaa,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xc0,0xc7,0xc7,0xc7,0xc7,0xc7,0xc7,0xc7,0xc8,0xe5,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 0,0xd8,0xd9,0xda,0xdb, 0,0xdd,0xde,0xdf,
0,0xe1, 0,0xe3,0xe4,0xe5,0xe6, 0, 0, 0, 0, 0,0xed, 0xed, 0, 0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} |
|
|
Initial value: {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x81,0,0,0,0,0,0,0,0,0x8a,0,0,0x8d,0x8e,0x8f,
0x90,0,0,0,0,0,0,0,0x98,0,0x9a,0,0,0,0,0x9f,
0,0,0,0,0,0,0,0,0,0,0xaa,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xc0,0xc1,0xc7,0xc7,0xc4,0xc7,0xc6,0xc7,0xc8,0xe5,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 0,0xd8,0xd9,0xda,0xdb, 0,0xdd,0xde,0xdf,
0,0xe1, 0,0xe3,0xe4,0xe5,0xe6, 0, 0, 0, 0, 0,0xed, 0xed, 0, 0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} |
|
|
Initial value: {
{"none", "none", no_stem},
{"arabic_stop", "arabic_stop", arabic_stop},
{"arabic_norm2", "table normalization", arabic_norm2},
{"arabic_norm2_stop", "table normalization with stopping", arabic_norm2_stop},
{"arabic_light10", "light stemming", arabic_light10},
{"arabic_light10_stop", "light10 and remove stop words", arabic_light10_stop}
} |
|
|
|
|
|
|
1.2.18