#include "arabic_stemmer.h"
#include "WordSet.hpp"
| Defines | |
| #define | STEM_TO_WORD 99 | 
| #define | STEM_WORD 0 | 
| #define | WAW "0xe6" | 
| #define | CHAR_WAW 0xe6 | 
| Functions | |
| void | light_stem (char *, char *) | 
| int | remove_diacritics (char *, char *) | 
| void | remove_definite_articles (char *, char *) | 
| char * | substring (const char *, int, int) | 
| void | freeWordSets () | 
| void | substring_copy (char dest[], const char *word, int start, int end) | 
| int | Str_equals (const char *s1, const char *s2) | 
| int | is_whitespace (const char c) | 
| void | load_static_files (const char *path) | 
| void | check_stemmer_files () | 
| void | remove_all_suffixes (char *word, char *result, size_t lenlimit) | 
| void | arabic_clean_up (void) | 
| void | no_stem (char *word, char *result) | 
| int | on_stop_list (char *word) | 
| void | arabic_stop (char *word, char *result) | 
| void | arabic_norm2 (char *word, char *result) | 
| void | arabic_norm2_stop (char *word, char *result) | 
| void | arabic_light10 (char *word, char *result) | 
| void | arabic_light10_stop (char *word, char *result) | 
| void | show_stemmer_options () | 
| void * | set_stemmer (char *stemval) | 
| char * | stem_phrase (char *phrase, int *numtoks, void(*stemmer)(char *, char *)) | 
| Variables | |
| char * | defarticles [] = {"ال", "وال","بال", "كال", "فال", "لل", "\0"} | 
| char * | suffixes [] = {"ها","ان","ات","ون","ين","يه","ية","ه","ة","ي","\0"} | 
| stem_info_t | stemtable [NUMSTEMMERS] | 
| int | files_loaded = 0 | 
| const int | isWhitespace [256] | 
| const int | NormChar [256] | 
| const int | Norm3Char [256] | 
| const int | ArabicVowel [256] | 
| WordSet * | stop_words_ht | 
| char * | arabic_stemdir | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | ||||||||||||
| 
 | 
| 
 | ||||||||||||
| 
 | 
| 
 | ||||||||||||
| 
 | 
| 
 | ||||||||||||
| 
 | 
| 
 | ||||||||||||
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | ||||||||||||
| 
 | 
| 
 | 
| 
 | 
| 
 | ||||||||||||
| 
 | 
| 
 | 
| 
 | 
| 
 | ||||||||||||||||
| 
 | 
| 
 | ||||||||||||
| 
 | 
| 
 | ||||||||||||
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | ||||||||||||||||
| 
 | 
| 
 | ||||||||||||
| 
 | 
| 
 | ||||||||||||||||
| 
 | 
| 
 | ||||||||||||||||||||
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| Initial value:  {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,   0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xe6,0,0,0,0,0,0xec,0xed,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
| Initial value:  {
0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} | 
| 
 | 
| Initial value:  {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x81,0,0,0,0,0,0,0,0,0x8a,0,0,0x8d,0x8e,0x8f,
0x90,0,0,0,0,0,0,0,0x98,0,0x9a,0,0,0,0,0x9f,
0,0,0,0,0,0,0,0,0,0,0xaa,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xc0,0xc7,0xc7,0xc7,0xc7,0xc7,0xc7,0xc7,0xc8,0xe5,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,   0,0xd8,0xd9,0xda,0xdb,   0,0xdd,0xde,0xdf,
   0,0xe1,   0,0xe3,0xe4,0xe5,0xe6,   0,   0,   0,   0,   0,0xed, 0xed,   0,   0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} | 
| 
 | 
| Initial value:  {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x81,0,0,0,0,0,0,0,0,0x8a,0,0,0x8d,0x8e,0x8f,
0x90,0,0,0,0,0,0,0,0x98,0,0x9a,0,0,0,0,0x9f,
0,0,0,0,0,0,0,0,0,0,0xaa,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xc0,0xc1,0xc7,0xc7,0xc4,0xc7,0xc6,0xc7,0xc8,0xe5,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,   0,0xd8,0xd9,0xda,0xdb,   0,0xdd,0xde,0xdf,
   0,0xe1,   0,0xe3,0xe4,0xe5,0xe6,   0,   0,   0,   0,   0,0xed, 0xed,   0,   0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} | 
| 
 | 
| Initial value:  {
 {"none", "none", no_stem},
 {"arabic_stop", "arabic_stop", arabic_stop},
 {"arabic_norm2", "table normalization", arabic_norm2},
 {"arabic_norm2_stop", "table normalization with stopping", arabic_norm2_stop},
 {"arabic_light10", "light stemming", arabic_light10}, 
 {"arabic_light10_stop", "light10 and remove stop words", arabic_light10_stop}
 } | 
| 
 | 
| 
 | 
| 
 | 
| 
 | 
 1.2.18
1.2.18