OLD | NEW |
| (Empty) |
1 #ifndef _AFFIXMGR_HXX_ | |
2 #define _AFFIXMGR_HXX_ | |
3 | |
4 #include "hunvisapi.h" | |
5 | |
6 #include <stdio.h> | |
7 | |
8 #include "atypes.hxx" | |
9 #include "baseaffix.hxx" | |
10 #include "hashmgr.hxx" | |
11 #include "phonet.hxx" | |
12 #include "replist.hxx" | |
13 | |
// Bit masks for the flag-duplication check: dupSFX is bit 0 and dupPFX is
// bit 1, so both conditions can be recorded in a single value.
// NOTE(review): presumably these are OR-ed into the `dupflags` buffer that
// AffixMgr::parse_affix receives -- confirm in the implementation file.
14 // check flag duplication | |
15 #define dupSFX (1 << 0) | |
16 #define dupPFX (1 << 1) | |
17 | |
18 class PfxEntry; | |
19 class SfxEntry; | |
20 | |
21 #ifdef HUNSPELL_CHROME_CLIENT | |
22 | |
23 #include <vector> | |
24 | |
// Growable stand-in for the contclasses array that AffixMgr otherwise declares
// as a fixed char[CONTSIZE]. More than 256 elements are almost never needed,
// so only that many are allocated up front; touching a higher index expands
// the buffer automatically. Every element that has not been written reads
// as 0.
class ContClasses {
 public:
  ContClasses() {
    // Pre-allocate a buffer so that typically, we'll never have to resize.
    EnsureSizeIs(256);
  }

  // Returns a reference to element |index|, growing the buffer first when
  // |index| lies past the current end. The returned reference is invalidated
  // by any later access that makes the buffer grow.
  char& operator[](size_t index) {
    EnsureSizeIs(index + 1);
    return data[index];
  }

  // Makes the buffer hold at least |new_size| elements. The buffer is never
  // shrunk, and existing contents are preserved.
  void EnsureSizeIs(size_t new_size) {
    if (data.size() >= new_size)
      return;  // Nothing to do.

    // std::vector<char>::resize() value-initializes the appended elements,
    // i.e. sets them to 0, so no explicit memset (and no <string.h>) is
    // required here.
    data.resize(new_size);
  }

  // Backing storage; left public for compatibility with existing users.
  std::vector<char> data;
};
52 | |
53 #endif // HUNSPELL_CHROME_CLIENT | |
54 | |
// AffixMgr: class declaration exported through LIBHUNSPELL_DLL_EXPORTED.
// This block contains declarations only (data members, constructor,
// destructor and member-function prototypes); nothing is defined here.
// NOTE(review): every line of this listing still carries residue from a
// side-by-side diff view (a leading line number and a trailing "| |"), and
// two declarations (contclasses and parse_convtable) were wrapped across two
// lines by that extraction. The residue must be stripped before this text can
// compile.
55 class LIBHUNSPELL_DLL_EXPORTED AffixMgr | |
56 { | |
57 | |
// Data members. They precede the first access specifier, so they are private.
// The first four are tables of SETSIZE entry pointers, two for PfxEntry and
// two for SfxEntry.
// NOTE(review): presumably the *Start tables are keyed by a character and the
// *Flag tables by an affix flag -- confirm in the implementation file.
58 PfxEntry * pStart[SETSIZE]; | |
59 SfxEntry * sStart[SETSIZE]; | |
60 PfxEntry * pFlag[SETSIZE]; | |
61 SfxEntry * sFlag[SETSIZE]; | |
62 HashMgr * pHMgr; | |
63 HashMgr ** alldic; | |
64 int * maxdic; | |
65 char * keystring; | |
66 char * trystring; | |
67 char * encoding; | |
68 struct cs_info * csconv; | |
// Scalar options, FLAG values and table pointers with their element counts.
// NOTE(review): presumably populated by the private parse_* helpers declared
// near the end of the class -- confirm in the implementation file.
69 int utf8; | |
70 int complexprefixes; | |
71 FLAG compoundflag; | |
72 FLAG compoundbegin; | |
73 FLAG compoundmiddle; | |
74 FLAG compoundend; | |
75 FLAG compoundroot; | |
76 FLAG compoundforbidflag; | |
77 FLAG compoundpermitflag; | |
78 int compoundmoresuffixes; | |
79 int checkcompounddup; | |
80 int checkcompoundrep; | |
81 int checkcompoundcase; | |
82 int checkcompoundtriple; | |
83 int simplifiedtriple; | |
84 FLAG forbiddenword; | |
85 FLAG nosuggest; | |
86 FLAG nongramsuggest; | |
87 FLAG needaffix; | |
88 int cpdmin; | |
89 int numrep; | |
90 replentry * reptable; | |
91 RepList * iconvtable; | |
92 RepList * oconvtable; | |
93 int nummap; | |
94 mapentry * maptable; | |
95 int numbreak; | |
96 char ** breaktable; | |
97 int numcheckcpd; | |
98 patentry * checkcpdtable; | |
99 int simplifiedcpd; | |
100 int numdefcpd; | |
101 flagentry * defcpdtable; | |
102 phonetable * phone; | |
103 int maxngramsugs; | |
104 int maxcpdsugs; | |
105 int maxdiff; | |
106 int onlymaxdiff; | |
107 int nosplitsugs; | |
108 int sugswithdots; | |
109 int cpdwordmax; | |
110 int cpdmaxsyllable; | |
111 char * cpdvowels; | |
112 w_char * cpdvowels_utf16; | |
113 int cpdvowels_utf16_len; | |
114 char * cpdsyllablenum; | |
// The next six members are marked "not stateless" by the original author.
// NOTE(review): presumably they hold results of the most recent check call and
// are what get_prefix()/get_suffix()/get_derived() return -- confirm.
115 const char * pfxappnd; // BUG: not stateless | |
116 const char * sfxappnd; // BUG: not stateless | |
117 FLAG sfxflag; // BUG: not stateless | |
118 char * derived; // BUG: not stateless | |
119 SfxEntry * sfx; // BUG: not stateless | |
120 PfxEntry * pfx; // BUG: not stateless | |
121 int checknum; | |
122 char * wordchars; | |
123 unsigned short * wordchars_utf16; | |
124 int wordchars_utf16_len; | |
125 char * ignorechars; | |
126 unsigned short * ignorechars_utf16; | |
127 int ignorechars_utf16_len; | |
128 char * version; | |
129 char * lang; | |
130 int langnum; | |
131 FLAG lemma_present; | |
132 FLAG circumfix; | |
133 FLAG onlyincompound; | |
134 FLAG keepcase; | |
135 FLAG forceucase; | |
136 FLAG warn; | |
137 int forbidwarn; | |
138 FLAG substandard; | |
139 int checksharps; | |
140 int fullstrip; | |
141 | |
// Storage for contclasses depends on the build: a ContClasses object when
// HUNSPELL_CHROME_CLIENT is defined, otherwise a fixed char[CONTSIZE].
// The char[CONTSIZE] declaration below is one source line that the extraction
// wrapped in the middle of its trailing comment.
142 int havecontclass; // boolean variable | |
143 #ifdef HUNSPELL_CHROME_CLIENT | |
144 ContClasses contclasses; | |
145 #else | |
146 char contclasses[CONTSIZE]; // flags of possible continuing cla
sses (twofold affix) | |
147 #endif | |
148 | |
149 public: | |
150 | |
// Constructor. The HUNSPELL_CHROME_CLIENT build takes a
// hunspell::BDictReader*; the default build takes an affix file path and an
// optional key that defaults to NULL. Both variants also take a HashMgr** and
// an int*.
151 #ifdef HUNSPELL_CHROME_CLIENT | |
152 AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md); | |
153 #else | |
154 AffixMgr(const char * affpath, HashMgr** ptr, int * md, | |
155 const char * key = NULL); | |
156 #endif | |
157 ~AffixMgr(); | |
// Check functions returning struct hentry *.
// NOTE(review): affix_check declares needflag as `const unsigned short`
// while the sibling *_check functions use `const FLAG`; presumably FLAG is a
// typedef for unsigned short -- confirm where FLAG is declared.
158 struct hentry * affix_check(const char * word, int len, | |
159 const unsigned short needflag = (unsigned short) 0, | |
160 char in_compound = IN_CPD_NOT); | |
161 struct hentry * prefix_check(const char * word, int len, | |
162 char in_compound, const FLAG needflag = FLAG_NULL); | |
163 inline int isSubset(const char * s1, const char * s2); | |
164 struct hentry * prefix_check_twosfx(const char * word, int len, | |
165 char in_compound, const FLAG needflag = FLAG_NULL); | |
166 inline int isRevSubset(const char * s1, const char * end_of_s2, int len); | |
167 struct hentry * suffix_check(const char * word, int len, int sfxopts, | |
168 PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, | |
169 const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, | |
170 char in_compound = IN_CPD_NOT); | |
171 struct hentry * suffix_check_twosfx(const char * word, int len, | |
172 int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL); | |
173 | |
// The *_morph variants below return char * instead of struct hentry *.
// NOTE(review): ownership of the returned buffers is not visible here.
174 char * affix_check_morph(const char * word, int len, | |
175 const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); | |
176 char * prefix_check_morph(const char * word, int len, | |
177 char in_compound, const FLAG needflag = FLAG_NULL); | |
178 char * suffix_check_morph (const char * word, int len, int sfxopts, | |
179 PfxEntry * ppfx, const FLAG cclass = FLAG_NULL, | |
180 const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); | |
181 | |
182 char * prefix_check_twosfx_morph(const char * word, int len, | |
183 char in_compound, const FLAG needflag = FLAG_NULL); | |
184 char * suffix_check_twosfx_morph(const char * word, int len, | |
185 int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL); | |
186 | |
187 char * morphgen(char * ts, int wl, const unsigned short * ap, | |
188 unsigned short al, char * morph, char * targetmorph, int level); | |
189 | |
// The last two parameters of expand_rootword are unnamed in this declaration.
190 int expand_rootword(struct guessword * wlst, int maxn, const char * ts, | |
191 int wl, const unsigned short * ap, unsigned short al, char * bad, | |
192 int, char *); | |
193 | |
194 short get_syllable (const char * word, int wlen); | |
195 int cpdrep_check(const char * word, int len); | |
196 int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2, | |
197 const char affixed); | |
198 int defcpd_check(hentry *** words, short wnum, hentry * rv, | |
199 hentry ** rwords, char all); | |
200 int cpdcase_check(const char * word, int len); | |
201 inline int candidate_check(const char * word, int len); | |
202 void setcminmax(int * cmin, int * cmax, const char * word, int len); | |
203 struct hentry * compound_check(const char * word, int len, short wordnum, | |
204 short numsyllable, short maxwordnum, short wnum, hentry ** words, | |
205 char hu_mov_rule, char is_sug, int * info); | |
206 | |
207 int compound_check_morph(const char * word, int len, short wordnum, | |
208 short numsyllable, short maxwordnum, short wnum, hentry ** words, | |
209 char hu_mov_rule, char ** result, char * partresult); | |
210 | |
211 struct hentry * lookup(const char * word); | |
// Accessors. Every get_* member below is declared const except
// get_encoding() and get_key_string(); the reason is not visible in this
// header.
212 int get_numrep() const; | |
213 struct replentry * get_reptable() const; | |
214 RepList * get_iconvtable() const; | |
215 RepList * get_oconvtable() const; | |
216 struct phonetable * get_phonetable() const; | |
217 int get_nummap() const; | |
218 struct mapentry * get_maptable() const; | |
219 int get_numbreak() const; | |
220 char ** get_breaktable() const; | |
221 char * get_encoding(); | |
222 int get_langnum() const; | |
223 char * get_key_string(); | |
224 char * get_try_string() const; | |
225 const char * get_wordchars() const; | |
226 unsigned short * get_wordchars_utf16(int * len) const; | |
227 char * get_ignore() const; | |
228 unsigned short * get_ignore_utf16(int * len) const; | |
229 int get_compound() const; | |
230 FLAG get_compoundflag() const; | |
231 FLAG get_compoundbegin() const; | |
232 FLAG get_forbiddenword() const; | |
233 FLAG get_nosuggest() const; | |
234 FLAG get_nongramsuggest() const; | |
235 FLAG get_needaffix() const; | |
236 FLAG get_onlyincompound() const; | |
237 FLAG get_compoundroot() const; | |
238 FLAG get_lemma_present() const; | |
239 int get_checknum() const; | |
240 const char * get_prefix() const; | |
241 const char * get_suffix() const; | |
242 const char * get_derived() const; | |
243 const char * get_version() const; | |
244 int have_contclass() const; | |
245 int get_utf8() const; | |
246 int get_complexprefixes() const; | |
247 char * get_suffixed(char ) const; | |
248 int get_maxngramsugs() const; | |
249 int get_maxcpdsugs() const; | |
250 int get_maxdiff() const; | |
251 int get_onlymaxdiff() const; | |
252 int get_nosplitsugs() const; | |
253 int get_sugswithdots(void) const; | |
254 FLAG get_keepcase(void) const; | |
255 FLAG get_forceucase(void) const; | |
256 FLAG get_warn(void) const; | |
257 int get_forbidwarn(void) const; | |
258 int get_checksharps(void) const; | |
259 char * encode_flag(unsigned short aflag) const; | |
260 int get_fullstrip() const; | |
261 | |
262 private: | |
263 #ifdef HUNSPELL_CHROME_CLIENT | |
264 // Not owned by us, owned by the Hunspell object. | |
265 hunspell::BDictReader* bdict_reader; | |
266 #endif | |
// Private helpers. Apart from parse_file, every parse_* function takes the
// current line and a FileMgr *.
// NOTE(review): parse_file is declared unconditionally with an affix path
// even though the HUNSPELL_CHROME_CLIENT constructor receives no path --
// confirm how it is called in that build.
// The parse_convtable declaration is one source line that the extraction
// wrapped in the middle of the parameter name `keyword`.
267 int parse_file(const char * affpath, const char * key); | |
268 int parse_flag(char * line, unsigned short * out, FileMgr * af); | |
269 int parse_num(char * line, int * out, FileMgr * af); | |
270 int parse_cpdsyllable(char * line, FileMgr * af); | |
271 int parse_reptable(char * line, FileMgr * af); | |
272 int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * ke
yword); | |
273 int parse_phonetable(char * line, FileMgr * af); | |
274 int parse_maptable(char * line, FileMgr * af); | |
275 int parse_breaktable(char * line, FileMgr * af); | |
276 int parse_checkcpdtable(char * line, FileMgr * af); | |
277 int parse_defcpdtable(char * line, FileMgr * af); | |
278 int parse_affix(char * line, const char at, FileMgr * af, char * dupflags); | |
279 | |
280 void reverse_condition(char *); | |
281 void debugflag(char * result, unsigned short flag); | |
282 int condlen(char *); | |
283 int encodeit(affentry &entry, char * cs); | |
284 int build_pfxtree(PfxEntry* pfxptr); | |
285 int build_sfxtree(SfxEntry* sfxptr); | |
286 int process_pfx_order(); | |
287 int process_sfx_order(); | |
288 PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr); | |
289 SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr); | |
290 int process_pfx_tree_to_list(); | |
291 int process_sfx_tree_to_list(); | |
292 int redundant_condition(char, char * strip, int stripl, | |
293 const char * cond, int); | |
294 void finishFileMgr(FileMgr *afflst); | |
295 }; | |
296 | |
297 #endif | |
OLD | NEW |