Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1104)

Side by Side Diff: chrome/third_party/hunspell/src/hunspell/hashmgr.cxx

Issue 155841: Update Hunspell to the latest stable version to use the latest dictionary for... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: '' Created 11 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 #include "license.hunspell" 1 #include "license.hunspell"
2 #include "license.myspell" 2 #include "license.myspell"
3 3
4 #ifndef MOZILLA_CLIENT 4 #ifndef MOZILLA_CLIENT
5 #include <cstdlib> 5 #include <cstdlib>
6 #include <cstring> 6 #include <cstring>
7 #include <cstdio> 7 #include <cstdio>
8 #include <cctype> 8 #include <cctype>
9 #else 9 #else
10 #include <stdlib.h> 10 #include <stdlib.h>
11 #include <string.h> 11 #include <string.h>
12 #include <stdio.h> 12 #include <stdio.h>
13 #include <ctype.h> 13 #include <ctype.h>
14 #endif 14 #endif
15 15
16 #include "hashmgr.hxx" 16 #include "hashmgr.hxx"
17 #include "csutil.hxx" 17 #include "csutil.hxx"
18 #include "atypes.hxx" 18 #include "atypes.hxx"
19 19
20 #ifdef MOZILLA_CLIENT 20 #ifdef MOZILLA_CLIENT
21 #ifdef __SUNPRO_CC // for SunONE Studio compiler 21 #ifdef __SUNPRO_CC // for SunONE Studio compiler
22 using namespace std; 22 using namespace std;
23 #endif 23 #endif
24 #else 24 #else
25 #ifndef W32 25 #ifndef WIN32
26 using namespace std; 26 using namespace std;
27 #endif 27 #endif
28 #endif 28 #endif
29 29
30 // build a hash table from a munched word list 30 // build a hash table from a munched word list
31
31 #ifdef HUNSPELL_CHROME_CLIENT 32 #ifdef HUNSPELL_CHROME_CLIENT
32 HashMgr::HashMgr(hunspell::BDictReader* reader) 33 HashMgr::HashMgr(hunspell::BDictReader* reader)
33 { 34 {
34 bdict_reader = reader; 35 bdict_reader = reader;
35 #else 36 #else
36 HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle) 37 HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle, const char * key)
37 { 38 {
38 #endif 39 #endif
39 tablesize = 0; 40 tablesize = 0;
40 tableptr = NULL; 41 tableptr = NULL;
41 flag_mode = FLAG_CHAR; 42 flag_mode = FLAG_CHAR;
42 complexprefixes = 0; 43 complexprefixes = 0;
43 utf8 = 0; 44 utf8 = 0;
45 langnum = 0;
46 lang = NULL;
47 enc = NULL;
48 csconv = 0;
44 ignorechars = NULL; 49 ignorechars = NULL;
45 ignorechars_utf16 = NULL; 50 ignorechars_utf16 = NULL;
46 ignorechars_utf16_len = 0; 51 ignorechars_utf16_len = 0;
47 numaliasf = 0; 52 numaliasf = 0;
48 aliasf = NULL; 53 aliasf = NULL;
49 numaliasm = 0; 54 numaliasm = 0;
50 aliasm = NULL; 55 aliasm = NULL;
56 forbiddenword = FORBIDDENWORD; // forbidden word signing flag
51 #ifdef HUNSPELL_CHROME_CLIENT 57 #ifdef HUNSPELL_CHROME_CLIENT
52 // No tables to load, just the AF config. 58 // No tables to load, just the AF config.
53 int ec = load_config(); 59 int ec = load_config();
54 #else 60 #else
55 load_config(aff_handle); 61 load_config(aff_handle);
56 int ec = load_tables(dic_handle); 62 int ec = load_tables(dic_handle, key);
57 #endif 63 #endif
58 if (ec) { 64 if (ec) {
59 /* error condition - what should we do here */ 65 /* error condition - what should we do here */
60 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec); 66 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
61 if (tableptr) { 67 if (tableptr) {
62 free(tableptr); 68 free(tableptr);
63 tableptr = NULL; 69 tableptr = NULL;
64 } 70 }
65 tablesize = 0; 71 tablesize = 0;
66 } 72 }
67 } 73 }
68 74
69 75
70 HashMgr::~HashMgr() 76 HashMgr::~HashMgr()
71 { 77 {
72 if (tableptr) { 78 if (tableptr) {
73 // now pass through hash table freeing up everything 79 // now pass through hash table freeing up everything
74 // go through column by column of the table 80 // go through column by column of the table
75 for (int i=0; i < tablesize; i++) { 81 for (int i=0; i < tablesize; i++) {
76 struct hentry * pt = &tableptr[i]; 82 struct hentry * pt = tableptr[i];
77 struct hentry * nt = NULL; 83 struct hentry * nt = NULL;
78 if (pt) {
79 if (pt->astr && !aliasf) free(pt->astr);
80 if (pt->word) free(pt->word);
81 #ifdef HUNSPELL_EXPERIMENTAL
82 if (pt->description && !aliasm) free(pt->description);
83 #endif
84 pt = pt->next;
85 }
86 while(pt) { 84 while(pt) {
87 nt = pt->next; 85 nt = pt->next;
88 if (pt->astr && !aliasf) free(pt->astr); 86 if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen)) ) free(pt->astr);
89 if (pt->word) free(pt->word);
90 #ifdef HUNSPELL_EXPERIMENTAL
91 if (pt->description && !aliasm) free(pt->description);
92 #endif
93 free(pt); 87 free(pt);
94 pt = nt; 88 pt = nt;
95 } 89 }
96 } 90 }
97 free(tableptr); 91 free(tableptr);
98 tableptr = NULL;
99 } 92 }
100 tablesize = 0; 93 tablesize = 0;
101 94
102 if (aliasf) { 95 if (aliasf) {
103 for (int j = 0; j < (numaliasf); j++) free(aliasf[j]); 96 for (int j = 0; j < (numaliasf); j++) free(aliasf[j]);
104 free(aliasf); 97 free(aliasf);
105 aliasf = NULL; 98 aliasf = NULL;
106 if (aliasflen) { 99 if (aliasflen) {
107 free(aliasflen); 100 free(aliasflen);
108 aliasflen = NULL; 101 aliasflen = NULL;
109 } 102 }
110 } 103 }
111 if (aliasm) { 104 if (aliasm) {
112 for (int j = 0; j < (numaliasm); j++) free(aliasm[j]); 105 for (int j = 0; j < (numaliasm); j++) free(aliasm[j]);
113 free(aliasm); 106 free(aliasm);
114 aliasm = NULL; 107 aliasm = NULL;
115 } 108 }
109
110 #ifndef OPENOFFICEORG
111 #ifndef MOZILLA_CLIENT
112 if (utf8) free_utf_tbl();
113 #endif
114 #endif
115
116 if (enc) free(enc);
117 if (lang) free(lang);
116 118
117 if (ignorechars) free(ignorechars); 119 if (ignorechars) free(ignorechars);
118 if (ignorechars_utf16) free(ignorechars_utf16); 120 if (ignorechars_utf16) free(ignorechars_utf16);
119 121
120 #ifdef HUNSPELL_CHROME_CLIENT 122 #ifdef HUNSPELL_CHROME_CLIENT
121 EmptyHentryCache(); 123 EmptyHentryCache();
122 for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin(); 124 for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin();
123 it != pointer_to_strings_.end(); ++it) { 125 it != pointer_to_strings_.end(); ++it) {
124 delete *it; 126 delete *it;
125 } 127 }
(...skipping 11 matching lines...) Expand all
137 hentry* next = cur->next_homonym; 139 hentry* next = cur->next_homonym;
138 delete cur; 140 delete cur;
139 cur = next; 141 cur = next;
140 } 142 }
141 } 143 }
142 hentry_cache.clear(); 144 hentry_cache.clear();
143 } 145 }
144 #endif 146 #endif
145 147
146 // lookup a root word in the hashtable 148 // lookup a root word in the hashtable
147
148 struct hentry * HashMgr::lookup(const char *word) const 149 struct hentry * HashMgr::lookup(const char *word) const
149 { 150 {
150 #ifdef HUNSPELL_CHROME_CLIENT 151 #ifdef HUNSPELL_CHROME_CLIENT
151 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; 152 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
152 int affix_count = bdict_reader->FindWord(word, affix_ids); 153 int affix_count = bdict_reader->FindWord(word, affix_ids);
153 if (affix_count == 0) { // look for custom added word 154 if (affix_count == 0) { // look for custom added word
154 std::map<StringPiece, int>::const_iterator iter = 155 std::map<StringPiece, int>::const_iterator iter =
155 custom_word_to_affix_id_map_.find(word); 156 custom_word_to_affix_id_map_.find(word);
156 if (iter != custom_word_to_affix_id_map_.end()) { 157 if (iter != custom_word_to_affix_id_map_.end()) {
157 affix_count = 1; 158 affix_count = 1;
158 affix_ids[0] = iter->second; 159 affix_ids[0] = iter->second;
159 } 160 }
160 } 161 }
161 162
162 static const int kMaxWordLen = 128; 163 static const int kMaxWordLen = 128;
163 static char word_buf[kMaxWordLen]; 164 static char word_buf[kMaxWordLen];
164 strncpy(word_buf, word, kMaxWordLen); 165 strncpy(word_buf, word, kMaxWordLen);
165 166
166 return AffixIDsToHentry(word_buf, affix_ids, affix_count); 167 return AffixIDsToHentry(word_buf, affix_ids, affix_count);
167 #else 168 #else
168 struct hentry * dp; 169 struct hentry * dp;
169 if (tableptr) { 170 if (tableptr) {
170 dp = &tableptr[hash(word)]; 171 dp = tableptr[hash(word)];
171 if (dp->word == NULL) return NULL; 172 if (!dp) return NULL;
172 for ( ; dp != NULL; dp = dp->next) { 173 for ( ; dp != NULL; dp = dp->next) {
173 if (strcmp(word,dp->word) == 0) return dp; 174 if (strcmp(word,&(dp->word)) == 0) return dp;
174 } 175 }
175 } 176 }
176 return NULL; 177 return NULL;
177 #endif 178 #endif
178 } 179 }
179 180
180 // add a word to the hash table (private) 181 // add a word to the hash table (private)
181 182 int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
182 int HashMgr::add_word(const char * word, int wl, unsigned short * aff, int al, const char * desc) 183 int al, const char * desc, bool onlyupcase)
183 { 184 {
184 #ifndef HUNSPELL_CHROME_CLIENT 185 #ifndef HUNSPELL_CHROME_CLIENT
185 char * st = mystrdup(word); 186 bool upcasehomonym = false;
186 if (wl && !st) return 1; 187 int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
188 // variable-length hash record with word and optional fields
189 struct hentry* hp =
190 » (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
191 if (!hp) return 1;
192 char * hpw = &(hp->word);
193 strcpy(hpw, word);
187 if (ignorechars != NULL) { 194 if (ignorechars != NULL) {
188 if (utf8) { 195 if (utf8) {
189 remove_ignored_chars_utf(st, ignorechars_utf16, ignorechars_utf16_len); 196 remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len);
190 } else { 197 } else {
191 remove_ignored_chars(st, ignorechars); 198 remove_ignored_chars(hpw, ignorechars);
192 } 199 }
193 } 200 }
194 if (complexprefixes) { 201 if (complexprefixes) {
195 if (utf8) reverseword_utf(st); else reverseword(st); 202 if (utf8) reverseword_utf(hpw); else reverseword(hpw);
196 } 203 }
197 int i = hash(st); 204
198 struct hentry * dp = &tableptr[i]; 205 int i = hash(hpw);
199 if (dp->word == NULL) { 206
200 dp->wlen = (short) wl; 207 hp->blen = (unsigned char) wbl;
201 dp->alen = (short) al; 208 hp->clen = (unsigned char) wcl;
202 dp->word = st; 209 hp->alen = (short) al;
203 dp->astr = aff; 210 hp->astr = aff;
204 dp->next = NULL; 211 hp->next = NULL;
205 dp->next_homonym = NULL; 212 hp->next_homonym = NULL;
206 #ifdef HUNSPELL_EXPERIMENTAL 213
207 if (aliasm) { 214 // store the description string or its pointer
208 dp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc); 215 if (desc) {
209 } else { 216 hp->var = H_OPT;
210 dp->description = mystrdup(desc); 217 if (aliasm) {
211 if (desc && !dp->description) return 1; 218 hp->var += H_OPT_ALIASM;
212 if (dp->description && complexprefixes) { 219 store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));
213 if (utf8) reverseword_utf(dp->description); else reverseword(dp->description); 220 } else {
221 » strcpy(hpw + wbl + 1, desc);
222 if (complexprefixes) {
223 if (utf8) reverseword_utf(HENTRY_DATA(hp));
224 else reverseword(HENTRY_DATA(hp));
225 }
226 }
227 » if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON;
228 } else hp->var = 0;
229
230 struct hentry * dp = tableptr[i];
231 if (!dp) {
232 tableptr[i] = hp;
233 return 0;
234 }
235 while (dp->next != NULL) {
236 if ((!dp->next_homonym) && (strcmp(&(hp->word), &(dp->word)) == 0)) {
237 » // remove hidden onlyupcase homonym
238 if (!onlyupcase) {
239 » » if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
240 » » free(dp->astr);
241 » » dp->astr = hp->astr;
242 » » dp->alen = hp->alen;
243 » » free(hp);
244 » » return 0;
245 » » } else {
246 » » dp->next_homonym = hp;
247 » » }
248 } else {
249 » upcasehomonym = true;
250 }
251 }
252 dp=dp->next;
253 }
254 if (strcmp(&(hp->word), &(dp->word)) == 0) {
255 » // remove hidden onlyupcase homonym
256 if (!onlyupcase) {
257 » » if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
258 » » free(dp->astr);
259 » » dp->astr = hp->astr;
260 » » dp->alen = hp->alen;
261 » » free(hp);
262 » » return 0;
263 » » } else {
264 » » dp->next_homonym = hp;
265 » » }
266 } else {
267 » upcasehomonym = true;
214 } 268 }
215 } 269 }
216 #endif 270 if (!upcasehomonym) {
217 } else { 271 » dp->next = hp;
218 struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry));
219 if (!hp) return 1;
220 hp->wlen = (short) wl;
221 hp->alen = (short) al;
222 hp->word = st;
223 hp->astr = aff;
224 hp->next = NULL;
225 hp->next_homonym = NULL;
226 #ifdef HUNSPELL_EXPERIMENTAL
227 if (aliasm) {
228 hp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);
229 } else { 272 } else {
230 hp->description = mystrdup(desc); 273 » // remove hidden onlyupcase homonym
231 if (desc && !hp->description) return 1; 274 » if (hp->astr) free(hp->astr);
232 if (dp->description && complexprefixes) { 275 » free(hp);
233 if (utf8) reverseword_utf(hp->description); else reverseword(hp->description);
234 }
235 } 276 }
236 #endif
237 while (dp->next != NULL) {
238 if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;
239 dp=dp->next;
240 }
241 if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;
242 dp->next = hp;
243 }
244 #endif // HUNSPELL_CHROME_CLIENT 277 #endif // HUNSPELL_CHROME_CLIENT
245 std::map<StringPiece, int>::iterator iter = 278 std::map<StringPiece, int>::iterator iter =
246 custom_word_to_affix_id_map_.find(word); 279 custom_word_to_affix_id_map_.find(word);
247 if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added 280 if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added
248 std::string* new_string_word = new std::string(word); 281 std::string* new_string_word = new std::string(word);
249 pointer_to_strings_.push_back(new_string_word); 282 pointer_to_strings_.push_back(new_string_word);
250 StringPiece sp(*(new_string_word)); 283 StringPiece sp(*(new_string_word));
251 custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words 284 custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words
252 return 1; 285 return 1;
253 } 286 }
254 287
255 return 0; 288 return 0;
256 } 289 }
257 290
258 // add a custom dic. word to the hash table (public) 291 int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
259 int HashMgr::put_word(const char * word, int wl, char * aff) 292 unsigned short * flags, int al, char * dp, int captype)
260 { 293 {
261 unsigned short * flags; 294 // add inner capitalized forms to handle the following allcap forms:
262 int al = 0; 295 // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
263 if (aff) { 296 // Allcaps with suffixes: CIA's -> CIA'S
264 al = decode_flags(&flags, aff); 297 if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
265 flag_qsort(flags, 0, al); 298 ((captype == ALLCAP) && (flags != NULL))) &&
266 } else { 299 !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) {
267 flags = NULL; 300 unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1));
301 » if (!flags2) return 1;
302 if (al) memcpy(flags2, flags, al * sizeof(unsigned short));
303 flags2[al] = ONLYUPCASEFLAG;
304 if (utf8) {
305 char st[BUFSIZE];
306 w_char w[BUFSIZE];
307 int wlen = u8_u16(w, BUFSIZE, word);
308 mkallsmall_utf(w, wlen, langnum);
309 mkallcap_utf(w, 1, langnum);
310 u16_u8(st, BUFSIZE, w, wlen);
311 return add_word(st,wbl,wcl,flags2,al+1,dp, true);
312 } else {
313 mkallsmall(word, csconv);
314 mkinitcap(word, csconv);
315 return add_word(word,wbl,wcl,flags2,al+1,dp, true);
316 }
268 } 317 }
269 add_word(word, wl, flags, al, NULL);
270 return 0; 318 return 0;
271 } 319 }
272 320
273 int HashMgr::put_word_pattern(const char * word, int wl, const char * pattern) 321 // detect captype and modify word length for UTF-8 encoding
322 int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {
323 int len;
324 if (utf8) {
325 w_char dest_utf[BUFSIZE];
326 len = u8_u16(dest_utf, BUFSIZE, word);
327 *captype = get_captype_utf8(dest_utf, len, langnum);
328 } else {
329 len = wbl;
330 *captype = get_captype((char *) word, len, csconv);
331 }
332 return len;
333 }
334
335 // remove word (personal dictionary function for standalone applications)
336 int HashMgr::remove(const char * word)
274 { 337 {
275 unsigned short * flags; 338 struct hentry * dp = lookup(word);
276 struct hentry * dp = lookup(pattern); 339 while (dp) {
277 if (!dp || !dp->astr) return 1; 340 if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
278 flags = (unsigned short *) malloc (dp->alen * sizeof(short)); 341 unsigned short * flags =
279 memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short)); 342 (unsigned short *) malloc(sizeof(short *) * (dp->alen + 1));
280 add_word(word, wl, flags, dp->alen, NULL); 343 if (!flags) return 1;
344 for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i];
345 flags[dp->alen] = forbiddenword;
346 dp->astr = flags;
347 dp->alen++;
348 flag_qsort(flags, 0, dp->alen);
349 }
350 dp = dp->next_homonym;
351 }
281 return 0; 352 return 0;
282 } 353 }
283 354
355 /* remove forbidden flag to add a personal word to the hash */
356 int HashMgr::remove_forbidden_flag(const char * word) {
357 struct hentry * dp = lookup(word);
358 if (!dp) return 1;
359 while (dp) {
360 if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) {
361 if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic.
362 else {
363 unsigned short * flags2 =
364 (unsigned short *) malloc(sizeof(short *) * (dp->alen - 1));
365 if (!flags2) return 1;
366 int i, j = 0;
367 for (i = 0; i < dp->alen; i++) {
368 if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i];
369 }
370 dp->alen--;
371 dp->astr = flags2; // XXX allowed forbidden words
372 }
373 }
374 dp = dp->next_homonym;
375 }
376 return 0;
377 }
378
379 // add a custom dic. word to the hash table (public)
380 int HashMgr::add(const char * word)
381 {
382 unsigned short * flags = NULL;
383 int al = 0;
384 if (remove_forbidden_flag(word)) {
385 int captype;
386 int wbl = strlen(word);
387 int wcl = get_clen_and_captype(word, wbl, &captype);
388 add_word(word, wbl, wcl, flags, al, NULL, false);
389 return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype);
390 }
391 return 0;
392 }
393
394 int HashMgr::add_with_affix(const char * word, const char * example)
395 {
396 // detect captype and modify word length for UTF-8 encoding
397 struct hentry * dp = lookup(example);
398 remove_forbidden_flag(word);
399 if (dp && dp->astr) {
400 int captype;
401 int wbl = strlen(word);
402 int wcl = get_clen_and_captype(word, wbl, &captype);
403 if (aliasf) {
404 add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false);
405 } else {
406 unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short));
407 if (flags) {
408 memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));
409 add_word(word, wbl, wcl, flags, dp->alen, NULL, false);
410 } else return 1;
411 }
412 return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp->alen, NULL, captype);
413 }
414 return 1;
415 }
416
284 // walk the hash table entry by entry - null at end 417 // walk the hash table entry by entry - null at end
418 // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
285 struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const 419 struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
286 { 420 {
287 #ifdef HUNSPELL_CHROME_CLIENT 421 #ifdef HUNSPELL_CHROME_CLIENT
288 // This function creates a new hentry if NULL is passed as hp. It also takes 422 // This function creates a new hentry if NULL is passed as hp. It also takes
289 // the responsibility of deleting the pointer hp when walk is over. 423 // the responsibility of deleting the pointer hp when walk is over.
290 424
291 // This function is only ever called by one place and not nested. We can 425 // This function is only ever called by one place and not nested. We can
292 // therefore keep static state between calls and use |col| as a "reset" flag 426 // therefore keep static state between calls and use |col| as a "reset" flag
293 // to avoid changing the API. It is set to -1 for the first call. 427 // to avoid changing the API. It is set to -1 for the first call.
294 static hunspell::WordIterator word_iterator = 428 static hunspell::WordIterator word_iterator =
(...skipping 10 matching lines...) Expand all
305 if (affix_count == 0) { 439 if (affix_count == 0) {
306 delete hp; 440 delete hp;
307 return NULL; 441 return NULL;
308 } 442 }
309 short word_len = static_cast<short>(strlen(word)); 443 short word_len = static_cast<short>(strlen(word));
310 444
311 // For now, just re-compute the |hp| and return it. No need to create linked 445 // For now, just re-compute the |hp| and return it. No need to create linked
312 // lists for the extra affixes. If hp is NULL, create it here. 446 // lists for the extra affixes. If hp is NULL, create it here.
313 if (!hp) 447 if (!hp)
314 hp = new hentry; 448 hp = new hentry;
315 hp->word = word; 449 hp->word = *word;
316 hp->wlen = word_len; 450 hp->blen = word_len;
317 hp->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[0], 451 hp->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[0],
318 &hp->astr); 452 &hp->astr);
319 hp->next = NULL; 453 hp->next = NULL;
320 hp->next_homonym = NULL; 454 hp->next_homonym = NULL;
321 455 hp->var = 0;
456 hp->clen = 0;
322 return hp; 457 return hp;
323 #else 458 #else
324 //reset to start 459 »
325 if ((col < 0) || (hp == NULL)) { 460 if (hp && hp->next != NULL) return hp->next;
326 col = -1; 461 for (col++; col < tablesize; col++) {
327 hp = NULL; 462 if (tableptr[col]) return tableptr[col];
328 } 463 }
329 464 // null at end and reset to start
330 if (hp && hp->next != NULL) { 465 col = -1;
331 hp = hp->next; 466 return NULL;
332 } else {
333 col++;
334 hp = (col < tablesize) ? &tableptr[col] : NULL;
335 // search for next non-blank column entry
336 while (hp && (hp->word == NULL)) {
337 col ++;
338 hp = (col < tablesize) ? &tableptr[col] : NULL;
339 }
340 if (col < tablesize) return hp;
341 hp = NULL;
342 col = -1;
343 }
344 return hp;
345 #endif 467 #endif
346 } 468 }
347 469
348 // load a munched word list and build a hash table on the fly 470 // load a munched word list and build a hash table on the fly
349 int HashMgr::load_tables(FILE* t_handle) 471 int HashMgr::load_tables(FILE* t_handle, const char * key)
350 { 472 {
351 #ifndef HUNSPELL_CHROME_CLIENT 473 #ifndef HUNSPELL_CHROME_CLIENT
352 int wl, al; 474 int al;
353 char * ap; 475 char * ap;
354 char * dp; 476 char * dp;
477 char * dp2;
355 unsigned short * flags; 478 unsigned short * flags;
479 char * ts;
356 480
357 // raw dictionary - munched file 481 // open dictionary file
358 FILE * rawdict = _fdopen(_dup(_fileno(t_handle)), "r"); 482 FileMgr * dict = new FileMgr(tpath, key);
359 if (rawdict == NULL) return 1; 483 if (dict == NULL) return 1;
360 fseek(rawdict, 0, SEEK_SET);
361 484
362 // first read the first line of file to get hash table size */ 485 // first read the first line of file to get hash table size */
363 char ts[MAXDELEN]; 486 if (!(ts = dict->getline())) {
364 if (! fgets(ts, MAXDELEN-1,rawdict)) return 2; 487 HUNSPELL_WARNING(stderr, "error: empty dic file\n");
488 delete dict;
489 return 2;
490 }
365 mychomp(ts); 491 mychomp(ts);
366 492
367 /* remove byte order mark */ 493 /* remove byte order mark */
368 if (strncmp(ts,"\xef\xbb\xbf",3) == 0) { 494 if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) {
369 memmove(ts, ts+3, strlen(ts+3)+1); 495 memmove(ts, ts+3, strlen(ts+3)+1);
370 HUNSPELL_WARNING(stderr, "warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions\n"); 496 HUNSPELL_WARNING(stderr, "warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions\n");
371 } 497 }
372 498
373 if ((*ts < '1') || (*ts > '9')) HUNSPELL_WARNING(stderr, "error - missing word count in dictionary file\n");
374 tablesize = atoi(ts); 499 tablesize = atoi(ts);
375 if (!tablesize) return 4; 500 if (tablesize == 0) {
501 HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n");
502 delete dict;
503 return 4;
504 }
376 tablesize = tablesize + 5 + USERWORD; 505 tablesize = tablesize + 5 + USERWORD;
377 if ((tablesize %2) == 0) tablesize++; 506 if ((tablesize %2) == 0) tablesize++;
378 507
379 // allocate the hash table 508 // allocate the hash table
380 tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry)); 509 tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *));
381 if (! tableptr) return 3; 510 if (! tableptr) {
382 for (int i=0; i<tablesize; i++) tableptr[i].word = NULL; 511 delete dict;
512 return 3;
513 }
514 for (int i=0; i<tablesize; i++) tableptr[i] = NULL;
383 515
384 // loop through all words on much list and add to hash 516 // loop through all words on much list and add to hash
385 // table and create word and affix strings 517 // table and create word and affix strings
386 518
387 while (fgets(ts,MAXDELEN-1,rawdict)) { 519 while ((ts = dict->getline())) {
388 mychomp(ts); 520 mychomp(ts);
389 // split each line into word and morphological description 521 // split each line into word and morphological description
390 dp = strchr(ts,'\t'); 522 dp = ts;
523 while ((dp = strchr(dp, ':'))) {
524 » if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) {
525 » for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--);
526 » if (dp < ts) { // missing word
527 » » dp = NULL;
528 » } else {
529 » » *(dp + 1) = '\0';
530 » » dp = dp + 2;
531 » }
532 » break;
533 » }
534 » dp++;
535 }
391 536
392 if (dp) { 537 // tabulator is the old morphological field separator
393 *dp = '\0'; 538 dp2 = strchr(ts, '\t');
394 dp++; 539 if (dp2 && (!dp || dp2 < dp)) {
395 } else { 540 » *dp2 = '\0';
396 dp = NULL; 541 » dp = dp2 + 1;
397 } 542 }
398 543
399 // split each line into word and affix char strings 544 // split each line into word and affix char strings
400 // "\/" signs slash in words (not affix separator) 545 // "\/" signs slash in words (not affix separator)
401 // "/" at beginning of the line is word character (not affix separator) 546 // "/" at beginning of the line is word character (not affix separator)
402 ap = strchr(ts,'/'); 547 ap = strchr(ts,'/');
403 while (ap) { 548 while (ap) {
404 if (ap == ts) { 549 if (ap == ts) {
405 ap++; 550 ap++;
406 continue; 551 continue;
407 } else if (*(ap - 1) != '\\') break; 552 } else if (*(ap - 1) != '\\') break;
408 // replace "\/" with "/" 553 // replace "\/" with "/"
409 for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++); 554 for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++);
410 ap = strchr(ap,'/'); 555 ap = strchr(ap,'/');
411 } 556 }
412 557
413 if (ap) { 558 if (ap) {
414 *ap = '\0'; 559 *ap = '\0';
415 if (aliasf) { 560 if (aliasf) {
416 int index = atoi(ap + 1); 561 int index = atoi(ap + 1);
417 al = get_aliasf(index, &flags); 562 al = get_aliasf(index, &flags, dict);
418 if (!al) { 563 if (!al) {
419 HUNSPELL_WARNING(stderr, "error - bad flag vector alias: %s\n", ts); 564 HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", dict->getlinenum());
420 *ap = '\0'; 565 *ap = '\0';
421 } 566 }
422 } else { 567 } else {
423 al = decode_flags(&flags, ap + 1); 568 al = decode_flags(&flags, ap + 1, dict);
424 flag_qsort(flags, 0, al); 569 flag_qsort(flags, 0, al);
425 } 570 }
426 } else { 571 } else {
427 al = 0; 572 al = 0;
428 ap = NULL; 573 ap = NULL;
429 flags = NULL; 574 flags = NULL;
430 } 575 }
431 576
432 wl = strlen(ts); 577 int captype;
578 int wbl = strlen(ts);
579 int wcl = get_clen_and_captype(ts, wbl, &captype);
580 // add the word and its index plus its capitalized form optionally
581 if (add_word(ts,wbl,wcl,flags,al,dp, false) ||
582 » add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) {
583 » delete dict;
584 » return 5;
585 }
586 }
433 587
434 // add the word and its index 588 delete dict;
435 if (add_word(ts,wl,flags,al,dp)) return 5;
436
437 }
438
439 fclose(rawdict);
440 #endif 589 #endif
441 return 0; 590 return 0;
442 } 591 }
443 592
444
445 // the hash function is a simple load and rotate 593 // the hash function is a simple load and rotate
446 // algorithm borrowed 594 // algorithm borrowed
447 595
448 int HashMgr::hash(const char * word) const 596 int HashMgr::hash(const char * word) const
449 { 597 {
450 #ifdef HUNSPELL_CHROME_CLIENT 598 #ifdef HUNSPELL_CHROME_CLIENT
451 return 0; 599 return 0;
452 #else 600 #else
453 long hv = 0; 601 long hv = 0;
454 for (int i=0; i < 4 && *word != 0; i++) 602 for (int i=0; i < 4 && *word != 0; i++)
455 hv = (hv << 8) | (*word++); 603 hv = (hv << 8) | (*word++);
456 while (*word != 0) { 604 while (*word != 0) {
457 ROTATE(hv,ROTATE_LEN); 605 ROTATE(hv,ROTATE_LEN);
458 hv ^= (*word++); 606 hv ^= (*word++);
459 } 607 }
460 return (unsigned long) hv % tablesize; 608 return (unsigned long) hv % tablesize;
461 #endif 609 #endif
462 } 610 }
463 611
464 int HashMgr::decode_flags(unsigned short ** result, char * flags) { 612 int HashMgr::decode_flags(unsigned short ** result, char * flags) {
465 int len; 613 int len;
466 switch (flag_mode) { 614 switch (flag_mode) {
467 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) 615 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
468 len = strlen(flags); 616 len = strlen(flags);
469 if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: length of FLAG_LONG flagvector is odd: %s\n", flags); 617 if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: bad flagvector\n");
470 len = len/2; 618 len /= 2;
471 *result = (unsigned short *) malloc(len * sizeof(short)); 619 *result = (unsigned short *) malloc(len * sizeof(short));
620 if (!*result) return -1;
472 for (int i = 0; i < len; i++) { 621 for (int i = 0; i < len; i++) {
473 (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1]; 622 (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1];
474 } 623 }
475 break; 624 break;
476 } 625 }
477 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233) 626 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)
627 int i;
478 len = 1; 628 len = 1;
479 char * src = flags; 629 char * src = flags;
480 unsigned short * dest; 630 unsigned short * dest;
481 char * p; 631 char * p;
482 for (p = flags; *p; p++) { 632 for (p = flags; *p; p++) {
483 if (*p == ',') len++; 633 if (*p == ',') len++;
484 } 634 }
485 *result = (unsigned short *) malloc(len * sizeof(short)); 635 *result = (unsigned short *) malloc(len * sizeof(short));
636 if (!*result) return -1;
486 dest = *result; 637 dest = *result;
487 for (p = flags; *p; p++) { 638 for (p = flags; *p; p++) {
488 if (*p == ',') { 639 if (*p == ',') {
489 *dest = (unsigned short) atoi(src); 640 i = atoi(src);
641 if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);
642 *dest = (unsigned short) i;
490 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); 643 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
491 src = p + 1; 644 src = p + 1;
492 dest++; 645 dest++;
493 } 646 }
494 } 647 }
495 *dest = (unsigned short) atoi(src); 648 i = atoi(src);
649 if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);
650 *dest = (unsigned short) i;
496 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); 651 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
497 break; 652 break;
498 } 653 }
499 case FLAG_UNI: { // UTF-8 characters 654 case FLAG_UNI: { // UTF-8 characters
500 w_char w[MAXDELEN/2]; 655 w_char w[BUFSIZE/2];
501 len = u8_u16(w, MAXDELEN/2, flags); 656 len = u8_u16(w, BUFSIZE/2, flags);
502 *result = (unsigned short *) malloc(len * sizeof(short)); 657 *result = (unsigned short *) malloc(len * sizeof(short));
658 if (!*result) return -1;
503 memcpy(*result, w, len * sizeof(short)); 659 memcpy(*result, w, len * sizeof(short));
504 break; 660 break;
505 } 661 }
506 default: { // Ispell's one-character flags (erfg -> e r f g) 662 default: { // Ispell's one-character flags (erfg -> e r f g)
507 unsigned short * dest; 663 unsigned short * dest;
508 len = strlen(flags); 664 len = strlen(flags);
509 *result = (unsigned short *) malloc(len * sizeof(short)); 665 *result = (unsigned short *) malloc(len * sizeof(short));
666 if (!*result) return -1;
510 dest = *result; 667 dest = *result;
511 for (unsigned char * p = (unsigned char *) flags; *p; p++) { 668 for (unsigned char * p = (unsigned char *) flags; *p; p++) {
512 *dest = (unsigned short) *p; 669 *dest = (unsigned short) *p;
513 dest++; 670 dest++;
514 } 671 }
515 } 672 }
516 } 673 }
517 return len; 674 return len;
518 } 675 }
519 676
520 unsigned short HashMgr::decode_flag(const char * f) { 677 unsigned short HashMgr::decode_flag(const char * f) {
521 unsigned short s = 0; 678 unsigned short s = 0;
679 int i;
522 switch (flag_mode) { 680 switch (flag_mode) {
523 case FLAG_LONG: 681 case FLAG_LONG:
524 s = ((unsigned short) f[0] << 8) + (unsigned short) f[1]; 682 s = ((unsigned short) f[0] << 8) + (unsigned short) f[1];
525 break; 683 break;
526 case FLAG_NUM: 684 case FLAG_NUM:
527 s = (unsigned short) atoi(f); 685 i = atoi(f);
686 if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is to o large (max: %d)\n", i, DEFAULTFLAGS - 1);
687 s = (unsigned short) i;
528 break; 688 break;
529 case FLAG_UNI: 689 case FLAG_UNI:
530 u8_u16((w_char *) &s, 1, f); 690 u8_u16((w_char *) &s, 1, f);
531 break; 691 break;
532 default: 692 default:
533 s = (unsigned short) *((unsigned char *)f); 693 s = (unsigned short) *((unsigned char *)f);
534 } 694 }
535 if (!s) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); 695 if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
536 return s; 696 return s;
537 } 697 }
538 698
539 char * HashMgr::encode_flag(unsigned short f) { 699 char * HashMgr::encode_flag(unsigned short f) {
540 unsigned char ch[10]; 700 unsigned char ch[10];
541 if (f==0) return mystrdup("(NULL)"); 701 if (f==0) return mystrdup("(NULL)");
542 if (flag_mode == FLAG_LONG) { 702 if (flag_mode == FLAG_LONG) {
543 ch[0] = (unsigned char) (f >> 8); 703 ch[0] = (unsigned char) (f >> 8);
544 ch[1] = (unsigned char) (f - ((f >> 8) << 8)); 704 ch[1] = (unsigned char) (f - ((f >> 8) << 8));
545 ch[2] = '\0'; 705 ch[2] = '\0';
(...skipping 16 matching lines...) Expand all
562 // Read in the regular commands from the affix file. We care about the FLAG 722 // Read in the regular commands from the affix file. We care about the FLAG
563 // line becuase the AF lines depend on this value, and the IGNORE line. 723 // line becuase the AF lines depend on this value, and the IGNORE line.
564 // The rest of the commands will be read by the affix manager. 724 // The rest of the commands will be read by the affix manager.
565 char line[MAXDELEN+1]; 725 char line[MAXDELEN+1];
566 hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator(); 726 hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator();
567 while (iterator.AdvanceAndCopy(line, MAXDELEN)) { 727 while (iterator.AdvanceAndCopy(line, MAXDELEN)) {
568 // Parse in the ignored characters (for example, Arabic optional 728 // Parse in the ignored characters (for example, Arabic optional
569 // diacritics characters. 729 // diacritics characters.
570 if (strncmp(line,"IGNORE",6) == 0) { 730 if (strncmp(line,"IGNORE",6) == 0) {
571 parse_array(line, &ignorechars, &ignorechars_utf16, 731 parse_array(line, &ignorechars, &ignorechars_utf16,
572 &ignorechars_utf16_len, "IGNORE", utf8); 732 &ignorechars_utf16_len, utf8, 0);
573 } 733 }
574 // Retrieve the format of an AF line. 734 // Retrieve the format of an AF line.
575 if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) { 735 if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {
576 if (strstr(line, "long")) flag_mode = FLAG_LONG; 736 if (strstr(line, "long")) flag_mode = FLAG_LONG;
577 if (strstr(line, "num")) flag_mode = FLAG_NUM; 737 if (strstr(line, "num")) flag_mode = FLAG_NUM;
578 if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI; 738 if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;
579 } 739 }
580 } 740 }
581 741
582 // Read in all the AF lines which tell us the rules for each affix group ID. 742 // Read in all the AF lines which tell us the rules for each affix group ID.
583 iterator = bdict_reader->GetAfLineIterator(); 743 iterator = bdict_reader->GetAfLineIterator();
584 while (iterator.AdvanceAndCopy(line, MAXDELEN)) { 744 while (iterator.AdvanceAndCopy(line, MAXDELEN)) {
585 int rv = parse_aliasf(line, &iterator); 745 int rv = parse_aliasf(line, &iterator);
586 if (rv) 746 if (rv)
587 return rv; 747 return rv;
588 } 748 }
589 749
590 return 0; 750 return 0;
591 } 751 }
592 #else 752 #else
593 // read in aff file and set flag mode 753 // read in aff file and set flag mode
594 int HashMgr::load_config(FILE* aff_handle) 754 int HashMgr::load_config(FILE* aff_handle, const char * key)
595 { 755 {
756 char * line; // io buffers
596 int firstline = 1; 757 int firstline = 1;
597
598 // io buffers
599 char line[MAXDELEN+1];
600 758
601 // open the affix file 759 // open the affix file
602 FILE * afflst; 760 FileMgr * afflst = new FileMgr(affpath, key);
603 afflst = _fdopen(_dup(_fileno(aff_handle)), "r");
604 if (!afflst) { 761 if (!afflst) {
605 HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n"); 762 HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n");
606 return 1; 763 return 1;
607 } 764 }
608 fseek(afflst, 0, SEEK_SET);
609 765
610 // read in each line ignoring any that do not 766 // read in each line ignoring any that do not
611 // start with a known line type indicator 767 // start with a known line type indicator
612 768
613 while (fgets(line,MAXDELEN,afflst)) { 769 while ((line = afflst->getline())) {
614 mychomp(line); 770 mychomp(line);
615 771
616 /* remove byte order mark */ 772 /* remove byte order mark */
617 if (firstline) { 773 if (firstline) {
618 firstline = 0; 774 firstline = 0;
619 if (strncmp(line,"\xef\xbb\xbf",3) == 0) memmove(line, line+3, strlen(l ine+3)+1); 775 if (strncmp(line,"\xEF\xBB\xBF",3) == 0) memmove(line, line+3, strlen(l ine+3)+1);
620 } 776 }
621 777
622 /* parse in the try string */ 778 /* parse in the try string */
623 if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) { 779 if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {
624 if (flag_mode != FLAG_CHAR) { 780 if (flag_mode != FLAG_CHAR) {
625 HUNSPELL_WARNING(stderr, "error: duplicate FLAG parameter\n"); 781 HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions o f the FLAG affix file parameter\n", afflst->getlinenum());
626 } 782 }
627 if (strstr(line, "long")) flag_mode = FLAG_LONG; 783 if (strstr(line, "long")) flag_mode = FLAG_LONG;
628 if (strstr(line, "num")) flag_mode = FLAG_NUM; 784 if (strstr(line, "num")) flag_mode = FLAG_NUM;
629 if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI; 785 if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;
630 if (flag_mode == FLAG_CHAR) { 786 if (flag_mode == FLAG_CHAR) {
631 HUNSPELL_WARNING(stderr, "error: FLAG need `num', `long' or `UTF -8' parameter: %s\n", line); 787 HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `lon g' or `UTF-8' parameter\n", afflst->getlinenum());
632 } 788 }
633 } 789 }
634 if ((strncmp(line,"SET",3) == 0) && isspace(line[3]) && strstr(line, "UT F-8")) utf8 = 1; 790 if (strncmp(line,"FORBIDDENWORD",13) == 0) {
791 char * st = NULL;
792 if (parse_string(line, &st, afflst->getlinenum())) {
793 delete afflst;
794 return 1;
795 }
796 forbiddenword = decode_flag(st);
797 free(st);
798 }
799 if (strncmp(line, "SET", 3) == 0) {
800 » if (parse_string(line, &enc, afflst->getlinenum())) {
801 delete afflst;
802 return 1;
803 } »
804 » if (strcmp(enc, "UTF-8") == 0) {
805 » utf8 = 1;
806 #ifndef OPENOFFICEORG
807 #ifndef MOZILLA_CLIENT
808 » initialize_utf_tbl();
809 #endif
810 #endif
811 » } else csconv = get_current_cs(enc);
812 » }
813 if (strncmp(line, "LANG", 4) == 0) {
814 » if (parse_string(line, &lang, afflst->getlinenum())) {
815 delete afflst;
816 return 1;
817 } »
818 » langnum = get_lang_num(lang);
819 » }
635 820
636 /* parse in the ignored characters (for example, Arabic optional diacriti cs characters */ 821 /* parse in the ignored characters (for example, Arabic optional diacriti cs characters */
637 if (strncmp(line,"IGNORE",6) == 0) { 822 if (strncmp(line,"IGNORE",6) == 0) {
638 if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_u tf16_len, "IGNORE", utf8)) { 823 if (parse_array(line, &ignorechars, &ignorechars_utf16,
639 fclose(afflst); 824 &ignorechars_utf16_len, utf8, afflst->getlinenum())) {
825 delete afflst;
640 return 1; 826 return 1;
641 } 827 }
642 } 828 }
643 829
644 if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) { 830 if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) {
645 if (parse_aliasf(line, afflst)) { 831 if (parse_aliasf(line, afflst)) {
646 fclose(afflst); 832 delete afflst;
647 return 1; 833 return 1;
648 } 834 }
649 } 835 }
650 836
651 #ifdef HUNSPELL_EXPERIMENTAL
652 if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) { 837 if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) {
653 if (parse_aliasm(line, afflst)) { 838 if (parse_aliasm(line, afflst)) {
654 fclose(afflst); 839 delete afflst;
655 return 1; 840 return 1;
656 } 841 }
657 } 842 }
658 #endif 843
659 if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1; 844 if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;
660 if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && is space(line[3])) break; 845 if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && iss pace(line[3])) break;
661 } 846 }
662 fclose(afflst); 847 if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING);
848 delete afflst;
663 return 0; 849 return 0;
664 } 850 }
665 #endif // HUNSPELL_CHROME_CLIENT 851 #endif // HUNSPELL_CHROME_CLIENT
666 852
667 /* parse in the ALIAS table */ 853 /* parse in the ALIAS table */
668 #ifdef HUNSPELL_CHROME_CLIENT 854 #ifdef HUNSPELL_CHROME_CLIENT
669 int HashMgr::parse_aliasf(char* line, hunspell::LineIterator* iterator) 855 int HashMgr::parse_aliasf(char* line, hunspell::LineIterator* iterator)
670 { 856 {
671 #else 857 #else
672 int HashMgr::parse_aliasf(char * line, FILE * af) 858 int HashMgr::parse_aliasf(char * line, FileMgr * af)
673 { 859 {
674 #endif 860 #endif
675 if (numaliasf != 0) { 861 if (numaliasf != 0) {
676 HUNSPELL_WARNING(stderr, "error: duplicate AF (alias for flag vector) tabl es used\n"); 862 HUNSPELL_WARNING(stderr, "error: multiple table definitions\n");
677 return 1; 863 return 1;
678 } 864 }
679 char * tp = line; 865 char * tp = line;
680 char * piece; 866 char * piece;
681 int i = 0; 867 int i = 0;
682 int np = 0; 868 int np = 0;
683 piece = mystrsep(&tp, 0); 869 piece = mystrsep(&tp, 0);
684 while (piece) { 870 while (piece) {
685 if (*piece != '\0') { 871 if (*piece != '\0') {
686 switch(i) { 872 switch(i) {
687 case 0: { np++; break; } 873 case 0: { np++; break; }
688 case 1: { 874 case 1: {
689 numaliasf = atoi(piece); 875 numaliasf = atoi(piece);
690 if (numaliasf < 1) { 876 if (numaliasf < 1) {
691 numaliasf = 0; 877 numaliasf = 0;
692 aliasf = NULL; 878 aliasf = NULL;
693 aliasflen = NULL; 879 aliasflen = NULL;
694 HUNSPELL_WARNING(stderr, "incorrect number of entries in AF table\n"); 880 HUNSPELL_WARNING(stderr, "error: bad entry number\n");
695 free(piece);
696 return 1; 881 return 1;
697 } 882 }
698 aliasf = (unsigned short **) malloc(numaliasf * sizeof(un signed short *)); 883 aliasf = (unsigned short **) malloc(numaliasf * sizeof(un signed short *));
699 aliasflen = (unsigned short *) malloc(numaliasf * sizeof( short)); 884 aliasflen = (unsigned short *) malloc(numaliasf * sizeof( short));
700 if (!aliasf || !aliasflen) { 885 if (!aliasf || !aliasflen) {
701 numaliasf = 0; 886 numaliasf = 0;
702 if (aliasf) free(aliasf); 887 if (aliasf) free(aliasf);
703 if (aliasflen) free(aliasflen); 888 if (aliasflen) free(aliasflen);
704 aliasf = NULL; 889 aliasf = NULL;
705 aliasflen = NULL; 890 aliasflen = NULL;
706 return 1; 891 return 1;
707 } 892 }
708 np++; 893 np++;
709 break; 894 break;
710 } 895 }
711 default: break; 896 default: break;
712 } 897 }
713 i++; 898 i++;
714 } 899 }
715 free(piece);
716 piece = mystrsep(&tp, 0); 900 piece = mystrsep(&tp, 0);
717 } 901 }
718 if (np != 2) { 902 if (np != 2) {
719 numaliasf = 0; 903 numaliasf = 0;
720 free(aliasf); 904 free(aliasf);
721 free(aliasflen); 905 free(aliasflen);
722 aliasf = NULL; 906 aliasf = NULL;
723 aliasflen = NULL; 907 aliasflen = NULL;
724 HUNSPELL_WARNING(stderr, "error: missing AF table information\n"); 908 HUNSPELL_WARNING(stderr, "error: missing data\n");
725 return 1; 909 return 1;
726 } 910 }
727 911
728 /* now parse the numaliasf lines to read in the remainder of the table */ 912 /* now parse the numaliasf lines to read in the remainder of the table */
729 char * nl = line; 913 char * nl = line;
730 for (int j=0; j < numaliasf; j++) { 914 for (int j=0; j < numaliasf; j++) {
731 #ifdef HUNSPELL_CHROME_CLIENT 915 #ifdef HUNSPELL_CHROME_CLIENT
732 if (!iterator->AdvanceAndCopy(nl, MAXDELEN)) 916 if (!iterator->AdvanceAndCopy(nl, MAXDELEN))
733 return 1; 917 return 1;
734 #else 918 #else
735 if (!fgets(nl,MAXDELEN,af)) return 1; 919 if (!(nl = af->getline())) return 1;
736 #endif 920 #endif
737 mychomp(nl); 921 » mychomp(nl);
738 tp = nl; 922 tp = nl;
739 i = 0; 923 i = 0;
740 aliasf[j] = NULL; 924 aliasf[j] = NULL;
741 aliasflen[j] = 0; 925 aliasflen[j] = 0;
742 piece = mystrsep(&tp, 0); 926 piece = mystrsep(&tp, 0);
743 while (piece) { 927 while (piece) {
744 if (*piece != '\0') { 928 if (*piece != '\0') {
745 switch(i) { 929 switch(i) {
746 case 0: { 930 case 0: {
747 if (strncmp(piece,"AF",2) != 0) { 931 if (strncmp(piece,"AF",2) != 0) {
748 numaliasf = 0; 932 numaliasf = 0;
749 free(aliasf); 933 free(aliasf);
750 free(aliasflen); 934 free(aliasflen);
751 aliasf = NULL; 935 aliasf = NULL;
752 aliasflen = NULL; 936 aliasflen = NULL;
753 HUNSPELL_WARNING(stderr, "error: AF table is co rrupt\n"); 937 HUNSPELL_WARNING(stderr, "error: table is corru pt\n");
754 free(piece);
755 return 1; 938 return 1;
756 } 939 }
757 break; 940 break;
758 } 941 }
759 case 1: { 942 case 1: {
760 aliasflen[j] = (unsigned short) decode_flags(&(alias f[j]), piece); 943 aliasflen[j] = (unsigned short) decode_flags(&(alias f[j]), piece);
761 flag_qsort(aliasf[j], 0, aliasflen[j]); 944 flag_qsort(aliasf[j], 0, aliasflen[j]);
762 break; 945 break;
763 } 946 }
764 default: break; 947 default: break;
765 } 948 }
766 i++; 949 i++;
767 } 950 }
768 free(piece);
769 piece = mystrsep(&tp, 0); 951 piece = mystrsep(&tp, 0);
770 } 952 }
771 if (!aliasf[j]) { 953 if (!aliasf[j]) {
772 free(aliasf); 954 free(aliasf);
773 free(aliasflen); 955 free(aliasflen);
774 aliasf = NULL; 956 aliasf = NULL;
775 aliasflen = NULL; 957 aliasflen = NULL;
776 numaliasf = 0; 958 numaliasf = 0;
777 HUNSPELL_WARNING(stderr, "error: AF table is corrupt\n"); 959 HUNSPELL_WARNING(stderr, "error: table is corrupt\n");
778 return 1; 960 return 1;
779 } 961 }
780 } 962 }
781 return 0; 963 return 0;
782 } 964 }
783 965
784 #ifdef HUNSPELL_CHROME_CLIENT 966 #ifdef HUNSPELL_CHROME_CLIENT
785 hentry* HashMgr::AffixIDsToHentry(char* word, 967 hentry* HashMgr::AffixIDsToHentry(char* word,
786 int* affix_ids, 968 int* affix_ids,
787 int affix_count) const 969 int affix_count) const
(...skipping 15 matching lines...) Expand all
803 985
804 // We can get a number of prefixes per word. There will normally be only one, 986 // We can get a number of prefixes per word. There will normally be only one,
805 // but if not, there will be a linked list of "hentry"s for the "homonym"s 987 // but if not, there will be a linked list of "hentry"s for the "homonym"s
806 // for the word. 988 // for the word.
807 struct hentry* first_he = NULL; 989 struct hentry* first_he = NULL;
808 struct hentry* prev_he = NULL; // For making linked list. 990 struct hentry* prev_he = NULL; // For making linked list.
809 for (int i = 0; i < affix_count; i++) { 991 for (int i = 0; i < affix_count; i++) {
810 struct hentry* he = new hentry; 992 struct hentry* he = new hentry;
811 if (i == 0) 993 if (i == 0)
812 first_he = he; 994 first_he = he;
813 he->word = word; 995 he->word = *word;
814 he->wlen = word_len; 996 he->blen = word_len;
815 he->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[i], 997 he->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[i],
816 &he->astr); 998 &he->astr);
817 he->next = NULL; 999 he->next = NULL;
818 he->next_homonym = NULL; 1000 he->next_homonym = NULL;
819 if (prev_he) 1001 if (prev_he)
820 prev_he->next_homonym = he; 1002 prev_he->next_homonym = he;
821 prev_he = he; 1003 prev_he = he;
822 } 1004 }
823 1005
824 cache[std_word] = first_he; // Save this word in the cache for later. 1006 cache[std_word] = first_he; // Save this word in the cache for later.
(...skipping 22 matching lines...) Expand all
847 int HashMgr::get_aliasf(int index, unsigned short ** fvec) { 1029 int HashMgr::get_aliasf(int index, unsigned short ** fvec) {
848 if ((index > 0) && (index <= numaliasf)) { 1030 if ((index > 0) && (index <= numaliasf)) {
849 *fvec = aliasf[index - 1]; 1031 *fvec = aliasf[index - 1];
850 return aliasflen[index - 1]; 1032 return aliasflen[index - 1];
851 } 1033 }
852 HUNSPELL_WARNING(stderr, "error: bad flag alias index: %d\n", index); 1034 HUNSPELL_WARNING(stderr, "error: bad flag alias index: %d\n", index);
853 *fvec = NULL; 1035 *fvec = NULL;
854 return 0; 1036 return 0;
855 } 1037 }
856 1038
857 #ifdef HUNSPELL_EXPERIMENTAL
858 /* parse morph alias definitions */ 1039 /* parse morph alias definitions */
859 int HashMgr::parse_aliasm(char * line, FILE * af) 1040 int HashMgr::parse_aliasm(char * line, FileMgr * af)
860 { 1041 {
861 if (numaliasm != 0) { 1042 if (numaliasm != 0) {
862 HUNSPELL_WARNING(stderr, "error: duplicate AM (aliases for morphological d escriptions) tables used\n"); 1043 HUNSPELL_WARNING(stderr, "error: multiple table definitions\n");
863 return 1; 1044 return 1;
864 } 1045 }
865 char * tp = line; 1046 char * tp = line;
866 char * piece; 1047 char * piece;
867 int i = 0; 1048 int i = 0;
868 int np = 0; 1049 int np = 0;
869 piece = mystrsep(&tp, 0); 1050 piece = mystrsep(&tp, 0);
870 while (piece) { 1051 while (piece) {
871 if (*piece != '\0') { 1052 if (*piece != '\0') {
872 switch(i) { 1053 switch(i) {
873 case 0: { np++; break; } 1054 case 0: { np++; break; }
874 case 1: { 1055 case 1: {
875 numaliasm = atoi(piece); 1056 numaliasm = atoi(piece);
876 if (numaliasm < 1) { 1057 if (numaliasm < 1) {
877 HUNSPELL_WARNING(stderr, "incorrect number of entries in AM table\n"); 1058 HUNSPELL_WARNING(stderr, "error: line %d: bad entry nu mber\n", af->getlinenum());
878 free(piece);
879 return 1; 1059 return 1;
880 } 1060 }
881 aliasm = (char **) malloc(numaliasm * sizeof(char *)); 1061 aliasm = (char **) malloc(numaliasm * sizeof(char *));
882 if (!aliasm) { 1062 if (!aliasm) {
883 numaliasm = 0; 1063 numaliasm = 0;
884 return 1; 1064 return 1;
885 } 1065 }
886 np++; 1066 np++;
887 break; 1067 break;
888 } 1068 }
889 default: break; 1069 default: break;
890 } 1070 }
891 i++; 1071 i++;
892 } 1072 }
893 free(piece);
894 piece = mystrsep(&tp, 0); 1073 piece = mystrsep(&tp, 0);
895 } 1074 }
896 if (np != 2) { 1075 if (np != 2) {
897 numaliasm = 0; 1076 numaliasm = 0;
898 free(aliasm); 1077 free(aliasm);
899 aliasm = NULL; 1078 aliasm = NULL;
900 HUNSPELL_WARNING(stderr, "error: missing AM alias information\n"); 1079 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum( ));
901 return 1; 1080 return 1;
902 } 1081 }
903 1082
904 /* now parse the numaliasm lines to read in the remainder of the table */ 1083 /* now parse the numaliasm lines to read in the remainder of the table */
905 char * nl = line; 1084 char * nl = line;
906 for (int j=0; j < numaliasm; j++) { 1085 for (int j=0; j < numaliasm; j++) {
907 if (!fgets(nl,MAXDELEN,af)) return 1; 1086 if (!(nl = af->getline())) return 1;
908 mychomp(nl); 1087 mychomp(nl);
909 tp = nl; 1088 tp = nl;
910 i = 0; 1089 i = 0;
911 aliasm[j] = NULL; 1090 aliasm[j] = NULL;
912 piece = mystrsep(&tp, 0); 1091 piece = mystrsep(&tp, ' ');
913 while (piece) { 1092 while (piece) {
914 if (*piece != '\0') { 1093 if (*piece != '\0') {
915 switch(i) { 1094 switch(i) {
916 case 0: { 1095 case 0: {
917 if (strncmp(piece,"AM",2) != 0) { 1096 if (strncmp(piece,"AM",2) != 0) {
918 HUNSPELL_WARNING(stderr, "error: AM table is co rrupt\n"); 1097 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
919 free(piece);
920 numaliasm = 0; 1098 numaliasm = 0;
921 free(aliasm); 1099 free(aliasm);
922 aliasm = NULL; 1100 aliasm = NULL;
923 return 1; 1101 return 1;
924 } 1102 }
925 break; 1103 break;
926 } 1104 }
927 case 1: { 1105 case 1: {
1106 // add the remaining of the line
1107 if (*tp) {
1108 *(tp - 1) = ' ';
1109 tp = tp + strlen(tp);
1110 }
928 if (complexprefixes) { 1111 if (complexprefixes) {
929 if (utf8) reverseword_utf(piece); 1112 if (utf8) reverseword_utf(piece);
930 else reverseword(piece); 1113 else reverseword(piece);
931 } 1114 }
932 aliasm[j] = mystrdup(piece); 1115 aliasm[j] = mystrdup(piece);
1116 if (!aliasm[j]) {
1117 numaliasm = 0;
1118 free(aliasm);
1119 aliasm = NULL;
1120 return 1;
1121 }
933 break; } 1122 break; }
934 default: break; 1123 default: break;
935 } 1124 }
936 i++; 1125 i++;
937 } 1126 }
938 free(piece); 1127 piece = mystrsep(&tp, ' ');
939 piece = mystrsep(&tp, 0);
940 } 1128 }
941 if (!aliasm[j]) { 1129 if (!aliasm[j]) {
942 numaliasm = 0; 1130 numaliasm = 0;
943 free(aliasm); 1131 free(aliasm);
944 aliasm = NULL; 1132 aliasm = NULL;
945 HUNSPELL_WARNING(stderr, "error: map table is corrupt\n"); 1133 HUNSPELL_WARNING(stderr, "error: table is corrupt\n");
946 return 1; 1134 return 1;
947 } 1135 }
948 } 1136 }
949 return 0; 1137 return 0;
950 } 1138 }
951 1139
952 int HashMgr::is_aliasm() { 1140 int HashMgr::is_aliasm() {
953 return (aliasm != NULL); 1141 return (aliasm != NULL);
954 } 1142 }
955 1143
956 char * HashMgr::get_aliasm(int index) { 1144 char * HashMgr::get_aliasm(int index) {
957 if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1]; 1145 if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1];
958 HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index); 1146 HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);
959 return NULL; 1147 return NULL;
960 } 1148 }
961 #endif
OLDNEW
« no previous file with comments | « chrome/third_party/hunspell/src/hunspell/hashmgr.hxx ('k') | chrome/third_party/hunspell/src/hunspell/htypes.hxx » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698