| OLD | NEW |
| (Empty) |
| 1 #include "license.hunspell" | |
| 2 #include "license.myspell" | |
| 3 | |
| 4 #include <stdlib.h> | |
| 5 #include <string.h> | |
| 6 #include <stdio.h> | |
| 7 #include <ctype.h> | |
| 8 | |
| 9 #include "hashmgr.hxx" | |
| 10 #include "csutil.hxx" | |
| 11 #include "atypes.hxx" | |
| 12 | |
| 13 // build a hash table from a munched word list | |
| 14 | |
| 15 #ifdef HUNSPELL_CHROME_CLIENT | |
| 16 HashMgr::HashMgr(hunspell::BDictReader* reader) | |
| 17 { | |
| 18 bdict_reader = reader; | |
| 19 #else | |
| 20 HashMgr::HashMgr(const char * tpath, const char * apath, const char * key) | |
| 21 { | |
| 22 #endif | |
| 23 tablesize = 0; | |
| 24 tableptr = NULL; | |
| 25 flag_mode = FLAG_CHAR; | |
| 26 complexprefixes = 0; | |
| 27 utf8 = 0; | |
| 28 langnum = 0; | |
| 29 lang = NULL; | |
| 30 enc = NULL; | |
| 31 csconv = 0; | |
| 32 ignorechars = NULL; | |
| 33 ignorechars_utf16 = NULL; | |
| 34 ignorechars_utf16_len = 0; | |
| 35 numaliasf = 0; | |
| 36 aliasf = NULL; | |
| 37 numaliasm = 0; | |
| 38 aliasm = NULL; | |
| 39 forbiddenword = FORBIDDENWORD; // forbidden word signing flag | |
| 40 #ifdef HUNSPELL_CHROME_CLIENT | |
| 41 // No tables to load, just the AF lines. | |
| 42 load_config(NULL, NULL); | |
| 43 int ec = LoadAFLines(); | |
| 44 #else | |
| 45 load_config(apath, key); | |
| 46 int ec = load_tables(tpath, key); | |
| 47 #endif | |
| 48 if (ec) { | |
| 49 /* error condition - what should we do here */ | |
| 50 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec); | |
| 51 if (tableptr) { | |
| 52 free(tableptr); | |
| 53 tableptr = NULL; | |
| 54 } | |
| 55 tablesize = 0; | |
| 56 } | |
| 57 } | |
| 58 | |
| 59 | |
| 60 HashMgr::~HashMgr() | |
| 61 { | |
| 62 if (tableptr) { | |
| 63 // now pass through hash table freeing up everything | |
| 64 // go through column by column of the table | |
| 65 for (int i=0; i < tablesize; i++) { | |
| 66 struct hentry * pt = tableptr[i]; | |
| 67 struct hentry * nt = NULL; | |
| 68 while(pt) { | |
| 69 nt = pt->next; | |
| 70 if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))
) free(pt->astr); | |
| 71 free(pt); | |
| 72 pt = nt; | |
| 73 } | |
| 74 } | |
| 75 free(tableptr); | |
| 76 } | |
| 77 tablesize = 0; | |
| 78 | |
| 79 if (aliasf) { | |
| 80 for (int j = 0; j < (numaliasf); j++) free(aliasf[j]); | |
| 81 free(aliasf); | |
| 82 aliasf = NULL; | |
| 83 if (aliasflen) { | |
| 84 free(aliasflen); | |
| 85 aliasflen = NULL; | |
| 86 } | |
| 87 } | |
| 88 if (aliasm) { | |
| 89 for (int j = 0; j < (numaliasm); j++) free(aliasm[j]); | |
| 90 free(aliasm); | |
| 91 aliasm = NULL; | |
| 92 } | |
| 93 | |
| 94 #ifndef OPENOFFICEORG | |
| 95 #ifndef MOZILLA_CLIENT | |
| 96 if (utf8) free_utf_tbl(); | |
| 97 #endif | |
| 98 #endif | |
| 99 | |
| 100 if (enc) free(enc); | |
| 101 if (lang) free(lang); | |
| 102 | |
| 103 if (ignorechars) free(ignorechars); | |
| 104 if (ignorechars_utf16) free(ignorechars_utf16); | |
| 105 | |
| 106 #ifdef HUNSPELL_CHROME_CLIENT | |
| 107 EmptyHentryCache(); | |
| 108 for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin(); | |
| 109 it != pointer_to_strings_.end(); ++it) { | |
| 110 delete *it; | |
| 111 } | |
| 112 #endif | |
| 113 #ifdef MOZILLA_CLIENT | |
| 114 delete [] csconv; | |
| 115 #endif | |
| 116 } | |
| 117 | |
#ifdef HUNSPELL_CHROME_CLIENT
// Destroy every cached hentry, following each entry's next_homonym chain,
// then empty the cache itself.
void HashMgr::EmptyHentryCache() {
  HEntryCache::iterator slot = hentry_cache.begin();
  while (slot != hentry_cache.end()) {
    hentry* node = slot->second;
    while (node) {
      // Read the link before the node is deleted.
      hentry* following = node->next_homonym;
      DeleteHashEntry(node);
      node = following;
    }
    ++slot;
  }
  hentry_cache.clear();
}
#endif
| 134 | |
| 135 // lookup a root word in the hashtable | |
| 136 | |
| 137 struct hentry * HashMgr::lookup(const char *word) const | |
| 138 { | |
| 139 #ifdef HUNSPELL_CHROME_CLIENT | |
| 140 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; | |
| 141 int affix_count = bdict_reader->FindWord(word, affix_ids); | |
| 142 if (affix_count == 0) { // look for custom added word | |
| 143 std::map<base::StringPiece, int>::const_iterator iter = | |
| 144 custom_word_to_affix_id_map_.find(word); | |
| 145 if (iter != custom_word_to_affix_id_map_.end()) { | |
| 146 affix_count = 1; | |
| 147 affix_ids[0] = iter->second; | |
| 148 } | |
| 149 } | |
| 150 | |
| 151 static const int kMaxWordLen = 128; | |
| 152 static char word_buf[kMaxWordLen]; | |
| 153 // To take account of null-termination, we use upto 127. | |
| 154 strncpy(word_buf, word, kMaxWordLen - 1); | |
| 155 | |
| 156 return AffixIDsToHentry(word_buf, affix_ids, affix_count); | |
| 157 #else | |
| 158 struct hentry * dp; | |
| 159 if (tableptr) { | |
| 160 dp = tableptr[hash(word)]; | |
| 161 if (!dp) return NULL; | |
| 162 for ( ; dp != NULL; dp = dp->next) { | |
| 163 if (strcmp(word, dp->word) == 0) return dp; | |
| 164 } | |
| 165 } | |
| 166 return NULL; | |
| 167 #endif | |
| 168 } | |
| 169 | |
| 170 // add a word to the hash table (private) | |
| 171 int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, | |
| 172 int al, const char * desc, bool onlyupcase) | |
| 173 { | |
| 174 #ifndef HUNSPELL_CHROME_CLIENT | |
| 175 bool upcasehomonym = false; | |
| 176 int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0; | |
| 177 // variable-length hash record with word and optional fields | |
| 178 struct hentry* hp = | |
| 179 (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl); | |
| 180 if (!hp) return 1; | |
| 181 char * hpw = hp->word; | |
| 182 strcpy(hpw, word); | |
| 183 if (ignorechars != NULL) { | |
| 184 if (utf8) { | |
| 185 remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len); | |
| 186 } else { | |
| 187 remove_ignored_chars(hpw, ignorechars); | |
| 188 } | |
| 189 } | |
| 190 if (complexprefixes) { | |
| 191 if (utf8) reverseword_utf(hpw); else reverseword(hpw); | |
| 192 } | |
| 193 | |
| 194 int i = hash(hpw); | |
| 195 | |
| 196 hp->blen = (unsigned char) wbl; | |
| 197 hp->clen = (unsigned char) wcl; | |
| 198 hp->alen = (short) al; | |
| 199 hp->astr = aff; | |
| 200 hp->next = NULL; | |
| 201 hp->next_homonym = NULL; | |
| 202 | |
| 203 // store the description string or its pointer | |
| 204 if (desc) { | |
| 205 hp->var = H_OPT; | |
| 206 if (aliasm) { | |
| 207 hp->var += H_OPT_ALIASM; | |
| 208 store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc))); | |
| 209 } else { | |
| 210 strcpy(hpw + wbl + 1, desc); | |
| 211 if (complexprefixes) { | |
| 212 if (utf8) reverseword_utf(HENTRY_DATA(hp)); | |
| 213 else reverseword(HENTRY_DATA(hp)); | |
| 214 } | |
| 215 } | |
| 216 if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON; | |
| 217 } else hp->var = 0; | |
| 218 | |
| 219 struct hentry * dp = tableptr[i]; | |
| 220 if (!dp) { | |
| 221 tableptr[i] = hp; | |
| 222 return 0; | |
| 223 } | |
| 224 while (dp->next != NULL) { | |
| 225 if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) { | |
| 226 // remove hidden onlyupcase homonym | |
| 227 if (!onlyupcase) { | |
| 228 if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { | |
| 229 free(dp->astr); | |
| 230 dp->astr = hp->astr; | |
| 231 dp->alen = hp->alen; | |
| 232 free(hp); | |
| 233 return 0; | |
| 234 } else { | |
| 235 dp->next_homonym = hp; | |
| 236 } | |
| 237 } else { | |
| 238 upcasehomonym = true; | |
| 239 } | |
| 240 } | |
| 241 dp=dp->next; | |
| 242 } | |
| 243 if (strcmp(hp->word, dp->word) == 0) { | |
| 244 // remove hidden onlyupcase homonym | |
| 245 if (!onlyupcase) { | |
| 246 if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { | |
| 247 free(dp->astr); | |
| 248 dp->astr = hp->astr; | |
| 249 dp->alen = hp->alen; | |
| 250 free(hp); | |
| 251 return 0; | |
| 252 } else { | |
| 253 dp->next_homonym = hp; | |
| 254 } | |
| 255 } else { | |
| 256 upcasehomonym = true; | |
| 257 } | |
| 258 } | |
| 259 if (!upcasehomonym) { | |
| 260 dp->next = hp; | |
| 261 } else { | |
| 262 // remove hidden onlyupcase homonym | |
| 263 if (hp->astr) free(hp->astr); | |
| 264 free(hp); | |
| 265 } | |
| 266 #else | |
| 267 std::map<base::StringPiece, int>::iterator iter = | |
| 268 custom_word_to_affix_id_map_.find(word); | |
| 269 if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added | |
| 270 std::string* new_string_word = new std::string(word); | |
| 271 pointer_to_strings_.push_back(new_string_word); | |
| 272 base::StringPiece sp(*(new_string_word)); | |
| 273 custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words | |
| 274 return 1; | |
| 275 } | |
| 276 #endif | |
| 277 return 0; | |
| 278 } | |
| 279 | |
| 280 int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl, | |
| 281 unsigned short * flags, int al, char * dp, int captype) | |
| 282 { | |
| 283 // add inner capitalized forms to handle the following allcap forms: | |
| 284 // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG | |
| 285 // Allcaps with suffixes: CIA's -> CIA'S | |
| 286 if (((captype == HUHCAP) || (captype == HUHINITCAP) || | |
| 287 ((captype == ALLCAP) && (flags != NULL))) && | |
| 288 !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) { | |
| 289 unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned s
hort) * (al+1)); | |
| 290 if (!flags2) return 1; | |
| 291 if (al) memcpy(flags2, flags, al * sizeof(unsigned short)); | |
| 292 flags2[al] = ONLYUPCASEFLAG; | |
| 293 if (utf8) { | |
| 294 char st[BUFSIZE]; | |
| 295 w_char w[BUFSIZE]; | |
| 296 int wlen = u8_u16(w, BUFSIZE, word); | |
| 297 mkallsmall_utf(w, wlen, langnum); | |
| 298 mkallcap_utf(w, 1, langnum); | |
| 299 u16_u8(st, BUFSIZE, w, wlen); | |
| 300 return add_word(st,wbl,wcl,flags2,al+1,dp, true); | |
| 301 } else { | |
| 302 mkallsmall(word, csconv); | |
| 303 mkinitcap(word, csconv); | |
| 304 return add_word(word,wbl,wcl,flags2,al+1,dp, true); | |
| 305 } | |
| 306 } | |
| 307 return 0; | |
| 308 } | |
| 309 | |
| 310 // detect captype and modify word length for UTF-8 encoding | |
| 311 int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) { | |
| 312 int len; | |
| 313 if (utf8) { | |
| 314 w_char dest_utf[BUFSIZE]; | |
| 315 len = u8_u16(dest_utf, BUFSIZE, word); | |
| 316 *captype = get_captype_utf8(dest_utf, len, langnum); | |
| 317 } else { | |
| 318 len = wbl; | |
| 319 *captype = get_captype((char *) word, len, csconv); | |
| 320 } | |
| 321 return len; | |
| 322 } | |
| 323 | |
| 324 // remove word (personal dictionary function for standalone applications) | |
| 325 int HashMgr::remove(const char * word) | |
| 326 { | |
| 327 #ifdef HUNSPELL_CHROME_CLIENT | |
| 328 std::map<base::StringPiece, int>::iterator iter = | |
| 329 custom_word_to_affix_id_map_.find(word); | |
| 330 if (iter != custom_word_to_affix_id_map_.end()) | |
| 331 custom_word_to_affix_id_map_.erase(iter); | |
| 332 #else | |
| 333 struct hentry * dp = lookup(word); | |
| 334 while (dp) { | |
| 335 if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { | |
| 336 unsigned short * flags = | |
| 337 (unsigned short *) malloc(sizeof(short) * (dp->alen + 1)); | |
| 338 if (!flags) return 1; | |
| 339 for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i]; | |
| 340 flags[dp->alen] = forbiddenword; | |
| 341 dp->astr = flags; | |
| 342 dp->alen++; | |
| 343 flag_qsort(flags, 0, dp->alen); | |
| 344 } | |
| 345 dp = dp->next_homonym; | |
| 346 } | |
| 347 #endif | |
| 348 return 0; | |
| 349 } | |
| 350 | |
| 351 /* remove forbidden flag to add a personal word to the hash */ | |
| 352 int HashMgr::remove_forbidden_flag(const char * word) { | |
| 353 struct hentry * dp = lookup(word); | |
| 354 if (!dp) return 1; | |
| 355 while (dp) { | |
| 356 if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) { | |
| 357 if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal
dic. | |
| 358 else { | |
| 359 unsigned short * flags2 = | |
| 360 (unsigned short *) malloc(sizeof(short) * (dp->alen - 1)); | |
| 361 if (!flags2) return 1; | |
| 362 int i, j = 0; | |
| 363 for (i = 0; i < dp->alen; i++) { | |
| 364 if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i]; | |
| 365 } | |
| 366 dp->alen--; | |
| 367 dp->astr = flags2; // XXX allowed forbidden words | |
| 368 } | |
| 369 } | |
| 370 dp = dp->next_homonym; | |
| 371 } | |
| 372 return 0; | |
| 373 } | |
| 374 | |
| 375 // add a custom dic. word to the hash table (public) | |
| 376 int HashMgr::add(const char * word) | |
| 377 { | |
| 378 unsigned short * flags = NULL; | |
| 379 int al = 0; | |
| 380 if (remove_forbidden_flag(word)) { | |
| 381 int captype; | |
| 382 int wbl = strlen(word); | |
| 383 int wcl = get_clen_and_captype(word, wbl, &captype); | |
| 384 add_word(word, wbl, wcl, flags, al, NULL, false); | |
| 385 return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, N
ULL, captype); | |
| 386 } | |
| 387 return 0; | |
| 388 } | |
| 389 | |
| 390 int HashMgr::add_with_affix(const char * word, const char * example) | |
| 391 { | |
| 392 // detect captype and modify word length for UTF-8 encoding | |
| 393 struct hentry * dp = lookup(example); | |
| 394 remove_forbidden_flag(word); | |
| 395 if (dp && dp->astr) { | |
| 396 int captype; | |
| 397 int wbl = strlen(word); | |
| 398 int wcl = get_clen_and_captype(word, wbl, &captype); | |
| 399 if (aliasf) { | |
| 400 add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false); | |
| 401 } else { | |
| 402 unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeo
f(short)); | |
| 403 if (flags) { | |
| 404 memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(shor
t)); | |
| 405 add_word(word, wbl, wcl, flags, dp->alen, NULL, false); | |
| 406 } else return 1; | |
| 407 } | |
| 408 return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp
->alen, NULL, captype); | |
| 409 } | |
| 410 return 1; | |
| 411 } | |
| 412 | |
| 413 // walk the hash table entry by entry - null at end | |
| 414 // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp); | |
| 415 struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const | |
| 416 { | |
| 417 #ifdef HUNSPELL_CHROME_CLIENT | |
| 418 // Return NULL if dictionary is not valid. | |
| 419 if (!bdict_reader->IsValid()) | |
| 420 return NULL; | |
| 421 | |
| 422 // This function is only ever called by one place and not nested. We can | |
| 423 // therefore keep static state between calls and use |col| as a "reset" flag | |
| 424 // to avoid changing the API. It is set to -1 for the first call. | |
| 425 // Allocate the iterator on the heap to prevent an exit time destructor. | |
| 426 static hunspell::WordIterator& word_iterator = | |
| 427 *new hunspell::WordIterator(bdict_reader->GetAllWordIterator()); | |
| 428 if (col < 0) { | |
| 429 col = 1; | |
| 430 word_iterator = bdict_reader->GetAllWordIterator(); | |
| 431 } | |
| 432 | |
| 433 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; | |
| 434 static const int kMaxWordLen = 128; | |
| 435 static char word[kMaxWordLen]; | |
| 436 int affix_count = word_iterator.Advance(word, kMaxWordLen, affix_ids); | |
| 437 if (affix_count == 0) | |
| 438 return NULL; | |
| 439 short word_len = static_cast<short>(strlen(word)); | |
| 440 | |
| 441 // Since hunspell 1.2.8, an hentry struct becomes a variable-length struct, | |
| 442 // i.e. a struct which uses its array 'word[1]' as a variable-length array. | |
| 443 // As noted above, this function is not nested. So, we just use a static | |
| 444 // struct which consists of an hentry and a char[kMaxWordLen], and initialize | |
| 445 // the static struct and return it for now. | |
| 446 // No need to create linked lists for the extra affixes. | |
| 447 static struct { | |
| 448 hentry entry; | |
| 449 char word[kMaxWordLen]; | |
| 450 } hash_entry; | |
| 451 | |
| 452 return InitHashEntry(&hash_entry.entry, sizeof(hash_entry), | |
| 453 &word[0], word_len, affix_ids[0]); | |
| 454 #else | |
| 455 if (hp && hp->next != NULL) return hp->next; | |
| 456 for (col++; col < tablesize; col++) { | |
| 457 if (tableptr[col]) return tableptr[col]; | |
| 458 } | |
| 459 // null at end and reset to start | |
| 460 col = -1; | |
| 461 return NULL; | |
| 462 #endif | |
| 463 } | |
| 464 | |
| 465 // load a munched word list and build a hash table on the fly | |
| 466 int HashMgr::load_tables(const char * tpath, const char * key) | |
| 467 { | |
| 468 #ifndef HUNSPELL_CHROME_CLIENT | |
| 469 int al; | |
| 470 char * ap; | |
| 471 char * dp; | |
| 472 char * dp2; | |
| 473 unsigned short * flags; | |
| 474 char * ts; | |
| 475 | |
| 476 // open dictionary file | |
| 477 FileMgr * dict = new FileMgr(tpath, key); | |
| 478 if (dict == NULL) return 1; | |
| 479 | |
| 480 // first read the first line of file to get hash table size */ | |
| 481 if ((ts = dict->getline()) == NULL) { | |
| 482 HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath); | |
| 483 delete dict; | |
| 484 return 2; | |
| 485 } | |
| 486 mychomp(ts); | |
| 487 | |
| 488 /* remove byte order mark */ | |
| 489 if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) { | |
| 490 memmove(ts, ts+3, strlen(ts+3)+1); | |
| 491 // warning: dic file begins with byte order mark: possible incompatibility w
ith old Hunspell versions | |
| 492 } | |
| 493 | |
| 494 tablesize = atoi(ts); | |
| 495 if (tablesize == 0) { | |
| 496 HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the di
c file\n"); | |
| 497 delete dict; | |
| 498 return 4; | |
| 499 } | |
| 500 tablesize = tablesize + 5 + USERWORD; | |
| 501 if ((tablesize %2) == 0) tablesize++; | |
| 502 | |
| 503 // allocate the hash table | |
| 504 tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *)); | |
| 505 if (! tableptr) { | |
| 506 delete dict; | |
| 507 return 3; | |
| 508 } | |
| 509 for (int i=0; i<tablesize; i++) tableptr[i] = NULL; | |
| 510 | |
| 511 // loop through all words on much list and add to hash | |
| 512 // table and create word and affix strings | |
| 513 | |
| 514 while ((ts = dict->getline()) != NULL) { | |
| 515 mychomp(ts); | |
| 516 // split each line into word and morphological description | |
| 517 dp = ts; | |
| 518 while ((dp = strchr(dp, ':')) != NULL) { | |
| 519 if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) { | |
| 520 for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--); | |
| 521 if (dp < ts) { // missing word | |
| 522 dp = NULL; | |
| 523 } else { | |
| 524 *(dp + 1) = '\0'; | |
| 525 dp = dp + 2; | |
| 526 } | |
| 527 break; | |
| 528 } | |
| 529 dp++; | |
| 530 } | |
| 531 | |
| 532 // tabulator is the old morphological field separator | |
| 533 dp2 = strchr(ts, '\t'); | |
| 534 if (dp2 && (!dp || dp2 < dp)) { | |
| 535 *dp2 = '\0'; | |
| 536 dp = dp2 + 1; | |
| 537 } | |
| 538 | |
| 539 // split each line into word and affix char strings | |
| 540 // "\/" signs slash in words (not affix separator) | |
| 541 // "/" at beginning of the line is word character (not affix separator) | |
| 542 ap = strchr(ts,'/'); | |
| 543 while (ap) { | |
| 544 if (ap == ts) { | |
| 545 ap++; | |
| 546 continue; | |
| 547 } else if (*(ap - 1) != '\\') break; | |
| 548 // replace "\/" with "/" | |
| 549 for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++); | |
| 550 ap = strchr(ap,'/'); | |
| 551 } | |
| 552 | |
| 553 if (ap) { | |
| 554 *ap = '\0'; | |
| 555 if (aliasf) { | |
| 556 int index = atoi(ap + 1); | |
| 557 al = get_aliasf(index, &flags, dict); | |
| 558 if (!al) { | |
| 559 HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n",
dict->getlinenum()); | |
| 560 *ap = '\0'; | |
| 561 } | |
| 562 } else { | |
| 563 al = decode_flags(&flags, ap + 1, dict); | |
| 564 if (al == -1) { | |
| 565 HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); | |
| 566 delete dict; | |
| 567 return 6; | |
| 568 } | |
| 569 flag_qsort(flags, 0, al); | |
| 570 } | |
| 571 } else { | |
| 572 al = 0; | |
| 573 ap = NULL; | |
| 574 flags = NULL; | |
| 575 } | |
| 576 | |
| 577 int captype; | |
| 578 int wbl = strlen(ts); | |
| 579 int wcl = get_clen_and_captype(ts, wbl, &captype); | |
| 580 // add the word and its index plus its capitalized form optionally | |
| 581 if (add_word(ts,wbl,wcl,flags,al,dp, false) || | |
| 582 add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) { | |
| 583 delete dict; | |
| 584 return 5; | |
| 585 } | |
| 586 } | |
| 587 | |
| 588 delete dict; | |
| 589 #endif | |
| 590 return 0; | |
| 591 } | |
| 592 | |
| 593 // the hash function is a simple load and rotate | |
| 594 // algorithm borrowed | |
| 595 | |
| 596 int HashMgr::hash(const char * word) const | |
| 597 { | |
| 598 #ifdef HUNSPELL_CHROME_CLIENT | |
| 599 return 0; | |
| 600 #else | |
| 601 long hv = 0; | |
| 602 for (int i=0; i < 4 && *word != 0; i++) | |
| 603 hv = (hv << 8) | (*word++); | |
| 604 while (*word != 0) { | |
| 605 ROTATE(hv,ROTATE_LEN); | |
| 606 hv ^= (*word++); | |
| 607 } | |
| 608 return (unsigned long) hv % tablesize; | |
| 609 #endif | |
| 610 } | |
| 611 | |
| 612 int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af)
{ | |
| 613 int len; | |
| 614 if (*flags == '\0') { | |
| 615 *result = NULL; | |
| 616 return 0; | |
| 617 } | |
| 618 switch (flag_mode) { | |
| 619 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) | |
| 620 len = strlen(flags); | |
| 621 if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector
\n", af->getlinenum()); | |
| 622 len /= 2; | |
| 623 *result = (unsigned short *) malloc(len * sizeof(short)); | |
| 624 if (!*result) return -1; | |
| 625 for (int i = 0; i < len; i++) { | |
| 626 (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned sh
ort) flags[i * 2 + 1]; | |
| 627 } | |
| 628 break; | |
| 629 } | |
| 630 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 452
1 23 233) | |
| 631 int i; | |
| 632 len = 1; | |
| 633 char * src = flags; | |
| 634 unsigned short * dest; | |
| 635 char * p; | |
| 636 for (p = flags; *p; p++) { | |
| 637 if (*p == ',') len++; | |
| 638 } | |
| 639 *result = (unsigned short *) malloc(len * sizeof(short)); | |
| 640 if (!*result) return -1; | |
| 641 dest = *result; | |
| 642 for (p = flags; *p; p++) { | |
| 643 if (*p == ',') { | |
| 644 i = atoi(src); | |
| 645 if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: fla
g id %d is too large (max: %d)\n", | |
| 646 af->getlinenum(), i, DEFAULTFLAGS - 1); | |
| 647 *dest = (unsigned short) i; | |
| 648 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong
flag id\n", af->getlinenum()); | |
| 649 src = p + 1; | |
| 650 dest++; | |
| 651 } | |
| 652 } | |
| 653 i = atoi(src); | |
| 654 if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id
%d is too large (max: %d)\n", | |
| 655 af->getlinenum(), i, DEFAULTFLAGS - 1); | |
| 656 *dest = (unsigned short) i; | |
| 657 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong fla
g id\n", af->getlinenum()); | |
| 658 break; | |
| 659 } | |
| 660 case FLAG_UNI: { // UTF-8 characters | |
| 661 w_char w[BUFSIZE/2]; | |
| 662 len = u8_u16(w, BUFSIZE/2, flags); | |
| 663 *result = (unsigned short *) malloc(len * sizeof(short)); | |
| 664 if (!*result) return -1; | |
| 665 memcpy(*result, w, len * sizeof(short)); | |
| 666 break; | |
| 667 } | |
| 668 default: { // Ispell's one-character flags (erfg -> e r f g) | |
| 669 unsigned short * dest; | |
| 670 len = strlen(flags); | |
| 671 *result = (unsigned short *) malloc(len * sizeof(short)); | |
| 672 if (!*result) return -1; | |
| 673 dest = *result; | |
| 674 for (unsigned char * p = (unsigned char *) flags; *p; p++) { | |
| 675 *dest = (unsigned short) *p; | |
| 676 dest++; | |
| 677 } | |
| 678 } | |
| 679 } | |
| 680 return len; | |
| 681 } | |
| 682 | |
| 683 unsigned short HashMgr::decode_flag(const char * f) { | |
| 684 unsigned short s = 0; | |
| 685 int i; | |
| 686 switch (flag_mode) { | |
| 687 case FLAG_LONG: | |
| 688 s = ((unsigned short) f[0] << 8) + (unsigned short) f[1]; | |
| 689 break; | |
| 690 case FLAG_NUM: | |
| 691 i = atoi(f); | |
| 692 if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is to
o large (max: %d)\n", i, DEFAULTFLAGS - 1); | |
| 693 s = (unsigned short) i; | |
| 694 break; | |
| 695 case FLAG_UNI: | |
| 696 u8_u16((w_char *) &s, 1, f); | |
| 697 break; | |
| 698 default: | |
| 699 s = (unsigned short) *((unsigned char *)f); | |
| 700 } | |
| 701 if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); | |
| 702 return s; | |
| 703 } | |
| 704 | |
| 705 char * HashMgr::encode_flag(unsigned short f) { | |
| 706 unsigned char ch[10]; | |
| 707 if (f==0) return mystrdup("(NULL)"); | |
| 708 if (flag_mode == FLAG_LONG) { | |
| 709 ch[0] = (unsigned char) (f >> 8); | |
| 710 ch[1] = (unsigned char) (f - ((f >> 8) << 8)); | |
| 711 ch[2] = '\0'; | |
| 712 } else if (flag_mode == FLAG_NUM) { | |
| 713 sprintf((char *) ch, "%d", f); | |
| 714 } else if (flag_mode == FLAG_UNI) { | |
| 715 u16_u8((char *) &ch, 10, (w_char *) &f, 1); | |
| 716 } else { | |
| 717 ch[0] = (unsigned char) (f); | |
| 718 ch[1] = '\0'; | |
| 719 } | |
| 720 return mystrdup((char *) ch); | |
| 721 } | |
| 722 | |
| 723 // read in aff file and set flag mode | |
| 724 int HashMgr::load_config(const char * affpath, const char * key) | |
| 725 { | |
| 726 char * line; // io buffers | |
| 727 int firstline = 1; | |
| 728 | |
| 729 // open the affix file | |
| 730 #ifdef HUNSPELL_CHROME_CLIENT | |
| 731 hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator(); | |
| 732 FileMgr * afflst = new FileMgr(&iterator); | |
| 733 #else | |
| 734 FileMgr * afflst = new FileMgr(affpath, key); | |
| 735 #endif | |
| 736 if (!afflst) { | |
| 737 HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n
",affpath); | |
| 738 return 1; | |
| 739 } | |
| 740 | |
| 741 // read in each line ignoring any that do not | |
| 742 // start with a known line type indicator | |
| 743 | |
| 744 while ((line = afflst->getline()) != NULL) { | |
| 745 mychomp(line); | |
| 746 | |
| 747 /* remove byte order mark */ | |
| 748 if (firstline) { | |
| 749 firstline = 0; | |
| 750 if (strncmp(line,"\xEF\xBB\xBF",3) == 0) memmove(line, line+3, strlen(l
ine+3)+1); | |
| 751 } | |
| 752 | |
| 753 /* parse in the try string */ | |
| 754 if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) { | |
| 755 if (flag_mode != FLAG_CHAR) { | |
| 756 HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions o
f the FLAG affix file parameter\n", afflst->getlinenum()); | |
| 757 } | |
| 758 if (strstr(line, "long")) flag_mode = FLAG_LONG; | |
| 759 if (strstr(line, "num")) flag_mode = FLAG_NUM; | |
| 760 if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI; | |
| 761 if (flag_mode == FLAG_CHAR) { | |
| 762 HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `lon
g' or `UTF-8' parameter\n", afflst->getlinenum()); | |
| 763 } | |
| 764 } | |
| 765 if (strncmp(line,"FORBIDDENWORD",13) == 0) { | |
| 766 char * st = NULL; | |
| 767 if (parse_string(line, &st, afflst->getlinenum())) { | |
| 768 delete afflst; | |
| 769 return 1; | |
| 770 } | |
| 771 forbiddenword = decode_flag(st); | |
| 772 free(st); | |
| 773 } | |
| 774 if (strncmp(line, "SET", 3) == 0) { | |
| 775 if (parse_string(line, &enc, afflst->getlinenum())) { | |
| 776 delete afflst; | |
| 777 return 1; | |
| 778 } | |
| 779 if (strcmp(enc, "UTF-8") == 0) { | |
| 780 utf8 = 1; | |
| 781 #ifndef OPENOFFICEORG | |
| 782 #ifndef MOZILLA_CLIENT | |
| 783 initialize_utf_tbl(); | |
| 784 #endif | |
| 785 #endif | |
| 786 } else csconv = get_current_cs(enc); | |
| 787 } | |
| 788 if (strncmp(line, "LANG", 4) == 0) { | |
| 789 if (parse_string(line, &lang, afflst->getlinenum())) { | |
| 790 delete afflst; | |
| 791 return 1; | |
| 792 } | |
| 793 langnum = get_lang_num(lang); | |
| 794 } | |
| 795 | |
| 796 /* parse in the ignored characters (for example, Arabic optional diacriti
cs characters */ | |
| 797 if (strncmp(line,"IGNORE",6) == 0) { | |
| 798 if (parse_array(line, &ignorechars, &ignorechars_utf16, | |
| 799 &ignorechars_utf16_len, utf8, afflst->getlinenum())) { | |
| 800 delete afflst; | |
| 801 return 1; | |
| 802 } | |
| 803 } | |
| 804 | |
| 805 if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) { | |
| 806 if (parse_aliasf(line, afflst)) { | |
| 807 delete afflst; | |
| 808 return 1; | |
| 809 } | |
| 810 } | |
| 811 | |
| 812 if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) { | |
| 813 if (parse_aliasm(line, afflst)) { | |
| 814 delete afflst; | |
| 815 return 1; | |
| 816 } | |
| 817 } | |
| 818 | |
| 819 if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1; | |
| 820 if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && iss
pace(line[3])) break; | |
| 821 } | |
| 822 if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING); | |
| 823 delete afflst; | |
| 824 return 0; | |
| 825 } | |
| 826 | |
| 827 /* parse in the ALIAS table */ | |
| 828 int HashMgr::parse_aliasf(char * line, FileMgr * af) | |
| 829 { | |
| 830 if (numaliasf != 0) { | |
| 831 HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", a
f->getlinenum()); | |
| 832 return 1; | |
| 833 } | |
| 834 char * tp = line; | |
| 835 char * piece; | |
| 836 int i = 0; | |
| 837 int np = 0; | |
| 838 piece = mystrsep(&tp, 0); | |
| 839 while (piece) { | |
| 840 if (*piece != '\0') { | |
| 841 switch(i) { | |
| 842 case 0: { np++; break; } | |
| 843 case 1: { | |
| 844 numaliasf = atoi(piece); | |
| 845 if (numaliasf < 1) { | |
| 846 numaliasf = 0; | |
| 847 aliasf = NULL; | |
| 848 aliasflen = NULL; | |
| 849 HUNSPELL_WARNING(stderr, "error: line %d: bad entry nu
mber\n", af->getlinenum()); | |
| 850 return 1; | |
| 851 } | |
| 852 aliasf = (unsigned short **) malloc(numaliasf * sizeof(un
signed short *)); | |
| 853 aliasflen = (unsigned short *) malloc(numaliasf * sizeof(
short)); | |
| 854 if (!aliasf || !aliasflen) { | |
| 855 numaliasf = 0; | |
| 856 if (aliasf) free(aliasf); | |
| 857 if (aliasflen) free(aliasflen); | |
| 858 aliasf = NULL; | |
| 859 aliasflen = NULL; | |
| 860 return 1; | |
| 861 } | |
| 862 np++; | |
| 863 break; | |
| 864 } | |
| 865 default: break; | |
| 866 } | |
| 867 i++; | |
| 868 } | |
| 869 piece = mystrsep(&tp, 0); | |
| 870 } | |
| 871 if (np != 2) { | |
| 872 numaliasf = 0; | |
| 873 free(aliasf); | |
| 874 free(aliasflen); | |
| 875 aliasf = NULL; | |
| 876 aliasflen = NULL; | |
| 877 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum(
)); | |
| 878 return 1; | |
| 879 } | |
| 880 | |
| 881 /* now parse the numaliasf lines to read in the remainder of the table */ | |
| 882 char * nl; | |
| 883 for (int j=0; j < numaliasf; j++) { | |
| 884 if ((nl = af->getline()) == NULL) return 1; | |
| 885 mychomp(nl); | |
| 886 tp = nl; | |
| 887 i = 0; | |
| 888 aliasf[j] = NULL; | |
| 889 aliasflen[j] = 0; | |
| 890 piece = mystrsep(&tp, 0); | |
| 891 while (piece) { | |
| 892 if (*piece != '\0') { | |
| 893 switch(i) { | |
| 894 case 0: { | |
| 895 if (strncmp(piece,"AF",2) != 0) { | |
| 896 numaliasf = 0; | |
| 897 free(aliasf); | |
| 898 free(aliasflen); | |
| 899 aliasf = NULL; | |
| 900 aliasflen = NULL; | |
| 901 HUNSPELL_WARNING(stderr, "error: line %d: table
is corrupt\n", af->getlinenum()); | |
| 902 return 1; | |
| 903 } | |
| 904 break; | |
| 905 } | |
| 906 case 1: { | |
| 907 aliasflen[j] = (unsigned short) decode_flags(&(alias
f[j]), piece, af); | |
| 908 flag_qsort(aliasf[j], 0, aliasflen[j]); | |
| 909 break; | |
| 910 } | |
| 911 default: break; | |
| 912 } | |
| 913 i++; | |
| 914 } | |
| 915 piece = mystrsep(&tp, 0); | |
| 916 } | |
| 917 if (!aliasf[j]) { | |
| 918 free(aliasf); | |
| 919 free(aliasflen); | |
| 920 aliasf = NULL; | |
| 921 aliasflen = NULL; | |
| 922 numaliasf = 0; | |
| 923 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->
getlinenum()); | |
| 924 return 1; | |
| 925 } | |
| 926 } | |
| 927 return 0; | |
| 928 } | |
| 929 | |
| 930 #ifdef HUNSPELL_CHROME_CLIENT | |
| 931 int HashMgr::LoadAFLines() | |
| 932 { | |
| 933 utf8 = 1; // We always use UTF-8. | |
| 934 | |
| 935 // Read in all the AF lines which tell us the rules for each affix group ID. | |
| 936 hunspell::LineIterator iterator = bdict_reader->GetAfLineIterator(); | |
| 937 FileMgr afflst(&iterator); | |
| 938 while (char* line = afflst.getline()) { | |
| 939 int rv = parse_aliasf(line, &afflst); | |
| 940 if (rv) | |
| 941 return rv; | |
| 942 } | |
| 943 | |
| 944 return 0; | |
| 945 } | |
| 946 | |
| 947 hentry* HashMgr::InitHashEntry(hentry* entry, | |
| 948 size_t item_size, | |
| 949 const char* word, | |
| 950 int word_length, | |
| 951 int affix_index) const { | |
| 952 // Return if the given buffer doesn't have enough space for a hentry struct | |
| 953 // or the given word is too long. | |
| 954 // Our BDICT cannot handle words longer than (128 - 1) bytes. So, it is | |
| 955 // better to return an error if the given word is too long and prevent | |
| 956 // an unexpected result caused by a long word. | |
| 957 const int kMaxWordLen = 128; | |
| 958 if (item_size < sizeof(hentry) + word_length + 1 || | |
| 959 word_length >= kMaxWordLen) | |
| 960 return NULL; | |
| 961 | |
| 962 // Initialize a hentry struct with the given parameters, and | |
| 963 // append the given string at the end of this hentry struct. | |
| 964 memset(entry, 0, item_size); | |
| 965 FileMgr af(NULL); | |
| 966 entry->alen = static_cast<short>( | |
| 967 const_cast<HashMgr*>(this)->get_aliasf(affix_index, &entry->astr, &af)); | |
| 968 entry->blen = static_cast<unsigned char>(word_length); | |
| 969 memcpy(&entry->word, word, word_length); | |
| 970 | |
| 971 return entry; | |
| 972 } | |
| 973 | |
| 974 hentry* HashMgr::CreateHashEntry(const char* word, | |
| 975 int word_length, | |
| 976 int affix_index) const { | |
| 977 // Return if the given word is too long. | |
| 978 // (See the comment in HashMgr::InitHashEntry().) | |
| 979 const int kMaxWordLen = 128; | |
| 980 if (word_length >= kMaxWordLen) | |
| 981 return NULL; | |
| 982 | |
| 983 const size_t kEntrySize = sizeof(hentry) + word_length + 1; | |
| 984 struct hentry* entry = reinterpret_cast<hentry*>(malloc(kEntrySize)); | |
| 985 if (entry) | |
| 986 InitHashEntry(entry, kEntrySize, word, word_length, affix_index); | |
| 987 | |
| 988 return entry; | |
| 989 } | |
| 990 | |
| 991 void HashMgr::DeleteHashEntry(hentry* entry) const { | |
| 992 free(entry); | |
| 993 } | |
| 994 | |
| 995 hentry* HashMgr::AffixIDsToHentry(char* word, | |
| 996 int* affix_ids, | |
| 997 int affix_count) const | |
| 998 { | |
| 999 if (affix_count == 0) | |
| 1000 return NULL; | |
| 1001 | |
| 1002 HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache; | |
| 1003 std::string std_word(word); | |
| 1004 HEntryCache::iterator found = cache.find(std_word); | |
| 1005 if (found != cache.end()) { | |
| 1006 // We must return an existing hentry for the same word if we've previously | |
| 1007 // handed one out. Hunspell will compare pointers in some cases to see if | |
| 1008 // two words it has found are the same. | |
| 1009 return found->second; | |
| 1010 } | |
| 1011 | |
| 1012 short word_len = static_cast<short>(strlen(word)); | |
| 1013 | |
| 1014 // We can get a number of prefixes per word. There will normally be only one, | |
| 1015 // but if not, there will be a linked list of "hentry"s for the "homonym"s | |
| 1016 // for the word. | |
| 1017 struct hentry* first_he = NULL; | |
| 1018 struct hentry* prev_he = NULL; // For making linked list. | |
| 1019 for (int i = 0; i < affix_count; i++) { | |
| 1020 struct hentry* he = CreateHashEntry(word, word_len, affix_ids[i]); | |
| 1021 if (!he) | |
| 1022 break; | |
| 1023 if (i == 0) | |
| 1024 first_he = he; | |
| 1025 if (prev_he) | |
| 1026 prev_he->next_homonym = he; | |
| 1027 prev_he = he; | |
| 1028 } | |
| 1029 | |
| 1030 cache[std_word] = first_he; // Save this word in the cache for later. | |
| 1031 return first_he; | |
| 1032 } | |
| 1033 | |
| 1034 hentry* HashMgr::GetHentryFromHEntryCache(char* word) { | |
| 1035 HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache; | |
| 1036 std::string std_word(word); | |
| 1037 HEntryCache::iterator found = cache.find(std_word); | |
| 1038 if (found != cache.end()) | |
| 1039 return found->second; | |
| 1040 else | |
| 1041 return NULL; | |
| 1042 } | |
| 1043 #endif | |
| 1044 | |
| 1045 int HashMgr::is_aliasf() { | |
| 1046 return (aliasf != NULL); | |
| 1047 } | |
| 1048 | |
| 1049 int HashMgr::get_aliasf(int index, unsigned short ** fvec, FileMgr * af) { | |
| 1050 if ((index > 0) && (index <= numaliasf)) { | |
| 1051 *fvec = aliasf[index - 1]; | |
| 1052 return aliasflen[index - 1]; | |
| 1053 } | |
| 1054 HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", af->g
etlinenum(), index); | |
| 1055 *fvec = NULL; | |
| 1056 return 0; | |
| 1057 } | |
| 1058 | |
| 1059 /* parse morph alias definitions */ | |
| 1060 int HashMgr::parse_aliasm(char * line, FileMgr * af) | |
| 1061 { | |
| 1062 if (numaliasm != 0) { | |
| 1063 HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", a
f->getlinenum()); | |
| 1064 return 1; | |
| 1065 } | |
| 1066 char * tp = line; | |
| 1067 char * piece; | |
| 1068 int i = 0; | |
| 1069 int np = 0; | |
| 1070 piece = mystrsep(&tp, 0); | |
| 1071 while (piece) { | |
| 1072 if (*piece != '\0') { | |
| 1073 switch(i) { | |
| 1074 case 0: { np++; break; } | |
| 1075 case 1: { | |
| 1076 numaliasm = atoi(piece); | |
| 1077 if (numaliasm < 1) { | |
| 1078 HUNSPELL_WARNING(stderr, "error: line %d: bad entry nu
mber\n", af->getlinenum()); | |
| 1079 return 1; | |
| 1080 } | |
| 1081 aliasm = (char **) malloc(numaliasm * sizeof(char *)); | |
| 1082 if (!aliasm) { | |
| 1083 numaliasm = 0; | |
| 1084 return 1; | |
| 1085 } | |
| 1086 np++; | |
| 1087 break; | |
| 1088 } | |
| 1089 default: break; | |
| 1090 } | |
| 1091 i++; | |
| 1092 } | |
| 1093 piece = mystrsep(&tp, 0); | |
| 1094 } | |
| 1095 if (np != 2) { | |
| 1096 numaliasm = 0; | |
| 1097 free(aliasm); | |
| 1098 aliasm = NULL; | |
| 1099 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum(
)); | |
| 1100 return 1; | |
| 1101 } | |
| 1102 | |
| 1103 /* now parse the numaliasm lines to read in the remainder of the table */ | |
| 1104 char * nl = line; | |
| 1105 for (int j=0; j < numaliasm; j++) { | |
| 1106 if ((nl = af->getline()) == NULL) return 1; | |
| 1107 mychomp(nl); | |
| 1108 tp = nl; | |
| 1109 i = 0; | |
| 1110 aliasm[j] = NULL; | |
| 1111 piece = mystrsep(&tp, ' '); | |
| 1112 while (piece) { | |
| 1113 if (*piece != '\0') { | |
| 1114 switch(i) { | |
| 1115 case 0: { | |
| 1116 if (strncmp(piece,"AM",2) != 0) { | |
| 1117 HUNSPELL_WARNING(stderr, "error: line %d: table
is corrupt\n", af->getlinenum()); | |
| 1118 numaliasm = 0; | |
| 1119 free(aliasm); | |
| 1120 aliasm = NULL; | |
| 1121 return 1; | |
| 1122 } | |
| 1123 break; | |
| 1124 } | |
| 1125 case 1: { | |
| 1126 // add the remaining of the line | |
| 1127 if (*tp) { | |
| 1128 *(tp - 1) = ' '; | |
| 1129 tp = tp + strlen(tp); | |
| 1130 } | |
| 1131 if (complexprefixes) { | |
| 1132 if (utf8) reverseword_utf(piece); | |
| 1133 else reverseword(piece); | |
| 1134 } | |
| 1135 aliasm[j] = mystrdup(piece); | |
| 1136 if (!aliasm[j]) { | |
| 1137 numaliasm = 0; | |
| 1138 free(aliasm); | |
| 1139 aliasm = NULL; | |
| 1140 return 1; | |
| 1141 } | |
| 1142 break; } | |
| 1143 default: break; | |
| 1144 } | |
| 1145 i++; | |
| 1146 } | |
| 1147 piece = mystrsep(&tp, ' '); | |
| 1148 } | |
| 1149 if (!aliasm[j]) { | |
| 1150 numaliasm = 0; | |
| 1151 free(aliasm); | |
| 1152 aliasm = NULL; | |
| 1153 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->
getlinenum()); | |
| 1154 return 1; | |
| 1155 } | |
| 1156 } | |
| 1157 return 0; | |
| 1158 } | |
| 1159 | |
| 1160 int HashMgr::is_aliasm() { | |
| 1161 return (aliasm != NULL); | |
| 1162 } | |
| 1163 | |
| 1164 char * HashMgr::get_aliasm(int index) { | |
| 1165 if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1]; | |
| 1166 HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index); | |
| 1167 return NULL; | |
| 1168 } | |
| OLD | NEW |