| OLD | NEW |
| 1 #include "license.hunspell" | 1 #include "license.hunspell" |
| 2 #include "license.myspell" | 2 #include "license.myspell" |
| 3 | 3 |
| 4 #ifndef MOZILLA_CLIENT | 4 #ifndef MOZILLA_CLIENT |
| 5 #include <cstdlib> | 5 #include <cstdlib> |
| 6 #include <cstring> | 6 #include <cstring> |
| 7 #include <cstdio> | 7 #include <cstdio> |
| 8 #include <cctype> | 8 #include <cctype> |
| 9 #else | 9 #else |
| 10 #include <stdlib.h> | 10 #include <stdlib.h> |
| 11 #include <string.h> | 11 #include <string.h> |
| 12 #include <stdio.h> | 12 #include <stdio.h> |
| 13 #include <ctype.h> | 13 #include <ctype.h> |
| 14 #endif | 14 #endif |
| 15 | 15 |
| 16 #include "hashmgr.hxx" | 16 #include "hashmgr.hxx" |
| 17 #include "csutil.hxx" | 17 #include "csutil.hxx" |
| 18 #include "atypes.hxx" | 18 #include "atypes.hxx" |
| 19 | 19 |
| 20 #ifdef MOZILLA_CLIENT | 20 #ifdef MOZILLA_CLIENT |
| 21 #ifdef __SUNPRO_CC // for SunONE Studio compiler | 21 #ifdef __SUNPRO_CC // for SunONE Studio compiler |
| 22 using namespace std; | 22 using namespace std; |
| 23 #endif | 23 #endif |
| 24 #else | 24 #else |
| 25 #ifndef W32 | 25 #ifndef WIN32 |
| 26 using namespace std; | 26 using namespace std; |
| 27 #endif | 27 #endif |
| 28 #endif | 28 #endif |
| 29 | 29 |
| 30 // build a hash table from a munched word list | 30 // build a hash table from a munched word list |
| 31 |
| 31 #ifdef HUNSPELL_CHROME_CLIENT | 32 #ifdef HUNSPELL_CHROME_CLIENT |
| 32 HashMgr::HashMgr(hunspell::BDictReader* reader) | 33 HashMgr::HashMgr(hunspell::BDictReader* reader) |
| 33 { | 34 { |
| 34 bdict_reader = reader; | 35 bdict_reader = reader; |
| 35 #else | 36 #else |
| 36 HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle) | 37 HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle, const char * key) |
| 37 { | 38 { |
| 38 #endif | 39 #endif |
| 39 tablesize = 0; | 40 tablesize = 0; |
| 40 tableptr = NULL; | 41 tableptr = NULL; |
| 41 flag_mode = FLAG_CHAR; | 42 flag_mode = FLAG_CHAR; |
| 42 complexprefixes = 0; | 43 complexprefixes = 0; |
| 43 utf8 = 0; | 44 utf8 = 0; |
| 45 langnum = 0; |
| 46 lang = NULL; |
| 47 enc = NULL; |
| 48 csconv = 0; |
| 44 ignorechars = NULL; | 49 ignorechars = NULL; |
| 45 ignorechars_utf16 = NULL; | 50 ignorechars_utf16 = NULL; |
| 46 ignorechars_utf16_len = 0; | 51 ignorechars_utf16_len = 0; |
| 47 numaliasf = 0; | 52 numaliasf = 0; |
| 48 aliasf = NULL; | 53 aliasf = NULL; |
| 49 numaliasm = 0; | 54 numaliasm = 0; |
| 50 aliasm = NULL; | 55 aliasm = NULL; |
| 56 forbiddenword = FORBIDDENWORD; // forbidden word signing flag |
| 51 #ifdef HUNSPELL_CHROME_CLIENT | 57 #ifdef HUNSPELL_CHROME_CLIENT |
| 52 // No tables to load, just the AF config. | 58 // No tables to load, just the AF config. |
| 53 int ec = load_config(); | 59 int ec = load_config(); |
| 54 #else | 60 #else |
| 55 load_config(aff_handle); | 61 load_config(aff_handle); |
| 56 int ec = load_tables(dic_handle); | 62 int ec = load_tables(dic_handle, key); |
| 57 #endif | 63 #endif |
| 58 if (ec) { | 64 if (ec) { |
| 59 /* error condition - what should we do here */ | 65 /* error condition - what should we do here */ |
| 60 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec); | 66 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec); |
| 61 if (tableptr) { | 67 if (tableptr) { |
| 62 free(tableptr); | 68 free(tableptr); |
| 63 tableptr = NULL; | 69 tableptr = NULL; |
| 64 } | 70 } |
| 65 tablesize = 0; | 71 tablesize = 0; |
| 66 } | 72 } |
| 67 } | 73 } |
| 68 | 74 |
| 69 | 75 |
| 70 HashMgr::~HashMgr() | 76 HashMgr::~HashMgr() |
| 71 { | 77 { |
| 72 if (tableptr) { | 78 if (tableptr) { |
| 73 // now pass through hash table freeing up everything | 79 // now pass through hash table freeing up everything |
| 74 // go through column by column of the table | 80 // go through column by column of the table |
| 75 for (int i=0; i < tablesize; i++) { | 81 for (int i=0; i < tablesize; i++) { |
| 76 struct hentry * pt = &tableptr[i]; | 82 struct hentry * pt = tableptr[i]; |
| 77 struct hentry * nt = NULL; | 83 struct hentry * nt = NULL; |
| 78 if (pt) { | |
| 79 if (pt->astr && !aliasf) free(pt->astr); | |
| 80 if (pt->word) free(pt->word); | |
| 81 #ifdef HUNSPELL_EXPERIMENTAL | |
| 82 if (pt->description && !aliasm) free(pt->description); | |
| 83 #endif | |
| 84 pt = pt->next; | |
| 85 } | |
| 86 while(pt) { | 84 while(pt) { |
| 87 nt = pt->next; | 85 nt = pt->next; |
| 88 if (pt->astr && !aliasf) free(pt->astr); | 86 if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))
) free(pt->astr); |
| 89 if (pt->word) free(pt->word); | |
| 90 #ifdef HUNSPELL_EXPERIMENTAL | |
| 91 if (pt->description && !aliasm) free(pt->description); | |
| 92 #endif | |
| 93 free(pt); | 87 free(pt); |
| 94 pt = nt; | 88 pt = nt; |
| 95 } | 89 } |
| 96 } | 90 } |
| 97 free(tableptr); | 91 free(tableptr); |
| 98 tableptr = NULL; | |
| 99 } | 92 } |
| 100 tablesize = 0; | 93 tablesize = 0; |
| 101 | 94 |
| 102 if (aliasf) { | 95 if (aliasf) { |
| 103 for (int j = 0; j < (numaliasf); j++) free(aliasf[j]); | 96 for (int j = 0; j < (numaliasf); j++) free(aliasf[j]); |
| 104 free(aliasf); | 97 free(aliasf); |
| 105 aliasf = NULL; | 98 aliasf = NULL; |
| 106 if (aliasflen) { | 99 if (aliasflen) { |
| 107 free(aliasflen); | 100 free(aliasflen); |
| 108 aliasflen = NULL; | 101 aliasflen = NULL; |
| 109 } | 102 } |
| 110 } | 103 } |
| 111 if (aliasm) { | 104 if (aliasm) { |
| 112 for (int j = 0; j < (numaliasm); j++) free(aliasm[j]); | 105 for (int j = 0; j < (numaliasm); j++) free(aliasm[j]); |
| 113 free(aliasm); | 106 free(aliasm); |
| 114 aliasm = NULL; | 107 aliasm = NULL; |
| 115 } | 108 } |
| 109 |
| 110 #ifndef OPENOFFICEORG |
| 111 #ifndef MOZILLA_CLIENT |
| 112 if (utf8) free_utf_tbl(); |
| 113 #endif |
| 114 #endif |
| 115 |
| 116 if (enc) free(enc); |
| 117 if (lang) free(lang); |
| 116 | 118 |
| 117 if (ignorechars) free(ignorechars); | 119 if (ignorechars) free(ignorechars); |
| 118 if (ignorechars_utf16) free(ignorechars_utf16); | 120 if (ignorechars_utf16) free(ignorechars_utf16); |
| 119 | 121 |
| 120 #ifdef HUNSPELL_CHROME_CLIENT | 122 #ifdef HUNSPELL_CHROME_CLIENT |
| 121 EmptyHentryCache(); | 123 EmptyHentryCache(); |
| 122 for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin(); | 124 for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin(); |
| 123 it != pointer_to_strings_.end(); ++it) { | 125 it != pointer_to_strings_.end(); ++it) { |
| 124 delete *it; | 126 delete *it; |
| 125 } | 127 } |
| (...skipping 11 matching lines...) Expand all Loading... |
| 137 hentry* next = cur->next_homonym; | 139 hentry* next = cur->next_homonym; |
| 138 delete cur; | 140 delete cur; |
| 139 cur = next; | 141 cur = next; |
| 140 } | 142 } |
| 141 } | 143 } |
| 142 hentry_cache.clear(); | 144 hentry_cache.clear(); |
| 143 } | 145 } |
| 144 #endif | 146 #endif |
| 145 | 147 |
| 146 // lookup a root word in the hashtable | 148 // lookup a root word in the hashtable |
| 147 | |
| 148 struct hentry * HashMgr::lookup(const char *word) const | 149 struct hentry * HashMgr::lookup(const char *word) const |
| 149 { | 150 { |
| 150 #ifdef HUNSPELL_CHROME_CLIENT | 151 #ifdef HUNSPELL_CHROME_CLIENT |
| 151 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; | 152 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; |
| 152 int affix_count = bdict_reader->FindWord(word, affix_ids); | 153 int affix_count = bdict_reader->FindWord(word, affix_ids); |
| 153 if (affix_count == 0) { // look for custom added word | 154 if (affix_count == 0) { // look for custom added word |
| 154 std::map<StringPiece, int>::const_iterator iter = | 155 std::map<StringPiece, int>::const_iterator iter = |
| 155 custom_word_to_affix_id_map_.find(word); | 156 custom_word_to_affix_id_map_.find(word); |
| 156 if (iter != custom_word_to_affix_id_map_.end()) { | 157 if (iter != custom_word_to_affix_id_map_.end()) { |
| 157 affix_count = 1; | 158 affix_count = 1; |
| 158 affix_ids[0] = iter->second; | 159 affix_ids[0] = iter->second; |
| 159 } | 160 } |
| 160 } | 161 } |
| 161 | 162 |
| 162 static const int kMaxWordLen = 128; | 163 static const int kMaxWordLen = 128; |
| 163 static char word_buf[kMaxWordLen]; | 164 static char word_buf[kMaxWordLen]; |
| 164 strncpy(word_buf, word, kMaxWordLen); | 165 strncpy(word_buf, word, kMaxWordLen); |
| 165 | 166 |
| 166 return AffixIDsToHentry(word_buf, affix_ids, affix_count); | 167 return AffixIDsToHentry(word_buf, affix_ids, affix_count); |
| 167 #else | 168 #else |
| 168 struct hentry * dp; | 169 struct hentry * dp; |
| 169 if (tableptr) { | 170 if (tableptr) { |
| 170 dp = &tableptr[hash(word)]; | 171 dp = tableptr[hash(word)]; |
| 171 if (dp->word == NULL) return NULL; | 172 if (!dp) return NULL; |
| 172 for ( ; dp != NULL; dp = dp->next) { | 173 for ( ; dp != NULL; dp = dp->next) { |
| 173 if (strcmp(word,dp->word) == 0) return dp; | 174 if (strcmp(word,&(dp->word)) == 0) return dp; |
| 174 } | 175 } |
| 175 } | 176 } |
| 176 return NULL; | 177 return NULL; |
| 177 #endif | 178 #endif |
| 178 } | 179 } |
| 179 | 180 |
| 180 // add a word to the hash table (private) | 181 // add a word to the hash table (private) |
| 181 | 182 int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, |
| 182 int HashMgr::add_word(const char * word, int wl, unsigned short * aff, int al, c
onst char * desc) | 183 int al, const char * desc, bool onlyupcase) |
| 183 { | 184 { |
| 184 #ifndef HUNSPELL_CHROME_CLIENT | 185 #ifndef HUNSPELL_CHROME_CLIENT |
| 185 char * st = mystrdup(word); | 186 bool upcasehomonym = false; |
| 186 if (wl && !st) return 1; | 187 int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0; |
| 188 // variable-length hash record with word and optional fields |
| 189 struct hentry* hp = |
| 190 » (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl); |
| 191 if (!hp) return 1; |
| 192 char * hpw = &(hp->word); |
| 193 strcpy(hpw, word); |
| 187 if (ignorechars != NULL) { | 194 if (ignorechars != NULL) { |
| 188 if (utf8) { | 195 if (utf8) { |
| 189 remove_ignored_chars_utf(st, ignorechars_utf16, ignorechars_utf16_len); | 196 remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len); |
| 190 } else { | 197 } else { |
| 191 remove_ignored_chars(st, ignorechars); | 198 remove_ignored_chars(hpw, ignorechars); |
| 192 } | 199 } |
| 193 } | 200 } |
| 194 if (complexprefixes) { | 201 if (complexprefixes) { |
| 195 if (utf8) reverseword_utf(st); else reverseword(st); | 202 if (utf8) reverseword_utf(hpw); else reverseword(hpw); |
| 196 } | 203 } |
| 197 int i = hash(st); | 204 |
| 198 struct hentry * dp = &tableptr[i]; | 205 int i = hash(hpw); |
| 199 if (dp->word == NULL) { | 206 |
| 200 dp->wlen = (short) wl; | 207 hp->blen = (unsigned char) wbl; |
| 201 dp->alen = (short) al; | 208 hp->clen = (unsigned char) wcl; |
| 202 dp->word = st; | 209 hp->alen = (short) al; |
| 203 dp->astr = aff; | 210 hp->astr = aff; |
| 204 dp->next = NULL; | 211 hp->next = NULL; |
| 205 dp->next_homonym = NULL; | 212 hp->next_homonym = NULL; |
| 206 #ifdef HUNSPELL_EXPERIMENTAL | 213 |
| 207 if (aliasm) { | 214 // store the description string or its pointer |
| 208 dp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc); | 215 if (desc) { |
| 209 } else { | 216 hp->var = H_OPT; |
| 210 dp->description = mystrdup(desc); | 217 if (aliasm) { |
| 211 if (desc && !dp->description) return 1; | 218 hp->var += H_OPT_ALIASM; |
| 212 if (dp->description && complexprefixes) { | 219 store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc))); |
| 213 if (utf8) reverseword_utf(dp->description); else reverseword(dp-
>description); | 220 } else { |
| 221 » strcpy(hpw + wbl + 1, desc); |
| 222 if (complexprefixes) { |
| 223 if (utf8) reverseword_utf(HENTRY_DATA(hp)); |
| 224 else reverseword(HENTRY_DATA(hp)); |
| 225 } |
| 226 } |
| 227 » if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON; |
| 228 } else hp->var = 0; |
| 229 |
| 230 struct hentry * dp = tableptr[i]; |
| 231 if (!dp) { |
| 232 tableptr[i] = hp; |
| 233 return 0; |
| 234 } |
| 235 while (dp->next != NULL) { |
| 236 if ((!dp->next_homonym) && (strcmp(&(hp->word), &(dp->word)) == 0)) { |
| 237 » // remove hidden onlyupcase homonym |
| 238 if (!onlyupcase) { |
| 239 » » if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { |
| 240 » » free(dp->astr); |
| 241 » » dp->astr = hp->astr; |
| 242 » » dp->alen = hp->alen; |
| 243 » » free(hp); |
| 244 » » return 0; |
| 245 » » } else { |
| 246 » » dp->next_homonym = hp; |
| 247 » » } |
| 248 } else { |
| 249 » upcasehomonym = true; |
| 250 } |
| 251 } |
| 252 dp=dp->next; |
| 253 } |
| 254 if (strcmp(&(hp->word), &(dp->word)) == 0) { |
| 255 » // remove hidden onlyupcase homonym |
| 256 if (!onlyupcase) { |
| 257 » » if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { |
| 258 » » free(dp->astr); |
| 259 » » dp->astr = hp->astr; |
| 260 » » dp->alen = hp->alen; |
| 261 » » free(hp); |
| 262 » » return 0; |
| 263 » » } else { |
| 264 » » dp->next_homonym = hp; |
| 265 » » } |
| 266 } else { |
| 267 » upcasehomonym = true; |
| 214 } | 268 } |
| 215 } | 269 } |
| 216 #endif | 270 if (!upcasehomonym) { |
| 217 } else { | 271 » dp->next = hp; |
| 218 struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry)); | |
| 219 if (!hp) return 1; | |
| 220 hp->wlen = (short) wl; | |
| 221 hp->alen = (short) al; | |
| 222 hp->word = st; | |
| 223 hp->astr = aff; | |
| 224 hp->next = NULL; | |
| 225 hp->next_homonym = NULL; | |
| 226 #ifdef HUNSPELL_EXPERIMENTAL | |
| 227 if (aliasm) { | |
| 228 hp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc); | |
| 229 } else { | 272 } else { |
| 230 hp->description = mystrdup(desc); | 273 » // remove hidden onlyupcase homonym |
| 231 if (desc && !hp->description) return 1; | 274 » if (hp->astr) free(hp->astr); |
| 232 if (dp->description && complexprefixes) { | 275 » free(hp); |
| 233 if (utf8) reverseword_utf(hp->description); else reverseword(hp-
>description); | |
| 234 } | |
| 235 } | 276 } |
| 236 #endif | |
| 237 while (dp->next != NULL) { | |
| 238 if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_
homonym = hp; | |
| 239 dp=dp->next; | |
| 240 } | |
| 241 if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_ho
monym = hp; | |
| 242 dp->next = hp; | |
| 243 } | |
| 244 #endif // HUNSPELL_CHROME_CLIENT | 277 #endif // HUNSPELL_CHROME_CLIENT |
| 245 std::map<StringPiece, int>::iterator iter = | 278 std::map<StringPiece, int>::iterator iter = |
| 246 custom_word_to_affix_id_map_.find(word); | 279 custom_word_to_affix_id_map_.find(word); |
| 247 if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added | 280 if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added |
| 248 std::string* new_string_word = new std::string(word); | 281 std::string* new_string_word = new std::string(word); |
| 249 pointer_to_strings_.push_back(new_string_word); | 282 pointer_to_strings_.push_back(new_string_word); |
| 250 StringPiece sp(*(new_string_word)); | 283 StringPiece sp(*(new_string_word)); |
| 251 custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words | 284 custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words |
| 252 return 1; | 285 return 1; |
| 253 } | 286 } |
| 254 | 287 |
| 255 return 0; | 288 return 0; |
| 256 } | 289 } |
| 257 | 290 |
| 258 // add a custom dic. word to the hash table (public) | 291 int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl, |
| 259 int HashMgr::put_word(const char * word, int wl, char * aff) | 292 unsigned short * flags, int al, char * dp, int captype) |
| 260 { | 293 { |
| 261 unsigned short * flags; | 294 // add inner capitalized forms to handle the following allcap forms: |
| 262 int al = 0; | 295 // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG |
| 263 if (aff) { | 296 // Allcaps with suffixes: CIA's -> CIA'S |
| 264 al = decode_flags(&flags, aff); | 297 if (((captype == HUHCAP) || (captype == HUHINITCAP) || |
| 265 flag_qsort(flags, 0, al); | 298 ((captype == ALLCAP) && (flags != NULL))) && |
| 266 } else { | 299 !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) { |
| 267 flags = NULL; | 300 unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned s
hort) * (al+1)); |
| 301 » if (!flags2) return 1; |
| 302 if (al) memcpy(flags2, flags, al * sizeof(unsigned short)); |
| 303 flags2[al] = ONLYUPCASEFLAG; |
| 304 if (utf8) { |
| 305 char st[BUFSIZE]; |
| 306 w_char w[BUFSIZE]; |
| 307 int wlen = u8_u16(w, BUFSIZE, word); |
| 308 mkallsmall_utf(w, wlen, langnum); |
| 309 mkallcap_utf(w, 1, langnum); |
| 310 u16_u8(st, BUFSIZE, w, wlen); |
| 311 return add_word(st,wbl,wcl,flags2,al+1,dp, true); |
| 312 } else { |
| 313 mkallsmall(word, csconv); |
| 314 mkinitcap(word, csconv); |
| 315 return add_word(word,wbl,wcl,flags2,al+1,dp, true); |
| 316 } |
| 268 } | 317 } |
| 269 add_word(word, wl, flags, al, NULL); | |
| 270 return 0; | 318 return 0; |
| 271 } | 319 } |
| 272 | 320 |
| 273 int HashMgr::put_word_pattern(const char * word, int wl, const char * pattern) | 321 // detect captype and modify word length for UTF-8 encoding |
| 322 int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) { |
| 323 int len; |
| 324 if (utf8) { |
| 325 w_char dest_utf[BUFSIZE]; |
| 326 len = u8_u16(dest_utf, BUFSIZE, word); |
| 327 *captype = get_captype_utf8(dest_utf, len, langnum); |
| 328 } else { |
| 329 len = wbl; |
| 330 *captype = get_captype((char *) word, len, csconv); |
| 331 } |
| 332 return len; |
| 333 } |
| 334 |
| 335 // remove word (personal dictionary function for standalone applications) |
| 336 int HashMgr::remove(const char * word) |
| 274 { | 337 { |
| 275 unsigned short * flags; | 338 struct hentry * dp = lookup(word); |
| 276 struct hentry * dp = lookup(pattern); | 339 while (dp) { |
| 277 if (!dp || !dp->astr) return 1; | 340 if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { |
| 278 flags = (unsigned short *) malloc (dp->alen * sizeof(short)); | 341 unsigned short * flags = |
| 279 memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short)); | 342 (unsigned short *) malloc(sizeof(short *) * (dp->alen + 1)); |
| 280 add_word(word, wl, flags, dp->alen, NULL); | 343 if (!flags) return 1; |
| 344 for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i]; |
| 345 flags[dp->alen] = forbiddenword; |
| 346 dp->astr = flags; |
| 347 dp->alen++; |
| 348 flag_qsort(flags, 0, dp->alen); |
| 349 } |
| 350 dp = dp->next_homonym; |
| 351 } |
| 281 return 0; | 352 return 0; |
| 282 } | 353 } |
| 283 | 354 |
| 355 /* remove forbidden flag to add a personal word to the hash */ |
| 356 int HashMgr::remove_forbidden_flag(const char * word) { |
| 357 struct hentry * dp = lookup(word); |
| 358 if (!dp) return 1; |
| 359 while (dp) { |
| 360 if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) { |
| 361 if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal
dic. |
| 362 else { |
| 363 unsigned short * flags2 = |
| 364 (unsigned short *) malloc(sizeof(short *) * (dp->alen - 1)); |
| 365 if (!flags2) return 1; |
| 366 int i, j = 0; |
| 367 for (i = 0; i < dp->alen; i++) { |
| 368 if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i]; |
| 369 } |
| 370 dp->alen--; |
| 371 dp->astr = flags2; // XXX allowed forbidden words |
| 372 } |
| 373 } |
| 374 dp = dp->next_homonym; |
| 375 } |
| 376 return 0; |
| 377 } |
| 378 |
| 379 // add a custom dic. word to the hash table (public) |
| 380 int HashMgr::add(const char * word) |
| 381 { |
| 382 unsigned short * flags = NULL; |
| 383 int al = 0; |
| 384 if (remove_forbidden_flag(word)) { |
| 385 int captype; |
| 386 int wbl = strlen(word); |
| 387 int wcl = get_clen_and_captype(word, wbl, &captype); |
| 388 add_word(word, wbl, wcl, flags, al, NULL, false); |
| 389 return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, N
ULL, captype); |
| 390 } |
| 391 return 0; |
| 392 } |
| 393 |
| 394 int HashMgr::add_with_affix(const char * word, const char * example) |
| 395 { |
| 396 // detect captype and modify word length for UTF-8 encoding |
| 397 struct hentry * dp = lookup(example); |
| 398 remove_forbidden_flag(word); |
| 399 if (dp && dp->astr) { |
| 400 int captype; |
| 401 int wbl = strlen(word); |
| 402 int wcl = get_clen_and_captype(word, wbl, &captype); |
| 403 if (aliasf) { |
| 404 add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false); |
| 405 } else { |
| 406 unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeo
f(short)); |
| 407 if (flags) { |
| 408 memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(shor
t)); |
| 409 add_word(word, wbl, wcl, flags, dp->alen, NULL, false); |
| 410 } else return 1; |
| 411 } |
| 412 return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp
->alen, NULL, captype); |
| 413 } |
| 414 return 1; |
| 415 } |
| 416 |
| 284 // walk the hash table entry by entry - null at end | 417 // walk the hash table entry by entry - null at end |
| 418 // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp); |
| 285 struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const | 419 struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const |
| 286 { | 420 { |
| 287 #ifdef HUNSPELL_CHROME_CLIENT | 421 #ifdef HUNSPELL_CHROME_CLIENT |
| 288 // This function creates a new hentry if NULL is passed as hp. It also takes | 422 // This function creates a new hentry if NULL is passed as hp. It also takes |
| 289 // the responsibility of deleting the pointer hp when walk is over. | 423 // the responsibility of deleting the pointer hp when walk is over. |
| 290 | 424 |
| 291 // This function is only ever called by one place and not nested. We can | 425 // This function is only ever called by one place and not nested. We can |
| 292 // therefore keep static state between calls and use |col| as a "reset" flag | 426 // therefore keep static state between calls and use |col| as a "reset" flag |
| 293 // to avoid changing the API. It is set to -1 for the first call. | 427 // to avoid changing the API. It is set to -1 for the first call. |
| 294 static hunspell::WordIterator word_iterator = | 428 static hunspell::WordIterator word_iterator = |
| (...skipping 10 matching lines...) Expand all Loading... |
| 305 if (affix_count == 0) { | 439 if (affix_count == 0) { |
| 306 delete hp; | 440 delete hp; |
| 307 return NULL; | 441 return NULL; |
| 308 } | 442 } |
| 309 short word_len = static_cast<short>(strlen(word)); | 443 short word_len = static_cast<short>(strlen(word)); |
| 310 | 444 |
| 311 // For now, just re-compute the |hp| and return it. No need to create linked | 445 // For now, just re-compute the |hp| and return it. No need to create linked |
| 312 // lists for the extra affixes. If hp is NULL, create it here. | 446 // lists for the extra affixes. If hp is NULL, create it here. |
| 313 if (!hp) | 447 if (!hp) |
| 314 hp = new hentry; | 448 hp = new hentry; |
| 315 hp->word = word; | 449 hp->word = *word; |
| 316 hp->wlen = word_len; | 450 hp->blen = word_len; |
| 317 hp->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[0], | 451 hp->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[0], |
| 318 &hp->astr); | 452 &hp->astr); |
| 319 hp->next = NULL; | 453 hp->next = NULL; |
| 320 hp->next_homonym = NULL; | 454 hp->next_homonym = NULL; |
| 321 | 455 hp->var = 0; |
| 456 hp->clen = 0; |
| 322 return hp; | 457 return hp; |
| 323 #else | 458 #else |
| 324 //reset to start | 459 » |
| 325 if ((col < 0) || (hp == NULL)) { | 460 if (hp && hp->next != NULL) return hp->next; |
| 326 col = -1; | 461 for (col++; col < tablesize; col++) { |
| 327 hp = NULL; | 462 if (tableptr[col]) return tableptr[col]; |
| 328 } | 463 } |
| 329 | 464 // null at end and reset to start |
| 330 if (hp && hp->next != NULL) { | 465 col = -1; |
| 331 hp = hp->next; | 466 return NULL; |
| 332 } else { | |
| 333 col++; | |
| 334 hp = (col < tablesize) ? &tableptr[col] : NULL; | |
| 335 // search for next non-blank column entry | |
| 336 while (hp && (hp->word == NULL)) { | |
| 337 col ++; | |
| 338 hp = (col < tablesize) ? &tableptr[col] : NULL; | |
| 339 } | |
| 340 if (col < tablesize) return hp; | |
| 341 hp = NULL; | |
| 342 col = -1; | |
| 343 } | |
| 344 return hp; | |
| 345 #endif | 467 #endif |
| 346 } | 468 } |
| 347 | 469 |
| 348 // load a munched word list and build a hash table on the fly | 470 // load a munched word list and build a hash table on the fly |
| 349 int HashMgr::load_tables(FILE* t_handle) | 471 int HashMgr::load_tables(FILE* t_handle, const char * key) |
| 350 { | 472 { |
| 351 #ifndef HUNSPELL_CHROME_CLIENT | 473 #ifndef HUNSPELL_CHROME_CLIENT |
| 352 int wl, al; | 474 int al; |
| 353 char * ap; | 475 char * ap; |
| 354 char * dp; | 476 char * dp; |
| 477 char * dp2; |
| 355 unsigned short * flags; | 478 unsigned short * flags; |
| 479 char * ts; |
| 356 | 480 |
| 357 // raw dictionary - munched file | 481 // open dictionary file |
| 358 FILE * rawdict = _fdopen(_dup(_fileno(t_handle)), "r"); | 482 FileMgr * dict = new FileMgr(tpath, key); |
| 359 if (rawdict == NULL) return 1; | 483 if (dict == NULL) return 1; |
| 360 fseek(rawdict, 0, SEEK_SET); | |
| 361 | 484 |
| 362 // first read the first line of file to get hash table size */ | 485 // first read the first line of file to get hash table size */ |
| 363 char ts[MAXDELEN]; | 486 if (!(ts = dict->getline())) { |
| 364 if (! fgets(ts, MAXDELEN-1,rawdict)) return 2; | 487 HUNSPELL_WARNING(stderr, "error: empty dic file\n"); |
| 488 delete dict; |
| 489 return 2; |
| 490 } |
| 365 mychomp(ts); | 491 mychomp(ts); |
| 366 | 492 |
| 367 /* remove byte order mark */ | 493 /* remove byte order mark */ |
| 368 if (strncmp(ts,"\xef\xbb\xbf",3) == 0) { | 494 if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) { |
| 369 memmove(ts, ts+3, strlen(ts+3)+1); | 495 memmove(ts, ts+3, strlen(ts+3)+1); |
| 370 HUNSPELL_WARNING(stderr, "warning: dic file begins with byte order mark: pos
sible incompatibility with old Hunspell versions\n"); | 496 HUNSPELL_WARNING(stderr, "warning: dic file begins with byte order mark: pos
sible incompatibility with old Hunspell versions\n"); |
| 371 } | 497 } |
| 372 | 498 |
| 373 if ((*ts < '1') || (*ts > '9')) HUNSPELL_WARNING(stderr, "error - missing word
count in dictionary file\n"); | |
| 374 tablesize = atoi(ts); | 499 tablesize = atoi(ts); |
| 375 if (!tablesize) return 4; | 500 if (tablesize == 0) { |
| 501 HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the di
c file\n"); |
| 502 delete dict; |
| 503 return 4; |
| 504 } |
| 376 tablesize = tablesize + 5 + USERWORD; | 505 tablesize = tablesize + 5 + USERWORD; |
| 377 if ((tablesize %2) == 0) tablesize++; | 506 if ((tablesize %2) == 0) tablesize++; |
| 378 | 507 |
| 379 // allocate the hash table | 508 // allocate the hash table |
| 380 tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry)); | 509 tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *)); |
| 381 if (! tableptr) return 3; | 510 if (! tableptr) { |
| 382 for (int i=0; i<tablesize; i++) tableptr[i].word = NULL; | 511 delete dict; |
| 512 return 3; |
| 513 } |
| 514 for (int i=0; i<tablesize; i++) tableptr[i] = NULL; |
| 383 | 515 |
| 384 // loop through all words on much list and add to hash | 516 // loop through all words on much list and add to hash |
| 385 // table and create word and affix strings | 517 // table and create word and affix strings |
| 386 | 518 |
| 387 while (fgets(ts,MAXDELEN-1,rawdict)) { | 519 while ((ts = dict->getline())) { |
| 388 mychomp(ts); | 520 mychomp(ts); |
| 389 // split each line into word and morphological description | 521 // split each line into word and morphological description |
| 390 dp = strchr(ts,'\t'); | 522 dp = ts; |
| 523 while ((dp = strchr(dp, ':'))) { |
| 524 » if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) { |
| 525 » for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--); |
| 526 » if (dp < ts) { // missing word |
| 527 » » dp = NULL; |
| 528 » } else { |
| 529 » » *(dp + 1) = '\0'; |
| 530 » » dp = dp + 2; |
| 531 » } |
| 532 » break; |
| 533 » } |
| 534 » dp++; |
| 535 } |
| 391 | 536 |
| 392 if (dp) { | 537 // tabulator is the old morphological field separator |
| 393 *dp = '\0'; | 538 dp2 = strchr(ts, '\t'); |
| 394 dp++; | 539 if (dp2 && (!dp || dp2 < dp)) { |
| 395 } else { | 540 » *dp2 = '\0'; |
| 396 dp = NULL; | 541 » dp = dp2 + 1; |
| 397 } | 542 } |
| 398 | 543 |
| 399 // split each line into word and affix char strings | 544 // split each line into word and affix char strings |
| 400 // "\/" signs slash in words (not affix separator) | 545 // "\/" signs slash in words (not affix separator) |
| 401 // "/" at beginning of the line is word character (not affix separator) | 546 // "/" at beginning of the line is word character (not affix separator) |
| 402 ap = strchr(ts,'/'); | 547 ap = strchr(ts,'/'); |
| 403 while (ap) { | 548 while (ap) { |
| 404 if (ap == ts) { | 549 if (ap == ts) { |
| 405 ap++; | 550 ap++; |
| 406 continue; | 551 continue; |
| 407 } else if (*(ap - 1) != '\\') break; | 552 } else if (*(ap - 1) != '\\') break; |
| 408 // replace "\/" with "/" | 553 // replace "\/" with "/" |
| 409 for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++); | 554 for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++); |
| 410 ap = strchr(ap,'/'); | 555 ap = strchr(ap,'/'); |
| 411 } | 556 } |
| 412 | 557 |
| 413 if (ap) { | 558 if (ap) { |
| 414 *ap = '\0'; | 559 *ap = '\0'; |
| 415 if (aliasf) { | 560 if (aliasf) { |
| 416 int index = atoi(ap + 1); | 561 int index = atoi(ap + 1); |
| 417 al = get_aliasf(index, &flags); | 562 al = get_aliasf(index, &flags, dict); |
| 418 if (!al) { | 563 if (!al) { |
| 419 HUNSPELL_WARNING(stderr, "error - bad flag vector alias: %s\n", ts); | 564 HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n",
dict->getlinenum()); |
| 420 *ap = '\0'; | 565 *ap = '\0'; |
| 421 } | 566 } |
| 422 } else { | 567 } else { |
| 423 al = decode_flags(&flags, ap + 1); | 568 al = decode_flags(&flags, ap + 1, dict); |
| 424 flag_qsort(flags, 0, al); | 569 flag_qsort(flags, 0, al); |
| 425 } | 570 } |
| 426 } else { | 571 } else { |
| 427 al = 0; | 572 al = 0; |
| 428 ap = NULL; | 573 ap = NULL; |
| 429 flags = NULL; | 574 flags = NULL; |
| 430 } | 575 } |
| 431 | 576 |
| 432 wl = strlen(ts); | 577 int captype; |
| 578 int wbl = strlen(ts); |
| 579 int wcl = get_clen_and_captype(ts, wbl, &captype); |
| 580 // add the word and its index plus its capitalized form optionally |
| 581 if (add_word(ts,wbl,wcl,flags,al,dp, false) || |
| 582 » add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) { |
| 583 » delete dict; |
| 584 » return 5; |
| 585 } |
| 586 } |
| 433 | 587 |
| 434 // add the word and its index | 588 delete dict; |
| 435 if (add_word(ts,wl,flags,al,dp)) return 5; | |
| 436 | |
| 437 } | |
| 438 | |
| 439 fclose(rawdict); | |
| 440 #endif | 589 #endif |
| 441 return 0; | 590 return 0; |
| 442 } | 591 } |
| 443 | 592 |
| 444 | |
| 445 // the hash function is a simple load and rotate | 593 // the hash function is a simple load and rotate |
| 446 // algorithm borrowed | 594 // algorithm borrowed |
| 447 | 595 |
| 448 int HashMgr::hash(const char * word) const | 596 int HashMgr::hash(const char * word) const |
| 449 { | 597 { |
| 450 #ifdef HUNSPELL_CHROME_CLIENT | 598 #ifdef HUNSPELL_CHROME_CLIENT |
| 451 return 0; | 599 return 0; |
| 452 #else | 600 #else |
| 453 long hv = 0; | 601 long hv = 0; |
| 454 for (int i=0; i < 4 && *word != 0; i++) | 602 for (int i=0; i < 4 && *word != 0; i++) |
| 455 hv = (hv << 8) | (*word++); | 603 hv = (hv << 8) | (*word++); |
| 456 while (*word != 0) { | 604 while (*word != 0) { |
| 457 ROTATE(hv,ROTATE_LEN); | 605 ROTATE(hv,ROTATE_LEN); |
| 458 hv ^= (*word++); | 606 hv ^= (*word++); |
| 459 } | 607 } |
| 460 return (unsigned long) hv % tablesize; | 608 return (unsigned long) hv % tablesize; |
| 461 #endif | 609 #endif |
| 462 } | 610 } |
| 463 | 611 |
| 464 int HashMgr::decode_flags(unsigned short ** result, char * flags) { | 612 int HashMgr::decode_flags(unsigned short ** result, char * flags) { |
| 465 int len; | 613 int len; |
| 466 switch (flag_mode) { | 614 switch (flag_mode) { |
| 467 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) | 615 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) |
| 468 len = strlen(flags); | 616 len = strlen(flags); |
| 469 if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: length of FLAG_LONG fla
gvector is odd: %s\n", flags); | 617 if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: bad flagvector\n"); |
| 470 len = len/2; | 618 len /= 2; |
| 471 *result = (unsigned short *) malloc(len * sizeof(short)); | 619 *result = (unsigned short *) malloc(len * sizeof(short)); |
| 620 if (!*result) return -1; |
| 472 for (int i = 0; i < len; i++) { | 621 for (int i = 0; i < len; i++) { |
| 473 (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned sh
ort) flags[i * 2 + 1]; | 622 (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned sh
ort) flags[i * 2 + 1]; |
| 474 } | 623 } |
| 475 break; | 624 break; |
| 476 } | 625 } |
| 477 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 452
1 23 233) | 626 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 452
1 23 233) |
| 627 int i; |
| 478 len = 1; | 628 len = 1; |
| 479 char * src = flags; | 629 char * src = flags; |
| 480 unsigned short * dest; | 630 unsigned short * dest; |
| 481 char * p; | 631 char * p; |
| 482 for (p = flags; *p; p++) { | 632 for (p = flags; *p; p++) { |
| 483 if (*p == ',') len++; | 633 if (*p == ',') len++; |
| 484 } | 634 } |
| 485 *result = (unsigned short *) malloc(len * sizeof(short)); | 635 *result = (unsigned short *) malloc(len * sizeof(short)); |
| 636 if (!*result) return -1; |
| 486 dest = *result; | 637 dest = *result; |
| 487 for (p = flags; *p; p++) { | 638 for (p = flags; *p; p++) { |
| 488 if (*p == ',') { | 639 if (*p == ',') { |
| 489 *dest = (unsigned short) atoi(src); | 640 i = atoi(src); |
| 641 if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d i
s too large (max: %d)\n", i, DEFAULTFLAGS - 1); |
| 642 *dest = (unsigned short) i; |
| 490 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\
n"); | 643 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\
n"); |
| 491 src = p + 1; | 644 src = p + 1; |
| 492 dest++; | 645 dest++; |
| 493 } | 646 } |
| 494 } | 647 } |
| 495 *dest = (unsigned short) atoi(src); | 648 i = atoi(src); |
| 649 if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is to
o large (max: %d)\n", i, DEFAULTFLAGS - 1); |
| 650 *dest = (unsigned short) i; |
| 496 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); | 651 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); |
| 497 break; | 652 break; |
| 498 } | 653 } |
| 499 case FLAG_UNI: { // UTF-8 characters | 654 case FLAG_UNI: { // UTF-8 characters |
| 500 w_char w[MAXDELEN/2]; | 655 w_char w[BUFSIZE/2]; |
| 501 len = u8_u16(w, MAXDELEN/2, flags); | 656 len = u8_u16(w, BUFSIZE/2, flags); |
| 502 *result = (unsigned short *) malloc(len * sizeof(short)); | 657 *result = (unsigned short *) malloc(len * sizeof(short)); |
| 658 if (!*result) return -1; |
| 503 memcpy(*result, w, len * sizeof(short)); | 659 memcpy(*result, w, len * sizeof(short)); |
| 504 break; | 660 break; |
| 505 } | 661 } |
| 506 default: { // Ispell's one-character flags (erfg -> e r f g) | 662 default: { // Ispell's one-character flags (erfg -> e r f g) |
| 507 unsigned short * dest; | 663 unsigned short * dest; |
| 508 len = strlen(flags); | 664 len = strlen(flags); |
| 509 *result = (unsigned short *) malloc(len * sizeof(short)); | 665 *result = (unsigned short *) malloc(len * sizeof(short)); |
| 666 if (!*result) return -1; |
| 510 dest = *result; | 667 dest = *result; |
| 511 for (unsigned char * p = (unsigned char *) flags; *p; p++) { | 668 for (unsigned char * p = (unsigned char *) flags; *p; p++) { |
| 512 *dest = (unsigned short) *p; | 669 *dest = (unsigned short) *p; |
| 513 dest++; | 670 dest++; |
| 514 } | 671 } |
| 515 } | 672 } |
| 516 } | 673 } |
| 517 return len; | 674 return len; |
| 518 } | 675 } |
| 519 | 676 |
| 520 unsigned short HashMgr::decode_flag(const char * f) { | 677 unsigned short HashMgr::decode_flag(const char * f) { |
| 521 unsigned short s = 0; | 678 unsigned short s = 0; |
| 679 int i; |
| 522 switch (flag_mode) { | 680 switch (flag_mode) { |
| 523 case FLAG_LONG: | 681 case FLAG_LONG: |
| 524 s = ((unsigned short) f[0] << 8) + (unsigned short) f[1]; | 682 s = ((unsigned short) f[0] << 8) + (unsigned short) f[1]; |
| 525 break; | 683 break; |
| 526 case FLAG_NUM: | 684 case FLAG_NUM: |
| 527 s = (unsigned short) atoi(f); | 685 i = atoi(f); |
| 686 if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is to
o large (max: %d)\n", i, DEFAULTFLAGS - 1); |
| 687 s = (unsigned short) i; |
| 528 break; | 688 break; |
| 529 case FLAG_UNI: | 689 case FLAG_UNI: |
| 530 u8_u16((w_char *) &s, 1, f); | 690 u8_u16((w_char *) &s, 1, f); |
| 531 break; | 691 break; |
| 532 default: | 692 default: |
| 533 s = (unsigned short) *((unsigned char *)f); | 693 s = (unsigned short) *((unsigned char *)f); |
| 534 } | 694 } |
| 535 if (!s) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); | 695 if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); |
| 536 return s; | 696 return s; |
| 537 } | 697 } |
| 538 | 698 |
| 539 char * HashMgr::encode_flag(unsigned short f) { | 699 char * HashMgr::encode_flag(unsigned short f) { |
| 540 unsigned char ch[10]; | 700 unsigned char ch[10]; |
| 541 if (f==0) return mystrdup("(NULL)"); | 701 if (f==0) return mystrdup("(NULL)"); |
| 542 if (flag_mode == FLAG_LONG) { | 702 if (flag_mode == FLAG_LONG) { |
| 543 ch[0] = (unsigned char) (f >> 8); | 703 ch[0] = (unsigned char) (f >> 8); |
| 544 ch[1] = (unsigned char) (f - ((f >> 8) << 8)); | 704 ch[1] = (unsigned char) (f - ((f >> 8) << 8)); |
| 545 ch[2] = '\0'; | 705 ch[2] = '\0'; |
| (...skipping 16 matching lines...) Expand all Loading... |
| 562 // Read in the regular commands from the affix file. We care about the FLAG | 722 // Read in the regular commands from the affix file. We care about the FLAG |
| 563 // line becuase the AF lines depend on this value, and the IGNORE line. | 723 // line becuase the AF lines depend on this value, and the IGNORE line. |
| 564 // The rest of the commands will be read by the affix manager. | 724 // The rest of the commands will be read by the affix manager. |
| 565 char line[MAXDELEN+1]; | 725 char line[MAXDELEN+1]; |
| 566 hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator(); | 726 hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator(); |
| 567 while (iterator.AdvanceAndCopy(line, MAXDELEN)) { | 727 while (iterator.AdvanceAndCopy(line, MAXDELEN)) { |
| 568 // Parse in the ignored characters (for example, Arabic optional | 728 // Parse in the ignored characters (for example, Arabic optional |
| 569 // diacritics characters. | 729 // diacritics characters. |
| 570 if (strncmp(line,"IGNORE",6) == 0) { | 730 if (strncmp(line,"IGNORE",6) == 0) { |
| 571 parse_array(line, &ignorechars, &ignorechars_utf16, | 731 parse_array(line, &ignorechars, &ignorechars_utf16, |
| 572 &ignorechars_utf16_len, "IGNORE", utf8); | 732 &ignorechars_utf16_len, utf8, 0); |
| 573 } | 733 } |
| 574 // Retrieve the format of an AF line. | 734 // Retrieve the format of an AF line. |
| 575 if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) { | 735 if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) { |
| 576 if (strstr(line, "long")) flag_mode = FLAG_LONG; | 736 if (strstr(line, "long")) flag_mode = FLAG_LONG; |
| 577 if (strstr(line, "num")) flag_mode = FLAG_NUM; | 737 if (strstr(line, "num")) flag_mode = FLAG_NUM; |
| 578 if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI; | 738 if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI; |
| 579 } | 739 } |
| 580 } | 740 } |
| 581 | 741 |
| 582 // Read in all the AF lines which tell us the rules for each affix group ID. | 742 // Read in all the AF lines which tell us the rules for each affix group ID. |
| 583 iterator = bdict_reader->GetAfLineIterator(); | 743 iterator = bdict_reader->GetAfLineIterator(); |
| 584 while (iterator.AdvanceAndCopy(line, MAXDELEN)) { | 744 while (iterator.AdvanceAndCopy(line, MAXDELEN)) { |
| 585 int rv = parse_aliasf(line, &iterator); | 745 int rv = parse_aliasf(line, &iterator); |
| 586 if (rv) | 746 if (rv) |
| 587 return rv; | 747 return rv; |
| 588 } | 748 } |
| 589 | 749 |
| 590 return 0; | 750 return 0; |
| 591 } | 751 } |
| 592 #else | 752 #else |
| 593 // read in aff file and set flag mode | 753 // read in aff file and set flag mode |
| 594 int HashMgr::load_config(FILE* aff_handle) | 754 int HashMgr::load_config(FILE* aff_handle, const char * key) |
| 595 { | 755 { |
| 756 char * line; // io buffers |
| 596 int firstline = 1; | 757 int firstline = 1; |
| 597 | |
| 598 // io buffers | |
| 599 char line[MAXDELEN+1]; | |
| 600 | 758 |
| 601 // open the affix file | 759 // open the affix file |
| 602 FILE * afflst; | 760 FileMgr * afflst = new FileMgr(affpath, key); |
| 603 afflst = _fdopen(_dup(_fileno(aff_handle)), "r"); | |
| 604 if (!afflst) { | 761 if (!afflst) { |
| 605 HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n"); | 762 HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n"); |
| 606 return 1; | 763 return 1; |
| 607 } | 764 } |
| 608 fseek(afflst, 0, SEEK_SET); | |
| 609 | 765 |
| 610 // read in each line ignoring any that do not | 766 // read in each line ignoring any that do not |
| 611 // start with a known line type indicator | 767 // start with a known line type indicator |
| 612 | 768 |
| 613 while (fgets(line,MAXDELEN,afflst)) { | 769 while ((line = afflst->getline())) { |
| 614 mychomp(line); | 770 mychomp(line); |
| 615 | 771 |
| 616 /* remove byte order mark */ | 772 /* remove byte order mark */ |
| 617 if (firstline) { | 773 if (firstline) { |
| 618 firstline = 0; | 774 firstline = 0; |
| 619 if (strncmp(line,"\xef\xbb\xbf",3) == 0) memmove(line, line+3, strlen(l
ine+3)+1); | 775 if (strncmp(line,"\xEF\xBB\xBF",3) == 0) memmove(line, line+3, strlen(l
ine+3)+1); |
| 620 } | 776 } |
| 621 | 777 |
| 622 /* parse in the try string */ | 778 /* parse in the try string */ |
| 623 if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) { | 779 if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) { |
| 624 if (flag_mode != FLAG_CHAR) { | 780 if (flag_mode != FLAG_CHAR) { |
| 625 HUNSPELL_WARNING(stderr, "error: duplicate FLAG parameter\n"); | 781 HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions o
f the FLAG affix file parameter\n", afflst->getlinenum()); |
| 626 } | 782 } |
| 627 if (strstr(line, "long")) flag_mode = FLAG_LONG; | 783 if (strstr(line, "long")) flag_mode = FLAG_LONG; |
| 628 if (strstr(line, "num")) flag_mode = FLAG_NUM; | 784 if (strstr(line, "num")) flag_mode = FLAG_NUM; |
| 629 if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI; | 785 if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI; |
| 630 if (flag_mode == FLAG_CHAR) { | 786 if (flag_mode == FLAG_CHAR) { |
| 631 HUNSPELL_WARNING(stderr, "error: FLAG need `num', `long' or `UTF
-8' parameter: %s\n", line); | 787 HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `lon
g' or `UTF-8' parameter\n", afflst->getlinenum()); |
| 632 } | 788 } |
| 633 } | 789 } |
| 634 if ((strncmp(line,"SET",3) == 0) && isspace(line[3]) && strstr(line, "UT
F-8")) utf8 = 1; | 790 if (strncmp(line,"FORBIDDENWORD",13) == 0) { |
| 791 char * st = NULL; |
| 792 if (parse_string(line, &st, afflst->getlinenum())) { |
| 793 delete afflst; |
| 794 return 1; |
| 795 } |
| 796 forbiddenword = decode_flag(st); |
| 797 free(st); |
| 798 } |
| 799 if (strncmp(line, "SET", 3) == 0) { |
| 800 » if (parse_string(line, &enc, afflst->getlinenum())) { |
| 801 delete afflst; |
| 802 return 1; |
| 803 } » |
| 804 » if (strcmp(enc, "UTF-8") == 0) { |
| 805 » utf8 = 1; |
| 806 #ifndef OPENOFFICEORG |
| 807 #ifndef MOZILLA_CLIENT |
| 808 » initialize_utf_tbl(); |
| 809 #endif |
| 810 #endif |
| 811 » } else csconv = get_current_cs(enc); |
| 812 » } |
| 813 if (strncmp(line, "LANG", 4) == 0) { |
| 814 » if (parse_string(line, &lang, afflst->getlinenum())) { |
| 815 delete afflst; |
| 816 return 1; |
| 817 } » |
| 818 » langnum = get_lang_num(lang); |
| 819 » } |
| 635 | 820 |
| 636 /* parse in the ignored characters (for example, Arabic optional diacriti
cs characters */ | 821 /* parse in the ignored characters (for example, Arabic optional diacriti
cs characters */ |
| 637 if (strncmp(line,"IGNORE",6) == 0) { | 822 if (strncmp(line,"IGNORE",6) == 0) { |
| 638 if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_u
tf16_len, "IGNORE", utf8)) { | 823 if (parse_array(line, &ignorechars, &ignorechars_utf16, |
| 639 fclose(afflst); | 824 &ignorechars_utf16_len, utf8, afflst->getlinenum())) { |
| 825 delete afflst; |
| 640 return 1; | 826 return 1; |
| 641 } | 827 } |
| 642 } | 828 } |
| 643 | 829 |
| 644 if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) { | 830 if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) { |
| 645 if (parse_aliasf(line, afflst)) { | 831 if (parse_aliasf(line, afflst)) { |
| 646 fclose(afflst); | 832 delete afflst; |
| 647 return 1; | 833 return 1; |
| 648 } | 834 } |
| 649 } | 835 } |
| 650 | 836 |
| 651 #ifdef HUNSPELL_EXPERIMENTAL | |
| 652 if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) { | 837 if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) { |
| 653 if (parse_aliasm(line, afflst)) { | 838 if (parse_aliasm(line, afflst)) { |
| 654 fclose(afflst); | 839 delete afflst; |
| 655 return 1; | 840 return 1; |
| 656 } | 841 } |
| 657 } | 842 } |
| 658 #endif | 843 |
| 659 if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1; | 844 if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1; |
| 660 if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && is
space(line[3])) break; | 845 if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && iss
pace(line[3])) break; |
| 661 } | 846 } |
| 662 fclose(afflst); | 847 if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING); |
| 848 delete afflst; |
| 663 return 0; | 849 return 0; |
| 664 } | 850 } |
| 665 #endif // HUNSPELL_CHROME_CLIENT | 851 #endif // HUNSPELL_CHROME_CLIENT |
| 666 | 852 |
| 667 /* parse in the ALIAS table */ | 853 /* parse in the ALIAS table */ |
| 668 #ifdef HUNSPELL_CHROME_CLIENT | 854 #ifdef HUNSPELL_CHROME_CLIENT |
| 669 int HashMgr::parse_aliasf(char* line, hunspell::LineIterator* iterator) | 855 int HashMgr::parse_aliasf(char* line, hunspell::LineIterator* iterator) |
| 670 { | 856 { |
| 671 #else | 857 #else |
| 672 int HashMgr::parse_aliasf(char * line, FILE * af) | 858 int HashMgr::parse_aliasf(char * line, FileMgr * af) |
| 673 { | 859 { |
| 674 #endif | 860 #endif |
| 675 if (numaliasf != 0) { | 861 if (numaliasf != 0) { |
| 676 HUNSPELL_WARNING(stderr, "error: duplicate AF (alias for flag vector) tabl
es used\n"); | 862 HUNSPELL_WARNING(stderr, "error: multiple table definitions\n"); |
| 677 return 1; | 863 return 1; |
| 678 } | 864 } |
| 679 char * tp = line; | 865 char * tp = line; |
| 680 char * piece; | 866 char * piece; |
| 681 int i = 0; | 867 int i = 0; |
| 682 int np = 0; | 868 int np = 0; |
| 683 piece = mystrsep(&tp, 0); | 869 piece = mystrsep(&tp, 0); |
| 684 while (piece) { | 870 while (piece) { |
| 685 if (*piece != '\0') { | 871 if (*piece != '\0') { |
| 686 switch(i) { | 872 switch(i) { |
| 687 case 0: { np++; break; } | 873 case 0: { np++; break; } |
| 688 case 1: { | 874 case 1: { |
| 689 numaliasf = atoi(piece); | 875 numaliasf = atoi(piece); |
| 690 if (numaliasf < 1) { | 876 if (numaliasf < 1) { |
| 691 numaliasf = 0; | 877 numaliasf = 0; |
| 692 aliasf = NULL; | 878 aliasf = NULL; |
| 693 aliasflen = NULL; | 879 aliasflen = NULL; |
| 694 HUNSPELL_WARNING(stderr, "incorrect number of entries
in AF table\n"); | 880 HUNSPELL_WARNING(stderr, "error: bad entry number\n"); |
| 695 free(piece); | |
| 696 return 1; | 881 return 1; |
| 697 } | 882 } |
| 698 aliasf = (unsigned short **) malloc(numaliasf * sizeof(un
signed short *)); | 883 aliasf = (unsigned short **) malloc(numaliasf * sizeof(un
signed short *)); |
| 699 aliasflen = (unsigned short *) malloc(numaliasf * sizeof(
short)); | 884 aliasflen = (unsigned short *) malloc(numaliasf * sizeof(
short)); |
| 700 if (!aliasf || !aliasflen) { | 885 if (!aliasf || !aliasflen) { |
| 701 numaliasf = 0; | 886 numaliasf = 0; |
| 702 if (aliasf) free(aliasf); | 887 if (aliasf) free(aliasf); |
| 703 if (aliasflen) free(aliasflen); | 888 if (aliasflen) free(aliasflen); |
| 704 aliasf = NULL; | 889 aliasf = NULL; |
| 705 aliasflen = NULL; | 890 aliasflen = NULL; |
| 706 return 1; | 891 return 1; |
| 707 } | 892 } |
| 708 np++; | 893 np++; |
| 709 break; | 894 break; |
| 710 } | 895 } |
| 711 default: break; | 896 default: break; |
| 712 } | 897 } |
| 713 i++; | 898 i++; |
| 714 } | 899 } |
| 715 free(piece); | |
| 716 piece = mystrsep(&tp, 0); | 900 piece = mystrsep(&tp, 0); |
| 717 } | 901 } |
| 718 if (np != 2) { | 902 if (np != 2) { |
| 719 numaliasf = 0; | 903 numaliasf = 0; |
| 720 free(aliasf); | 904 free(aliasf); |
| 721 free(aliasflen); | 905 free(aliasflen); |
| 722 aliasf = NULL; | 906 aliasf = NULL; |
| 723 aliasflen = NULL; | 907 aliasflen = NULL; |
| 724 HUNSPELL_WARNING(stderr, "error: missing AF table information\n"); | 908 HUNSPELL_WARNING(stderr, "error: missing data\n"); |
| 725 return 1; | 909 return 1; |
| 726 } | 910 } |
| 727 | 911 |
| 728 /* now parse the numaliasf lines to read in the remainder of the table */ | 912 /* now parse the numaliasf lines to read in the remainder of the table */ |
| 729 char * nl = line; | 913 char * nl = line; |
| 730 for (int j=0; j < numaliasf; j++) { | 914 for (int j=0; j < numaliasf; j++) { |
| 731 #ifdef HUNSPELL_CHROME_CLIENT | 915 #ifdef HUNSPELL_CHROME_CLIENT |
| 732 if (!iterator->AdvanceAndCopy(nl, MAXDELEN)) | 916 if (!iterator->AdvanceAndCopy(nl, MAXDELEN)) |
| 733 return 1; | 917 return 1; |
| 734 #else | 918 #else |
| 735 if (!fgets(nl,MAXDELEN,af)) return 1; | 919 if (!(nl = af->getline())) return 1; |
| 736 #endif | 920 #endif |
| 737 mychomp(nl); | 921 » mychomp(nl); |
| 738 tp = nl; | 922 tp = nl; |
| 739 i = 0; | 923 i = 0; |
| 740 aliasf[j] = NULL; | 924 aliasf[j] = NULL; |
| 741 aliasflen[j] = 0; | 925 aliasflen[j] = 0; |
| 742 piece = mystrsep(&tp, 0); | 926 piece = mystrsep(&tp, 0); |
| 743 while (piece) { | 927 while (piece) { |
| 744 if (*piece != '\0') { | 928 if (*piece != '\0') { |
| 745 switch(i) { | 929 switch(i) { |
| 746 case 0: { | 930 case 0: { |
| 747 if (strncmp(piece,"AF",2) != 0) { | 931 if (strncmp(piece,"AF",2) != 0) { |
| 748 numaliasf = 0; | 932 numaliasf = 0; |
| 749 free(aliasf); | 933 free(aliasf); |
| 750 free(aliasflen); | 934 free(aliasflen); |
| 751 aliasf = NULL; | 935 aliasf = NULL; |
| 752 aliasflen = NULL; | 936 aliasflen = NULL; |
| 753 HUNSPELL_WARNING(stderr, "error: AF table is co
rrupt\n"); | 937 HUNSPELL_WARNING(stderr, "error: table is corru
pt\n"); |
| 754 free(piece); | |
| 755 return 1; | 938 return 1; |
| 756 } | 939 } |
| 757 break; | 940 break; |
| 758 } | 941 } |
| 759 case 1: { | 942 case 1: { |
| 760 aliasflen[j] = (unsigned short) decode_flags(&(alias
f[j]), piece); | 943 aliasflen[j] = (unsigned short) decode_flags(&(alias
f[j]), piece); |
| 761 flag_qsort(aliasf[j], 0, aliasflen[j]); | 944 flag_qsort(aliasf[j], 0, aliasflen[j]); |
| 762 break; | 945 break; |
| 763 } | 946 } |
| 764 default: break; | 947 default: break; |
| 765 } | 948 } |
| 766 i++; | 949 i++; |
| 767 } | 950 } |
| 768 free(piece); | |
| 769 piece = mystrsep(&tp, 0); | 951 piece = mystrsep(&tp, 0); |
| 770 } | 952 } |
| 771 if (!aliasf[j]) { | 953 if (!aliasf[j]) { |
| 772 free(aliasf); | 954 free(aliasf); |
| 773 free(aliasflen); | 955 free(aliasflen); |
| 774 aliasf = NULL; | 956 aliasf = NULL; |
| 775 aliasflen = NULL; | 957 aliasflen = NULL; |
| 776 numaliasf = 0; | 958 numaliasf = 0; |
| 777 HUNSPELL_WARNING(stderr, "error: AF table is corrupt\n"); | 959 HUNSPELL_WARNING(stderr, "error: table is corrupt\n"); |
| 778 return 1; | 960 return 1; |
| 779 } | 961 } |
| 780 } | 962 } |
| 781 return 0; | 963 return 0; |
| 782 } | 964 } |
| 783 | 965 |
| 784 #ifdef HUNSPELL_CHROME_CLIENT | 966 #ifdef HUNSPELL_CHROME_CLIENT |
| 785 hentry* HashMgr::AffixIDsToHentry(char* word, | 967 hentry* HashMgr::AffixIDsToHentry(char* word, |
| 786 int* affix_ids, | 968 int* affix_ids, |
| 787 int affix_count) const | 969 int affix_count) const |
| (...skipping 15 matching lines...) Expand all Loading... |
| 803 | 985 |
| 804 // We can get a number of prefixes per word. There will normally be only one, | 986 // We can get a number of prefixes per word. There will normally be only one, |
| 805 // but if not, there will be a linked list of "hentry"s for the "homonym"s | 987 // but if not, there will be a linked list of "hentry"s for the "homonym"s |
| 806 // for the word. | 988 // for the word. |
| 807 struct hentry* first_he = NULL; | 989 struct hentry* first_he = NULL; |
| 808 struct hentry* prev_he = NULL; // For making linked list. | 990 struct hentry* prev_he = NULL; // For making linked list. |
| 809 for (int i = 0; i < affix_count; i++) { | 991 for (int i = 0; i < affix_count; i++) { |
| 810 struct hentry* he = new hentry; | 992 struct hentry* he = new hentry; |
| 811 if (i == 0) | 993 if (i == 0) |
| 812 first_he = he; | 994 first_he = he; |
| 813 he->word = word; | 995 he->word = *word; |
| 814 he->wlen = word_len; | 996 he->blen = word_len; |
| 815 he->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[i], | 997 he->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[i], |
| 816 &he->astr); | 998 &he->astr); |
| 817 he->next = NULL; | 999 he->next = NULL; |
| 818 he->next_homonym = NULL; | 1000 he->next_homonym = NULL; |
| 819 if (prev_he) | 1001 if (prev_he) |
| 820 prev_he->next_homonym = he; | 1002 prev_he->next_homonym = he; |
| 821 prev_he = he; | 1003 prev_he = he; |
| 822 } | 1004 } |
| 823 | 1005 |
| 824 cache[std_word] = first_he; // Save this word in the cache for later. | 1006 cache[std_word] = first_he; // Save this word in the cache for later. |
| (...skipping 22 matching lines...) Expand all Loading... |
| 847 int HashMgr::get_aliasf(int index, unsigned short ** fvec) { | 1029 int HashMgr::get_aliasf(int index, unsigned short ** fvec) { |
| 848 if ((index > 0) && (index <= numaliasf)) { | 1030 if ((index > 0) && (index <= numaliasf)) { |
| 849 *fvec = aliasf[index - 1]; | 1031 *fvec = aliasf[index - 1]; |
| 850 return aliasflen[index - 1]; | 1032 return aliasflen[index - 1]; |
| 851 } | 1033 } |
| 852 HUNSPELL_WARNING(stderr, "error: bad flag alias index: %d\n", index); | 1034 HUNSPELL_WARNING(stderr, "error: bad flag alias index: %d\n", index); |
| 853 *fvec = NULL; | 1035 *fvec = NULL; |
| 854 return 0; | 1036 return 0; |
| 855 } | 1037 } |
| 856 | 1038 |
| 857 #ifdef HUNSPELL_EXPERIMENTAL | |
| 858 /* parse morph alias definitions */ | 1039 /* parse morph alias definitions */ |
| 859 int HashMgr::parse_aliasm(char * line, FILE * af) | 1040 int HashMgr::parse_aliasm(char * line, FileMgr * af) |
| 860 { | 1041 { |
| 861 if (numaliasm != 0) { | 1042 if (numaliasm != 0) { |
| 862 HUNSPELL_WARNING(stderr, "error: duplicate AM (aliases for morphological d
escriptions) tables used\n"); | 1043 HUNSPELL_WARNING(stderr, "error: multiple table definitions\n"); |
| 863 return 1; | 1044 return 1; |
| 864 } | 1045 } |
| 865 char * tp = line; | 1046 char * tp = line; |
| 866 char * piece; | 1047 char * piece; |
| 867 int i = 0; | 1048 int i = 0; |
| 868 int np = 0; | 1049 int np = 0; |
| 869 piece = mystrsep(&tp, 0); | 1050 piece = mystrsep(&tp, 0); |
| 870 while (piece) { | 1051 while (piece) { |
| 871 if (*piece != '\0') { | 1052 if (*piece != '\0') { |
| 872 switch(i) { | 1053 switch(i) { |
| 873 case 0: { np++; break; } | 1054 case 0: { np++; break; } |
| 874 case 1: { | 1055 case 1: { |
| 875 numaliasm = atoi(piece); | 1056 numaliasm = atoi(piece); |
| 876 if (numaliasm < 1) { | 1057 if (numaliasm < 1) { |
| 877 HUNSPELL_WARNING(stderr, "incorrect number of entries
in AM table\n"); | 1058 HUNSPELL_WARNING(stderr, "error: line %d: bad entry nu
mber\n", af->getlinenum()); |
| 878 free(piece); | |
| 879 return 1; | 1059 return 1; |
| 880 } | 1060 } |
| 881 aliasm = (char **) malloc(numaliasm * sizeof(char *)); | 1061 aliasm = (char **) malloc(numaliasm * sizeof(char *)); |
| 882 if (!aliasm) { | 1062 if (!aliasm) { |
| 883 numaliasm = 0; | 1063 numaliasm = 0; |
| 884 return 1; | 1064 return 1; |
| 885 } | 1065 } |
| 886 np++; | 1066 np++; |
| 887 break; | 1067 break; |
| 888 } | 1068 } |
| 889 default: break; | 1069 default: break; |
| 890 } | 1070 } |
| 891 i++; | 1071 i++; |
| 892 } | 1072 } |
| 893 free(piece); | |
| 894 piece = mystrsep(&tp, 0); | 1073 piece = mystrsep(&tp, 0); |
| 895 } | 1074 } |
| 896 if (np != 2) { | 1075 if (np != 2) { |
| 897 numaliasm = 0; | 1076 numaliasm = 0; |
| 898 free(aliasm); | 1077 free(aliasm); |
| 899 aliasm = NULL; | 1078 aliasm = NULL; |
| 900 HUNSPELL_WARNING(stderr, "error: missing AM alias information\n"); | 1079 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum(
)); |
| 901 return 1; | 1080 return 1; |
| 902 } | 1081 } |
| 903 | 1082 |
| 904 /* now parse the numaliasm lines to read in the remainder of the table */ | 1083 /* now parse the numaliasm lines to read in the remainder of the table */ |
| 905 char * nl = line; | 1084 char * nl = line; |
| 906 for (int j=0; j < numaliasm; j++) { | 1085 for (int j=0; j < numaliasm; j++) { |
| 907 if (!fgets(nl,MAXDELEN,af)) return 1; | 1086 if (!(nl = af->getline())) return 1; |
| 908 mychomp(nl); | 1087 mychomp(nl); |
| 909 tp = nl; | 1088 tp = nl; |
| 910 i = 0; | 1089 i = 0; |
| 911 aliasm[j] = NULL; | 1090 aliasm[j] = NULL; |
| 912 piece = mystrsep(&tp, 0); | 1091 piece = mystrsep(&tp, ' '); |
| 913 while (piece) { | 1092 while (piece) { |
| 914 if (*piece != '\0') { | 1093 if (*piece != '\0') { |
| 915 switch(i) { | 1094 switch(i) { |
| 916 case 0: { | 1095 case 0: { |
| 917 if (strncmp(piece,"AM",2) != 0) { | 1096 if (strncmp(piece,"AM",2) != 0) { |
| 918 HUNSPELL_WARNING(stderr, "error: AM table is co
rrupt\n"); | 1097 HUNSPELL_WARNING(stderr, "error: line %d: table
is corrupt\n", af->getlinenum()); |
| 919 free(piece); | |
| 920 numaliasm = 0; | 1098 numaliasm = 0; |
| 921 free(aliasm); | 1099 free(aliasm); |
| 922 aliasm = NULL; | 1100 aliasm = NULL; |
| 923 return 1; | 1101 return 1; |
| 924 } | 1102 } |
| 925 break; | 1103 break; |
| 926 } | 1104 } |
| 927 case 1: { | 1105 case 1: { |
| 1106 // add the remaining of the line |
| 1107 if (*tp) { |
| 1108 *(tp - 1) = ' '; |
| 1109 tp = tp + strlen(tp); |
| 1110 } |
| 928 if (complexprefixes) { | 1111 if (complexprefixes) { |
| 929 if (utf8) reverseword_utf(piece); | 1112 if (utf8) reverseword_utf(piece); |
| 930 else reverseword(piece); | 1113 else reverseword(piece); |
| 931 } | 1114 } |
| 932 aliasm[j] = mystrdup(piece); | 1115 aliasm[j] = mystrdup(piece); |
| 1116 if (!aliasm[j]) { |
| 1117 numaliasm = 0; |
| 1118 free(aliasm); |
| 1119 aliasm = NULL; |
| 1120 return 1; |
| 1121 } |
| 933 break; } | 1122 break; } |
| 934 default: break; | 1123 default: break; |
| 935 } | 1124 } |
| 936 i++; | 1125 i++; |
| 937 } | 1126 } |
| 938 free(piece); | 1127 piece = mystrsep(&tp, ' '); |
| 939 piece = mystrsep(&tp, 0); | |
| 940 } | 1128 } |
| 941 if (!aliasm[j]) { | 1129 if (!aliasm[j]) { |
| 942 numaliasm = 0; | 1130 numaliasm = 0; |
| 943 free(aliasm); | 1131 free(aliasm); |
| 944 aliasm = NULL; | 1132 aliasm = NULL; |
| 945 HUNSPELL_WARNING(stderr, "error: map table is corrupt\n"); | 1133 HUNSPELL_WARNING(stderr, "error: table is corrupt\n"); |
| 946 return 1; | 1134 return 1; |
| 947 } | 1135 } |
| 948 } | 1136 } |
| 949 return 0; | 1137 return 0; |
| 950 } | 1138 } |
| 951 | 1139 |
| 952 int HashMgr::is_aliasm() { | 1140 int HashMgr::is_aliasm() { |
| 953 return (aliasm != NULL); | 1141 return (aliasm != NULL); |
| 954 } | 1142 } |
| 955 | 1143 |
| 956 char * HashMgr::get_aliasm(int index) { | 1144 char * HashMgr::get_aliasm(int index) { |
| 957 if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1]; | 1145 if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1]; |
| 958 HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index); | 1146 HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index); |
| 959 return NULL; | 1147 return NULL; |
| 960 } | 1148 } |
| 961 #endif | |
| OLD | NEW |