OLD | NEW |
| (Empty) |
1 #include "license.hunspell" | |
2 #include "license.myspell" | |
3 | |
4 #include <stdlib.h> | |
5 #include <string.h> | |
6 #include <stdio.h> | |
7 #include <ctype.h> | |
8 | |
9 #include "suggestmgr.hxx" | |
10 #include "htypes.hxx" | |
11 #include "csutil.hxx" | |
12 | |
// The '|' character as a w_char. badcharkey_utf() tests the neighbours found in
// the keyboard string against it (the 8-bit badcharkey() tests against '|'
// directly), and extrachar_utf() uses it as a placeholder initial value.
13 const w_char W_VLINE = { '\0', '|' }; | |
14 | |
15 #ifdef HUNSPELL_CHROME_CLIENT | |
16 namespace { | |
17 // A simple class which creates temporary hentry objects which are available | |
18 // only in a scope. To conceal memory operations from SuggestMgr functions, | |
19 // this object automatically deletes all hentry objects created through | |
20 // CreateScopedHashEntry() calls in its destructor. So, the following snippet | |
21 // raises a memory error. | |
22 // | |
23 // hentry* bad_copy = NULL; | |
24 // { | |
25 // ScopedHashEntryFactory factory; | |
26 // hentry* scoped_copy = factory.CreateScopedHashEntry(0, source); | |
27 // ... | |
28 // bad_copy = scoped_copy; | |
29 // } | |
30 // if (bad_copy->word[0]) // memory for scoped_copy has been deleted! | |
31 // | |
32 // As listed in the above snippet, it is simple to use this class. | |
33 // 1. Declare an instance of this ScopedHashEntryFactory, and; | |
// 2. Call its CreateScopedHashEntry() member instead of using 'new hentry' or
35 // 'operator='. | |
36 // | |
37 class ScopedHashEntryFactory { | |
38 public: | |
39 ScopedHashEntryFactory(); | |
40 ~ScopedHashEntryFactory(); | |
41 | |
42 // Creates a temporary copy of the given hentry struct. | |
43 // The returned copy is available only while this object is available. | |
44 // NOTE: this function just calls memcpy() in creating a copy of the given | |
45 // hentry struct, i.e. it does NOT copy objects referred by pointers of the | |
46 // given hentry struct. | |
47 hentry* CreateScopedHashEntry(int index, const hentry* source); | |
48 | |
49 private: | |
50 // A struct which encapsulates the new hentry struct introduced in hunspell | |
51 // 1.2.8. For a pointer to an hentry struct 'h', hunspell 1.2.8 stores a word | |
52 // (including a NUL character) into 'h->word[0]',...,'h->word[h->blen]' even | |
53 // though arraysize(h->word[]) is 1. Also, it changed 'astr' to a pointer so | |
54 // it can store affix flags into 'h->astr[0]',...,'h->astr[alen-1]'. To handle | |
55 // this new hentry struct, we define a struct which combines three values: an | |
56 // hentry struct 'hentry'; a char array 'word[kMaxWordLen]', and; an unsigned | |
57 // short array 'astr' so a hentry struct 'h' returned from | |
58 // CreateScopedHashEntry() satisfies the following equations: | |
59 // hentry* h = factory.CreateScopedHashEntry(0, source); | |
60 // h->word[0] == ((HashEntryItem*)h)->entry.word[0]. | |
61 // h->word[1] == ((HashEntryItem*)h)->word[0]. | |
62 // ... | |
63 // h->word[h->blen] == ((HashEntryItem*)h)->word[h->blen-1]. | |
64 // h->astr[0] == ((HashEntryItem*)h)->astr[0]. | |
65 // h->astr[1] == ((HashEntryItem*)h)->astr[1]. | |
66 // ... | |
67 // h->astr[h->alen-1] == ((HashEntryItem*)h)->astr[h->alen-1]. | |
68 enum { | |
69 kMaxWordLen = 128, | |
70 kMaxAffixLen = 8, | |
71 }; | |
72 struct HashEntryItem { | |
73 hentry entry; | |
74 char word[kMaxWordLen]; | |
75 unsigned short astr[kMaxAffixLen]; | |
76 }; | |
77 | |
78 HashEntryItem hash_items_[MAX_ROOTS]; | |
79 }; | |
80 | |
81 ScopedHashEntryFactory::ScopedHashEntryFactory() { | |
82 memset(&hash_items_[0], 0, sizeof(hash_items_)); | |
83 } | |
84 | |
85 ScopedHashEntryFactory::~ScopedHashEntryFactory() { | |
86 } | |
87 | |
88 hentry* ScopedHashEntryFactory::CreateScopedHashEntry(int index, | |
89 const hentry* source) { | |
90 if (index >= MAX_ROOTS || source->blen >= kMaxWordLen) | |
91 return NULL; | |
92 | |
93 // Retrieve a HashEntryItem struct from our spool, initialize it, and | |
94 // returns the address of its 'hentry' member. | |
95 size_t source_size = sizeof(hentry) + source->blen + 1; | |
96 HashEntryItem* hash_item = &hash_items_[index]; | |
97 memcpy(&hash_item->entry, source, source_size); | |
98 if (source->astr) { | |
99 hash_item->entry.alen = source->alen; | |
100 if (hash_item->entry.alen > kMaxAffixLen) | |
101 hash_item->entry.alen = kMaxAffixLen; | |
102 memcpy(hash_item->astr, source->astr, hash_item->entry.alen * sizeof(hash_it
em->astr[0])); | |
103 hash_item->entry.astr = &hash_item->astr[0]; | |
104 } | |
105 return &hash_item->entry; | |
106 } | |
107 | |
108 } // namespace | |
109 #endif | |
110 | |
111 | |
112 #ifdef HUNSPELL_CHROME_CLIENT | |
113 SuggestMgr::SuggestMgr(hunspell::BDictReader* reader, | |
114 const char * tryme, int maxn, | |
115 AffixMgr * aptr) | |
116 { | |
117 bdict_reader = reader; | |
118 #else | |
119 SuggestMgr::SuggestMgr(const char * tryme, int maxn, | |
120 AffixMgr * aptr) | |
121 { | |
122 #endif | |
123 | |
124 // register affix manager and check in string of chars to | |
125 // try when building candidate suggestions | |
126 pAMgr = aptr; | |
127 | |
128 csconv = NULL; | |
129 | |
130 ckeyl = 0; | |
131 ckey = NULL; | |
132 ckey_utf = NULL; | |
133 | |
134 ctryl = 0; | |
135 ctry = NULL; | |
136 ctry_utf = NULL; | |
137 | |
138 utf8 = 0; | |
139 langnum = 0; | |
140 complexprefixes = 0; | |
141 | |
142 maxSug = maxn; | |
143 nosplitsugs = 0; | |
144 maxngramsugs = MAXNGRAMSUGS; | |
145 maxcpdsugs = MAXCOMPOUNDSUGS; | |
146 | |
147 if (pAMgr) { | |
148 langnum = pAMgr->get_langnum(); | |
149 ckey = pAMgr->get_key_string(); | |
150 nosplitsugs = pAMgr->get_nosplitsugs(); | |
151 if (pAMgr->get_maxngramsugs() >= 0) | |
152 maxngramsugs = pAMgr->get_maxngramsugs(); | |
153 utf8 = pAMgr->get_utf8(); | |
154 if (pAMgr->get_maxcpdsugs() >= 0) | |
155 maxcpdsugs = pAMgr->get_maxcpdsugs(); | |
156 if (!utf8) | |
157 { | |
158 char * enc = pAMgr->get_encoding(); | |
159 csconv = get_current_cs(enc); | |
160 free(enc); | |
161 } | |
162 complexprefixes = pAMgr->get_complexprefixes(); | |
163 } | |
164 | |
165 if (ckey) { | |
166 if (utf8) { | |
167 w_char t[MAXSWL]; | |
168 ckeyl = u8_u16(t, MAXSWL, ckey); | |
169 ckey_utf = (w_char *) malloc(ckeyl * sizeof(w_char)); | |
170 if (ckey_utf) memcpy(ckey_utf, t, ckeyl * sizeof(w_char)); | |
171 else ckeyl = 0; | |
172 } else { | |
173 ckeyl = strlen(ckey); | |
174 } | |
175 } | |
176 | |
177 if (tryme) { | |
178 ctry = mystrdup(tryme); | |
179 if (ctry) ctryl = strlen(ctry); | |
180 if (ctry && utf8) { | |
181 w_char t[MAXSWL]; | |
182 ctryl = u8_u16(t, MAXSWL, tryme); | |
183 ctry_utf = (w_char *) malloc(ctryl * sizeof(w_char)); | |
184 if (ctry_utf) memcpy(ctry_utf, t, ctryl * sizeof(w_char)); | |
185 else ctryl = 0; | |
186 } | |
187 } | |
188 } | |
189 | |
190 | |
191 SuggestMgr::~SuggestMgr() | |
192 { | |
193 pAMgr = NULL; | |
194 if (ckey) free(ckey); | |
195 ckey = NULL; | |
196 if (ckey_utf) free(ckey_utf); | |
197 ckey_utf = NULL; | |
198 ckeyl = 0; | |
199 if (ctry) free(ctry); | |
200 ctry = NULL; | |
201 if (ctry_utf) free(ctry_utf); | |
202 ctry_utf = NULL; | |
203 ctryl = 0; | |
204 maxSug = 0; | |
205 #ifdef MOZILLA_CLIENT | |
206 delete [] csconv; | |
207 #endif | |
208 } | |
209 | |
210 int SuggestMgr::testsug(char** wlst, const char * candidate, int wl, int ns, int
cpdsuggest, | |
211 int * timer, clock_t * timelimit) { | |
212 int cwrd = 1; | |
213 if (ns == maxSug) return maxSug; | |
214 for (int k=0; k < ns; k++) { | |
215 if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; | |
216 } | |
217 if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { | |
218 wlst[ns] = mystrdup(candidate); | |
219 if (wlst[ns] == NULL) { | |
220 for (int j=0; j<ns; j++) free(wlst[j]); | |
221 return -1; | |
222 } | |
223 ns++; | |
224 } | |
225 return ns; | |
226 } | |
227 | |
228 // generate suggestions for a misspelled word | |
229 // pass in address of array of char * pointers | |
230 // onlycompoundsug: probably bad suggestions (need for ngram sugs, too) | |
231 | |
232 int SuggestMgr::suggest(char*** slst, const char * w, int nsug, | |
233 int * onlycompoundsug) | |
234 { | |
235 int nocompoundtwowords = 0; | |
236 char ** wlst; | |
237 w_char word_utf[MAXSWL]; | |
238 int wl = 0; | |
239 int nsugorig = nsug; | |
240 char w2[MAXWORDUTF8LEN]; | |
241 const char * word = w; | |
242 int oldSug = 0; | |
243 | |
244 // word reversing wrapper for complex prefixes | |
245 if (complexprefixes) { | |
246 strcpy(w2, w); | |
247 if (utf8) reverseword_utf(w2); else reverseword(w2); | |
248 word = w2; | |
249 } | |
250 | |
251 if (*slst) { | |
252 wlst = *slst; | |
253 } else { | |
254 wlst = (char **) malloc(maxSug * sizeof(char *)); | |
255 if (wlst == NULL) return -1; | |
256 for (int i = 0; i < maxSug; i++) { | |
257 wlst[i] = NULL; | |
258 } | |
259 } | |
260 | |
261 if (utf8) { | |
262 wl = u8_u16(word_utf, MAXSWL, word); | |
263 if (wl == -1) { | |
264 *slst = wlst; | |
265 return nsug; | |
266 } | |
267 } | |
268 | |
269 for (int cpdsuggest=0; (cpdsuggest<2) && (nocompoundtwowords==0); cpdsuggest
++) { | |
270 | |
271 // limit compound suggestion | |
272 if (cpdsuggest > 0) oldSug = nsug; | |
273 | |
274 // suggestions for an uppercase word (html -> HTML) | |
275 if ((nsug < maxSug) && (nsug > -1)) { | |
276 nsug = (utf8) ? capchars_utf(wlst, word_utf, wl, nsug, cpdsuggest) : | |
277 capchars(wlst, word, nsug, cpdsuggest); | |
278 } | |
279 | |
280 // perhaps we made a typical fault of spelling | |
281 if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcp
dsugs))) { | |
282 nsug = replchars(wlst, word, nsug, cpdsuggest); | |
283 } | |
284 | |
285 // perhaps we made chose the wrong char from a related set | |
286 if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcp
dsugs))) { | |
287 nsug = mapchars(wlst, word, nsug, cpdsuggest); | |
288 } | |
289 | |
290 // only suggest compound words when no other suggestion | |
291 if ((cpdsuggest == 0) && (nsug > nsugorig)) nocompoundtwowords=1; | |
292 | |
293 // did we swap the order of chars by mistake | |
294 if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcp
dsugs))) { | |
295 nsug = (utf8) ? swapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) : | |
296 swapchar(wlst, word, nsug, cpdsuggest); | |
297 } | |
298 | |
299 // did we swap the order of non adjacent chars by mistake | |
300 if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcp
dsugs))) { | |
301 nsug = (utf8) ? longswapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) : | |
302 longswapchar(wlst, word, nsug, cpdsuggest); | |
303 } | |
304 | |
305 // did we just hit the wrong key in place of a good char (case and keyboard) | |
306 if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcp
dsugs))) { | |
307 nsug = (utf8) ? badcharkey_utf(wlst, word_utf, wl, nsug, cpdsuggest) : | |
308 badcharkey(wlst, word, nsug, cpdsuggest); | |
309 } | |
310 | |
311 // did we add a char that should not be there | |
312 if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcp
dsugs))) { | |
313 nsug = (utf8) ? extrachar_utf(wlst, word_utf, wl, nsug, cpdsuggest) : | |
314 extrachar(wlst, word, nsug, cpdsuggest); | |
315 } | |
316 | |
317 | |
318 // did we forgot a char | |
319 if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcp
dsugs))) { | |
320 nsug = (utf8) ? forgotchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) : | |
321 forgotchar(wlst, word, nsug, cpdsuggest); | |
322 } | |
323 | |
324 // did we move a char | |
325 if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcp
dsugs))) { | |
326 nsug = (utf8) ? movechar_utf(wlst, word_utf, wl, nsug, cpdsuggest) : | |
327 movechar(wlst, word, nsug, cpdsuggest); | |
328 } | |
329 | |
330 // did we just hit the wrong key in place of a good char | |
331 if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcp
dsugs))) { | |
332 nsug = (utf8) ? badchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) : | |
333 badchar(wlst, word, nsug, cpdsuggest); | |
334 } | |
335 | |
336 // did we double two characters | |
337 if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcp
dsugs))) { | |
338 nsug = (utf8) ? doubletwochars_utf(wlst, word_utf, wl, nsug, cpdsuggest)
: | |
339 doubletwochars(wlst, word, nsug, cpdsuggest); | |
340 } | |
341 | |
342 // perhaps we forgot to hit space and two words ran together | |
343 if (!nosplitsugs && (nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug
< oldSug + maxcpdsugs))) { | |
344 nsug = twowords(wlst, word, nsug, cpdsuggest); | |
345 } | |
346 | |
347 } // repeating ``for'' statement compounding support | |
348 | |
349 if (nsug < 0) { | |
350 // we ran out of memory - we should free up as much as possible | |
351 for (int i = 0; i < maxSug; i++) | |
352 if (wlst[i] != NULL) free(wlst[i]); | |
353 free(wlst); | |
354 wlst = NULL; | |
355 } | |
356 | |
357 if (!nocompoundtwowords && (nsug > 0) && onlycompoundsug) *onlycompoundsug =
1; | |
358 | |
359 *slst = wlst; | |
360 return nsug; | |
361 } | |
362 | |
363 // generate suggestions for a word with typical mistake | |
364 // pass in address of array of char * pointers | |
365 #ifdef HUNSPELL_EXPERIMENTAL | |
366 int SuggestMgr::suggest_auto(char*** slst, const char * w, int nsug) | |
367 { | |
368 int nocompoundtwowords = 0; | |
369 char ** wlst; | |
370 int oldSug; | |
371 | |
372 char w2[MAXWORDUTF8LEN]; | |
373 const char * word = w; | |
374 | |
375 // word reversing wrapper for complex prefixes | |
376 if (complexprefixes) { | |
377 strcpy(w2, w); | |
378 if (utf8) reverseword_utf(w2); else reverseword(w2); | |
379 word = w2; | |
380 } | |
381 | |
382 if (*slst) { | |
383 wlst = *slst; | |
384 } else { | |
385 wlst = (char **) malloc(maxSug * sizeof(char *)); | |
386 if (wlst == NULL) return -1; | |
387 } | |
388 | |
389 for (int cpdsuggest=0; (cpdsuggest<2) && (nocompoundtwowords==0); cpdsuggest
++) { | |
390 | |
391 // limit compound suggestion | |
392 if (cpdsuggest > 0) oldSug = nsug; | |
393 | |
394 // perhaps we made a typical fault of spelling | |
395 if ((nsug < maxSug) && (nsug > -1)) | |
396 nsug = replchars(wlst, word, nsug, cpdsuggest); | |
397 | |
398 // perhaps we made chose the wrong char from a related set | |
399 if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcp
dsugs))) | |
400 nsug = mapchars(wlst, word, nsug, cpdsuggest); | |
401 | |
402 if ((cpdsuggest==0) && (nsug>0)) nocompoundtwowords=1; | |
403 | |
404 // perhaps we forgot to hit space and two words ran together | |
405 | |
406 if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcp
dsugs)) && check_forbidden(word, strlen(word))) { | |
407 nsug = twowords(wlst, word, nsug, cpdsuggest); | |
408 } | |
409 | |
410 } // repeating ``for'' statement compounding support | |
411 | |
412 if (nsug < 0) { | |
413 for (int i=0;i<maxSug; i++) | |
414 if (wlst[i] != NULL) free(wlst[i]); | |
415 free(wlst); | |
416 return -1; | |
417 } | |
418 | |
419 *slst = wlst; | |
420 return nsug; | |
421 } | |
422 #endif // END OF HUNSPELL_EXPERIMENTAL CODE | |
423 | |
424 // suggestions for an uppercase word (html -> HTML) | |
425 int SuggestMgr::capchars_utf(char ** wlst, const w_char * word, int wl, int ns,
int cpdsuggest) | |
426 { | |
427 char candidate[MAXSWUTF8L]; | |
428 w_char candidate_utf[MAXSWL]; | |
429 memcpy(candidate_utf, word, wl * sizeof(w_char)); | |
430 mkallcap_utf(candidate_utf, wl, langnum); | |
431 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); | |
432 return testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL)
; | |
433 } | |
434 | |
435 // suggestions for an uppercase word (html -> HTML) | |
436 int SuggestMgr::capchars(char** wlst, const char * word, int ns, int cpdsuggest) | |
437 { | |
438 char candidate[MAXSWUTF8L]; | |
439 strcpy(candidate, word); | |
440 mkallcap(candidate, csconv); | |
441 return testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL)
; | |
442 } | |
443 | |
444 // suggestions for when chose the wrong char out of a related set | |
445 int SuggestMgr::mapchars(char** wlst, const char * word, int ns, int cpdsuggest) | |
446 { | |
447 char candidate[MAXSWUTF8L]; | |
448 clock_t timelimit; | |
449 int timer; | |
450 candidate[0] = '\0'; | |
451 | |
452 int wl = strlen(word); | |
453 if (wl < 2 || ! pAMgr) return ns; | |
454 | |
455 int nummap = pAMgr->get_nummap(); | |
456 struct mapentry* maptable = pAMgr->get_maptable(); | |
457 if (maptable==NULL) return ns; | |
458 | |
459 timelimit = clock(); | |
460 timer = MINTIMER; | |
461 return map_related(word, (char *) &candidate, 0, 0, wlst, cpdsuggest, ns, mapt
able, nummap, &timer, &timelimit); | |
462 } | |
463 | |
464 int SuggestMgr::map_related(const char * word, char * candidate, int wn, int cn, | |
465 char** wlst, int cpdsuggest, int ns, | |
466 const mapentry* maptable, int nummap, int * timer, clock_t * timelimit) | |
467 { | |
468 if (*(word + wn) == '\0') { | |
469 int cwrd = 1; | |
470 *(candidate + cn) = '\0'; | |
471 int wl = strlen(candidate); | |
472 for (int m=0; m < ns; m++) | |
473 if (strcmp(candidate, wlst[m]) == 0) cwrd = 0; | |
474 if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { | |
475 if (ns < maxSug) { | |
476 wlst[ns] = mystrdup(candidate); | |
477 if (wlst[ns] == NULL) return -1; | |
478 ns++; | |
479 } | |
480 } | |
481 return ns; | |
482 } | |
483 int in_map = 0; | |
484 for (int j = 0; j < nummap; j++) { | |
485 for (int k = 0; k < maptable[j].len; k++) { | |
486 int len = strlen(maptable[j].set[k]); | |
487 if (strncmp(maptable[j].set[k], word + wn, len) == 0) { | |
488 in_map = 1; | |
489 for (int l = 0; l < maptable[j].len; l++) { | |
490 strcpy(candidate + cn, maptable[j].set[l]); | |
491 ns = map_related(word, candidate, wn + len, strlen(candidate), wlst, | |
492 cpdsuggest, ns, maptable, nummap, timer, timelimit); | |
493 if (!(*timer)) return ns; | |
494 } | |
495 } | |
496 } | |
497 } | |
498 if (!in_map) { | |
499 *(candidate + cn) = *(word + wn); | |
500 ns = map_related(word, candidate, wn + 1, cn + 1, wlst, cpdsuggest, | |
501 ns, maptable, nummap, timer, timelimit); | |
502 } | |
503 return ns; | |
504 } | |
505 | |
506 // suggestions for a typical fault of spelling, that | |
507 // differs with more, than 1 letter from the right form. | |
508 int SuggestMgr::replchars(char** wlst, const char * word, int ns, int cpdsuggest
) | |
509 { | |
510 char candidate[MAXSWUTF8L]; | |
511 const char * r; | |
512 int lenr, lenp; | |
513 int wl = strlen(word); | |
514 if (wl < 2 || ! pAMgr) return ns; | |
515 | |
516 #ifdef HUNSPELL_CHROME_CLIENT | |
517 const char *pattern, *pattern2; | |
518 hunspell::ReplacementIterator iterator = bdict_reader->GetReplacementIterator(
); | |
519 while (iterator.GetNext(&pattern, &pattern2)) { | |
520 r = word; | |
521 lenr = strlen(pattern2); | |
522 lenp = strlen(pattern); | |
523 | |
524 // search every occurence of the pattern in the word | |
525 while ((r=strstr(r, pattern)) != NULL) { | |
526 strcpy(candidate, word); | |
527 if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break; | |
528 strcpy(candidate+(r-word), pattern2); | |
529 strcpy(candidate+(r-word)+lenr, r+lenp); | |
530 ns = testsug(wlst, candidate, wl-lenp+lenr, ns, cpdsuggest, NULL, NULL
); | |
531 if (ns == -1) return -1; | |
532 // check REP suggestions with space | |
533 char * sp = strchr(candidate, ' '); | |
534 if (sp) { | |
535 char * prev = candidate; | |
536 while (sp) { | |
537 *sp = '\0'; | |
538 if (checkword(prev, strlen(prev), 0, NULL, NULL)) { | |
539 int oldns = ns; | |
540 *sp = ' '; | |
541 ns = testsug(wlst, sp + 1, strlen(sp + 1), ns, cpdsuggest, NULL,
NULL); | |
542 if (ns == -1) return -1; | |
543 if (oldns < ns) { | |
544 free(wlst[ns - 1]); | |
545 wlst[ns - 1] = mystrdup(candidate); | |
546 if (!wlst[ns - 1]) return -1; | |
547 } | |
548 } | |
549 *sp = ' '; | |
550 prev = sp + 1; | |
551 sp = strchr(prev, ' '); | |
552 } | |
553 } | |
554 r++; // search for the next letter | |
555 } | |
556 } | |
557 #else | |
558 int numrep = pAMgr->get_numrep(); | |
559 struct replentry* reptable = pAMgr->get_reptable(); | |
560 if (reptable==NULL) return ns; | |
561 for (int i=0; i < numrep; i++ ) { | |
562 r = word; | |
563 lenr = strlen(reptable[i].pattern2); | |
564 lenp = strlen(reptable[i].pattern); | |
565 // search every occurence of the pattern in the word | |
566 while ((r=strstr(r, reptable[i].pattern)) != NULL && (!reptable[i].end ||
strlen(r) == strlen(reptable[i].pattern)) && | |
567 (!reptable[i].start || r == word)) { | |
568 strcpy(candidate, word); | |
569 if (r-word + lenr + strlen(r+lenp) >= MAXSWUTF8L) break; | |
570 strcpy(candidate+(r-word),reptable[i].pattern2); | |
571 strcpy(candidate+(r-word)+lenr, r+lenp); | |
572 ns = testsug(wlst, candidate, wl-lenp+lenr, ns, cpdsuggest, NULL, NULL
); | |
573 if (ns == -1) return -1; | |
574 // check REP suggestions with space | |
575 char * sp = strchr(candidate, ' '); | |
576 if (sp) { | |
577 char * prev = candidate; | |
578 while (sp) { | |
579 *sp = '\0'; | |
580 if (checkword(prev, strlen(prev), 0, NULL, NULL)) { | |
581 int oldns = ns; | |
582 *sp = ' '; | |
583 ns = testsug(wlst, sp + 1, strlen(sp + 1), ns, cpdsuggest, NULL,
NULL); | |
584 if (ns == -1) return -1; | |
585 if (oldns < ns) { | |
586 free(wlst[ns - 1]); | |
587 wlst[ns - 1] = mystrdup(candidate); | |
588 if (!wlst[ns - 1]) return -1; | |
589 } | |
590 } | |
591 *sp = ' '; | |
592 prev = sp + 1; | |
593 sp = strchr(prev, ' '); | |
594 } | |
595 } | |
596 r++; // search for the next letter | |
597 } | |
598 } | |
599 #endif | |
600 return ns; | |
601 } | |
602 | |
603 // perhaps we doubled two characters (pattern aba -> ababa, for example vacation
-> vacacation) | |
604 int SuggestMgr::doubletwochars(char** wlst, const char * word, int ns, int cpdsu
ggest) | |
605 { | |
606 char candidate[MAXSWUTF8L]; | |
607 int state=0; | |
608 int wl = strlen(word); | |
609 if (wl < 5 || ! pAMgr) return ns; | |
610 for (int i=2; i < wl; i++ ) { | |
611 if (word[i]==word[i-2]) { | |
612 state++; | |
613 if (state==3) { | |
614 strcpy(candidate,word); | |
615 strcpy(candidate+i-1,word+i+1); | |
616 ns = testsug(wlst, candidate, wl-2, ns, cpdsuggest, NULL, NULL); | |
617 if (ns == -1) return -1; | |
618 state=0; | |
619 } | |
620 } else { | |
621 state=0; | |
622 } | |
623 } | |
624 return ns; | |
625 } | |
626 | |
627 // perhaps we doubled two characters (pattern aba -> ababa, for example vacation
-> vacacation) | |
628 int SuggestMgr::doubletwochars_utf(char ** wlst, const w_char * word, int wl, in
t ns, int cpdsuggest) | |
629 { | |
630 w_char candidate_utf[MAXSWL]; | |
631 char candidate[MAXSWUTF8L]; | |
632 int state=0; | |
633 if (wl < 5 || ! pAMgr) return ns; | |
634 for (int i=2; i < wl; i++) { | |
635 if (w_char_eq(word[i], word[i-2])) { | |
636 state++; | |
637 if (state==3) { | |
638 memcpy(candidate_utf, word, (i - 1) * sizeof(w_char)); | |
639 memcpy(candidate_utf+i-1, word+i+1, (wl-i-1) * sizeof(w_char)); | |
640 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl-2); | |
641 ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NUL
L, NULL); | |
642 if (ns == -1) return -1; | |
643 state=0; | |
644 } | |
645 } else { | |
646 state=0; | |
647 } | |
648 } | |
649 return ns; | |
650 } | |
651 | |
652 // error is wrong char in place of correct one (case and keyboard related versio
n) | |
653 int SuggestMgr::badcharkey(char ** wlst, const char * word, int ns, int cpdsugge
st) | |
654 { | |
655 char tmpc; | |
656 char candidate[MAXSWUTF8L]; | |
657 int wl = strlen(word); | |
658 strcpy(candidate, word); | |
659 // swap out each char one by one and try uppercase and neighbor | |
660 // keyboard chars in its place to see if that makes a good word | |
661 | |
662 for (int i=0; i < wl; i++) { | |
663 tmpc = candidate[i]; | |
664 // check with uppercase letters | |
665 candidate[i] = csconv[((unsigned char)tmpc)].cupper; | |
666 if (tmpc != candidate[i]) { | |
667 ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); | |
668 if (ns == -1) return -1; | |
669 candidate[i] = tmpc; | |
670 } | |
671 // check neighbor characters in keyboard string | |
672 if (!ckey) continue; | |
673 char * loc = strchr(ckey, tmpc); | |
674 while (loc) { | |
675 if ((loc > ckey) && (*(loc - 1) != '|')) { | |
676 candidate[i] = *(loc - 1); | |
677 ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); | |
678 if (ns == -1) return -1; | |
679 } | |
680 if ((*(loc + 1) != '|') && (*(loc + 1) != '\0')) { | |
681 candidate[i] = *(loc + 1); | |
682 ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); | |
683 if (ns == -1) return -1; | |
684 } | |
685 loc = strchr(loc + 1, tmpc); | |
686 } | |
687 candidate[i] = tmpc; | |
688 } | |
689 return ns; | |
690 } | |
691 | |
692 // error is wrong char in place of correct one (case and keyboard related versio
n) | |
693 int SuggestMgr::badcharkey_utf(char ** wlst, const w_char * word, int wl, int ns
, int cpdsuggest) | |
694 { | |
695 w_char tmpc; | |
696 w_char candidate_utf[MAXSWL]; | |
697 char candidate[MAXSWUTF8L]; | |
698 memcpy(candidate_utf, word, wl * sizeof(w_char)); | |
699 // swap out each char one by one and try all the tryme | |
700 // chars in its place to see if that makes a good word | |
701 for (int i=0; i < wl; i++) { | |
702 tmpc = candidate_utf[i]; | |
703 // check with uppercase letters | |
704 mkallcap_utf(candidate_utf + i, 1, langnum); | |
705 if (!w_char_eq(tmpc, candidate_utf[i])) { | |
706 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); | |
707 ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NU
LL); | |
708 if (ns == -1) return -1; | |
709 candidate_utf[i] = tmpc; | |
710 } | |
711 // check neighbor characters in keyboard string | |
712 if (!ckey) continue; | |
713 w_char * loc = ckey_utf; | |
714 while ((loc < (ckey_utf + ckeyl)) && !w_char_eq(*loc, tmpc)) loc++; | |
715 while (loc < (ckey_utf + ckeyl)) { | |
716 if ((loc > ckey_utf) && !w_char_eq(*(loc - 1), W_VLINE)) { | |
717 candidate_utf[i] = *(loc - 1); | |
718 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); | |
719 ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL,
NULL); | |
720 if (ns == -1) return -1; | |
721 } | |
722 if (((loc + 1) < (ckey_utf + ckeyl)) && !w_char_eq(*(loc + 1), W_VLINE))
{ | |
723 candidate_utf[i] = *(loc + 1); | |
724 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); | |
725 ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL,
NULL); | |
726 if (ns == -1) return -1; | |
727 } | |
728 do { loc++; } while ((loc < (ckey_utf + ckeyl)) && !w_char_eq(*loc, tmpc)
); | |
729 } | |
730 candidate_utf[i] = tmpc; | |
731 } | |
732 return ns; | |
733 } | |
734 | |
735 // error is wrong char in place of correct one | |
736 int SuggestMgr::badchar(char ** wlst, const char * word, int ns, int cpdsuggest) | |
737 { | |
738 char tmpc; | |
739 char candidate[MAXSWUTF8L]; | |
740 clock_t timelimit = clock(); | |
741 int timer = MINTIMER; | |
742 int wl = strlen(word); | |
743 strcpy(candidate, word); | |
744 // swap out each char one by one and try all the tryme | |
745 // chars in its place to see if that makes a good word | |
746 for (int j=0; j < ctryl; j++) { | |
747 for (int i=wl-1; i >= 0; i--) { | |
748 tmpc = candidate[i]; | |
749 if (ctry[j] == tmpc) continue; | |
750 candidate[i] = ctry[j]; | |
751 ns = testsug(wlst, candidate, wl, ns, cpdsuggest, &timer, &timelimit); | |
752 if (ns == -1) return -1; | |
753 if (!timer) return ns; | |
754 candidate[i] = tmpc; | |
755 } | |
756 } | |
757 return ns; | |
758 } | |
759 | |
760 // error is wrong char in place of correct one | |
761 int SuggestMgr::badchar_utf(char ** wlst, const w_char * word, int wl, int ns, i
nt cpdsuggest) | |
762 { | |
763 w_char tmpc; | |
764 w_char candidate_utf[MAXSWL]; | |
765 char candidate[MAXSWUTF8L]; | |
766 clock_t timelimit = clock(); | |
767 int timer = MINTIMER; | |
768 memcpy(candidate_utf, word, wl * sizeof(w_char)); | |
769 // swap out each char one by one and try all the tryme | |
770 // chars in its place to see if that makes a good word | |
771 for (int j=0; j < ctryl; j++) { | |
772 for (int i=wl-1; i >= 0; i--) { | |
773 tmpc = candidate_utf[i]; | |
774 if (w_char_eq(tmpc, ctry_utf[j])) continue; | |
775 candidate_utf[i] = ctry_utf[j]; | |
776 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); | |
777 ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, &timer,
&timelimit); | |
778 if (ns == -1) return -1; | |
779 if (!timer) return ns; | |
780 candidate_utf[i] = tmpc; | |
781 } | |
782 } | |
783 return ns; | |
784 } | |
785 | |
786 // error is word has an extra letter it does not need | |
787 int SuggestMgr::extrachar_utf(char** wlst, const w_char * word, int wl, int ns,
int cpdsuggest) | |
788 { | |
789 char candidate[MAXSWUTF8L]; | |
790 w_char candidate_utf[MAXSWL]; | |
791 w_char * p; | |
792 w_char tmpc = W_VLINE; // not used value, only for VCC warning message | |
793 if (wl < 2) return ns; | |
794 // try omitting one char of word at a time | |
795 memcpy(candidate_utf, word, wl * sizeof(w_char)); | |
796 for (p = candidate_utf + wl - 1; p >= candidate_utf; p--) { | |
797 w_char tmpc2 = *p; | |
798 if (p < candidate_utf + wl - 1) *p = tmpc; | |
799 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl - 1); | |
800 ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NU
LL); | |
801 if (ns == -1) return -1; | |
802 tmpc = tmpc2; | |
803 } | |
804 return ns; | |
805 } | |
806 | |
807 // error is word has an extra letter it does not need | |
808 int SuggestMgr::extrachar(char** wlst, const char * word, int ns, int cpdsuggest
) | |
809 { | |
810 char tmpc = '\0'; | |
811 char candidate[MAXSWUTF8L]; | |
812 char * p; | |
813 int wl = strlen(word); | |
814 if (wl < 2) return ns; | |
815 // try omitting one char of word at a time | |
816 strcpy (candidate, word); | |
817 for (p = candidate + wl - 1; p >=candidate; p--) { | |
818 char tmpc2 = *p; | |
819 *p = tmpc; | |
820 ns = testsug(wlst, candidate, wl-1, ns, cpdsuggest, NULL, NULL); | |
821 if (ns == -1) return -1; | |
822 tmpc = tmpc2; | |
823 } | |
824 return ns; | |
825 } | |
826 | |
827 // error is missing a letter it needs | |
828 int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsugge
st) | |
829 { | |
830 // TODO(rouslan): Remove the interim change below when this patch lands: | |
831 // http://sf.net/tracker/?func=detail&aid=3595024&group_id=143754&atid=756395 | |
832 char candidate[MAXSWUTF8L + 4]; | |
833 char * p; | |
834 clock_t timelimit = clock(); | |
835 int timer = MINTIMER; | |
836 int wl = strlen(word); | |
837 // try inserting a tryme character before every letter (and the null terminat
or) | |
838 for (int i = 0; i < ctryl; i++) { | |
839 strcpy(candidate, word); | |
840 for (p = candidate + wl; p >= candidate; p--) { | |
841 *(p+1) = *p; | |
842 *p = ctry[i]; | |
843 ns = testsug(wlst, candidate, wl+1, ns, cpdsuggest, &timer, &timelimit)
; | |
844 if (ns == -1) return -1; | |
845 if (!timer) return ns; | |
846 } | |
847 } | |
848 return ns; | |
849 } | |
850 | |
851 // error is missing a letter it needs | |
852 int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns
, int cpdsuggest) | |
853 { | |
854 // TODO(rouslan): Remove the interim change below when this patch lands: | |
855 // http://sf.net/tracker/?func=detail&aid=3595024&group_id=143754&atid=756395 | |
856 w_char candidate_utf[MAXSWL + 1]; | |
857 char candidate[MAXSWUTF8L + 4]; | |
858 w_char * p; | |
859 clock_t timelimit = clock(); | |
860 int timer = MINTIMER; | |
861 // try inserting a tryme character at the end of the word and before every le
tter | |
862 for (int i = 0; i < ctryl; i++) { | |
863 memcpy (candidate_utf, word, wl * sizeof(w_char)); | |
864 for (p = candidate_utf + wl; p >= candidate_utf; p--) { | |
865 *(p + 1) = *p; | |
866 *p = ctry_utf[i]; | |
867 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl + 1); | |
868 ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, &timer
, &timelimit); | |
869 if (ns == -1) return -1; | |
870 if (!timer) return ns; | |
871 } | |
872 } | |
873 return ns; | |
874 } | |
875 | |
876 | |
877 /* error is should have been two words */ | |
878 int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest
) | |
879 { | |
880 char candidate[MAXSWUTF8L]; | |
881 char * p; | |
882 int c1, c2; | |
883 int forbidden = 0; | |
884 int cwrd; | |
885 | |
886 int wl=strlen(word); | |
887 if (wl < 3) return ns; | |
888 | |
889 if (langnum == LANG_hu) forbidden = check_forbidden(word, wl); | |
890 | |
891 strcpy(candidate + 1, word); | |
892 // split the string into two pieces after every char | |
893 // if both pieces are good words make them a suggestion | |
894 for (p = candidate + 1; p[1] != '\0'; p++) { | |
895 p[-1] = *p; | |
896 // go to end of the UTF-8 character | |
897 while (utf8 && ((p[1] & 0xc0) == 0x80)) { | |
898 *p = p[1]; | |
899 p++; | |
900 } | |
901 if (utf8 && p[1] == '\0') break; // last UTF-8 character | |
902 *p = '\0'; | |
903 c1 = checkword(candidate,strlen(candidate), cpdsuggest, NULL, NULL); | |
904 if (c1) { | |
905 c2 = checkword((p+1),strlen(p+1), cpdsuggest, NULL, NULL); | |
906 if (c2) { | |
907 *p = ' '; | |
908 | |
909 // spec. Hungarian code (need a better compound word support) | |
910 if ((langnum == LANG_hu) && !forbidden && | |
911 // if 3 repeating letter, use - instead of space | |
912 (((p[-1] == p[1]) && (((p>candidate+1) && (p[-1] == p[-2])) || (
p[-1] == p[2]))) || | |
913 // or multiple compounding, with more, than 6 syllables | |
914 ((c1 == 3) && (c2 >= 2)))) *p = '-'; | |
915 | |
916 cwrd = 1; | |
917 for (int k=0; k < ns; k++) | |
918 if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; | |
919 if (ns < maxSug) { | |
920 if (cwrd) { | |
921 wlst[ns] = mystrdup(candidate); | |
922 if (wlst[ns] == NULL) return -1; | |
923 ns++; | |
924 } | |
925 } else return ns; | |
926 // add two word suggestion with dash, if TRY string contains | |
927 // "a" or "-" | |
928 // NOTE: cwrd doesn't modified for REP twoword sugg. | |
929 if (ctry && (strchr(ctry, 'a') || strchr(ctry, '-')) && | |
930 mystrlen(p + 1) > 1 && | |
931 mystrlen(candidate) - mystrlen(p) > 1) { | |
932 *p = '-'; | |
933 for (int k=0; k < ns; k++) | |
934 if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; | |
935 if (ns < maxSug) { | |
936 if (cwrd) { | |
937 wlst[ns] = mystrdup(candidate); | |
938 if (wlst[ns] == NULL) return -1; | |
939 ns++; | |
940 } | |
941 } else return ns; | |
942 } | |
943 } | |
944 } | |
945 } | |
946 return ns; | |
947 } | |
948 | |
949 | |
950 // error is adjacent letter were swapped | |
951 int SuggestMgr::swapchar(char ** wlst, const char * word, int ns, int cpdsuggest
) | |
952 { | |
953 char candidate[MAXSWUTF8L]; | |
954 char * p; | |
955 char tmpc; | |
956 int wl=strlen(word); | |
957 // try swapping adjacent chars one by one | |
958 strcpy(candidate, word); | |
959 for (p = candidate; p[1] != 0; p++) { | |
960 tmpc = *p; | |
961 *p = p[1]; | |
962 p[1] = tmpc; | |
963 ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); | |
964 if (ns == -1) return -1; | |
965 p[1] = *p; | |
966 *p = tmpc; | |
967 } | |
968 // try double swaps for short words | |
969 // ahev -> have, owudl -> would | |
970 if (wl == 4 || wl == 5) { | |
971 candidate[0] = word[1]; | |
972 candidate[1] = word[0]; | |
973 candidate[2] = word[2]; | |
974 candidate[wl - 2] = word[wl - 1]; | |
975 candidate[wl - 1] = word[wl - 2]; | |
976 ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); | |
977 if (ns == -1) return -1; | |
978 if (wl == 5) { | |
979 candidate[0] = word[0]; | |
980 candidate[1] = word[2]; | |
981 candidate[2] = word[1]; | |
982 ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); | |
983 if (ns == -1) return -1; | |
984 } | |
985 } | |
986 return ns; | |
987 } | |
988 | |
989 // error is adjacent letter were swapped | |
990 int SuggestMgr::swapchar_utf(char ** wlst, const w_char * word, int wl, int ns,
int cpdsuggest) | |
991 { | |
992 w_char candidate_utf[MAXSWL]; | |
993 char candidate[MAXSWUTF8L]; | |
994 w_char * p; | |
995 w_char tmpc; | |
996 int len = 0; | |
997 // try swapping adjacent chars one by one | |
998 memcpy (candidate_utf, word, wl * sizeof(w_char)); | |
999 for (p = candidate_utf; p < (candidate_utf + wl - 1); p++) { | |
1000 tmpc = *p; | |
1001 *p = p[1]; | |
1002 p[1] = tmpc; | |
1003 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); | |
1004 if (len == 0) len = strlen(candidate); | |
1005 ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL); | |
1006 if (ns == -1) return -1; | |
1007 p[1] = *p; | |
1008 *p = tmpc; | |
1009 } | |
1010 // try double swaps for short words | |
1011 // ahev -> have, owudl -> would, suodn -> sound | |
1012 if (wl == 4 || wl == 5) { | |
1013 candidate_utf[0] = word[1]; | |
1014 candidate_utf[1] = word[0]; | |
1015 candidate_utf[2] = word[2]; | |
1016 candidate_utf[wl - 2] = word[wl - 1]; | |
1017 candidate_utf[wl - 1] = word[wl - 2]; | |
1018 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); | |
1019 ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL); | |
1020 if (ns == -1) return -1; | |
1021 if (wl == 5) { | |
1022 candidate_utf[0] = word[0]; | |
1023 candidate_utf[1] = word[2]; | |
1024 candidate_utf[2] = word[1]; | |
1025 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); | |
1026 ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL); | |
1027 if (ns == -1) return -1; | |
1028 } | |
1029 } | |
1030 return ns; | |
1031 } | |
1032 | |
1033 // error is not adjacent letter were swapped | |
1034 int SuggestMgr::longswapchar(char ** wlst, const char * word, int ns, int cpdsug
gest) | |
1035 { | |
1036 char candidate[MAXSWUTF8L]; | |
1037 char * p; | |
1038 char * q; | |
1039 char tmpc; | |
1040 int wl=strlen(word); | |
1041 // try swapping not adjacent chars one by one | |
1042 strcpy(candidate, word); | |
1043 for (p = candidate; *p != 0; p++) { | |
1044 for (q = candidate; *q != 0; q++) { | |
1045 if (abs((int)(p-q)) > 1) { | |
1046 tmpc = *p; | |
1047 *p = *q; | |
1048 *q = tmpc; | |
1049 ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); | |
1050 if (ns == -1) return -1; | |
1051 *q = *p; | |
1052 *p = tmpc; | |
1053 } | |
1054 } | |
1055 } | |
1056 return ns; | |
1057 } | |
1058 | |
1059 | |
1060 // error is adjacent letter were swapped | |
1061 int SuggestMgr::longswapchar_utf(char ** wlst, const w_char * word, int wl, int
ns, int cpdsuggest) | |
1062 { | |
1063 w_char candidate_utf[MAXSWL]; | |
1064 char candidate[MAXSWUTF8L]; | |
1065 w_char * p; | |
1066 w_char * q; | |
1067 w_char tmpc; | |
1068 // try swapping not adjacent chars | |
1069 memcpy (candidate_utf, word, wl * sizeof(w_char)); | |
1070 for (p = candidate_utf; p < (candidate_utf + wl); p++) { | |
1071 for (q = candidate_utf; q < (candidate_utf + wl); q++) { | |
1072 if (abs((int)(p-q)) > 1) { | |
1073 tmpc = *p; | |
1074 *p = *q; | |
1075 *q = tmpc; | |
1076 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); | |
1077 ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL,
NULL); | |
1078 if (ns == -1) return -1; | |
1079 *q = *p; | |
1080 *p = tmpc; | |
1081 } | |
1082 } | |
1083 } | |
1084 return ns; | |
1085 } | |
1086 | |
1087 // error is a letter was moved | |
1088 int SuggestMgr::movechar(char ** wlst, const char * word, int ns, int cpdsuggest
) | |
1089 { | |
1090 char candidate[MAXSWUTF8L]; | |
1091 char * p; | |
1092 char * q; | |
1093 char tmpc; | |
1094 | |
1095 int wl=strlen(word); | |
1096 // try moving a char | |
1097 strcpy(candidate, word); | |
1098 for (p = candidate; *p != 0; p++) { | |
1099 for (q = p + 1; (*q != 0) && ((q - p) < 10); q++) { | |
1100 tmpc = *(q-1); | |
1101 *(q-1) = *q; | |
1102 *q = tmpc; | |
1103 if ((q-p) < 2) continue; // omit swap char | |
1104 ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); | |
1105 if (ns == -1) return -1; | |
1106 } | |
1107 strcpy(candidate, word); | |
1108 } | |
1109 for (p = candidate + wl - 1; p > candidate; p--) { | |
1110 for (q = p - 1; (q >= candidate) && ((p - q) < 10); q--) { | |
1111 tmpc = *(q+1); | |
1112 *(q+1) = *q; | |
1113 *q = tmpc; | |
1114 if ((p-q) < 2) continue; // omit swap char | |
1115 ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); | |
1116 if (ns == -1) return -1; | |
1117 } | |
1118 strcpy(candidate, word); | |
1119 } | |
1120 return ns; | |
1121 } | |
1122 | |
1123 // error is a letter was moved | |
1124 int SuggestMgr::movechar_utf(char ** wlst, const w_char * word, int wl, int ns,
int cpdsuggest) | |
1125 { | |
1126 w_char candidate_utf[MAXSWL]; | |
1127 char candidate[MAXSWUTF8L]; | |
1128 w_char * p; | |
1129 w_char * q; | |
1130 w_char tmpc; | |
1131 // try moving a char | |
1132 memcpy (candidate_utf, word, wl * sizeof(w_char)); | |
1133 for (p = candidate_utf; p < (candidate_utf + wl); p++) { | |
1134 for (q = p + 1; (q < (candidate_utf + wl)) && ((q - p) < 10); q++) { | |
1135 tmpc = *(q-1); | |
1136 *(q-1) = *q; | |
1137 *q = tmpc; | |
1138 if ((q-p) < 2) continue; // omit swap char | |
1139 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); | |
1140 ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL,
NULL); | |
1141 if (ns == -1) return -1; | |
1142 } | |
1143 memcpy (candidate_utf, word, wl * sizeof(w_char)); | |
1144 } | |
1145 for (p = candidate_utf + wl - 1; p > candidate_utf; p--) { | |
1146 for (q = p - 1; (q >= candidate_utf) && ((p - q) < 10); q--) { | |
1147 tmpc = *(q+1); | |
1148 *(q+1) = *q; | |
1149 *q = tmpc; | |
1150 if ((p-q) < 2) continue; // omit swap char | |
1151 u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); | |
1152 ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL,
NULL); | |
1153 if (ns == -1) return -1; | |
1154 } | |
1155 memcpy (candidate_utf, word, wl * sizeof(w_char)); | |
1156 } | |
1157 return ns; | |
1158 } | |
1159 | |
1160 // generate a set of suggestions for very poorly spelled words | |
1161 int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md
) | |
1162 { | |
1163 | |
1164 int i, j; | |
1165 int lval; | |
1166 int sc, scphon; | |
1167 int lp, lpphon; | |
1168 int nonbmp = 0; | |
1169 | |
1170 // exhaustively search through all root words | |
1171 // keeping track of the MAX_ROOTS most similar root words | |
1172 struct hentry * roots[MAX_ROOTS]; | |
1173 char * rootsphon[MAX_ROOTS]; | |
1174 int scores[MAX_ROOTS]; | |
1175 int scoresphon[MAX_ROOTS]; | |
1176 for (i = 0; i < MAX_ROOTS; i++) { | |
1177 roots[i] = NULL; | |
1178 scores[i] = -100 * i; | |
1179 rootsphon[i] = NULL; | |
1180 scoresphon[i] = -100 * i; | |
1181 } | |
1182 lp = MAX_ROOTS - 1; | |
1183 lpphon = MAX_ROOTS - 1; | |
1184 scphon = -20000; | |
1185 int low = NGRAM_LOWERING; | |
1186 | |
1187 char w2[MAXWORDUTF8LEN]; | |
1188 char f[MAXSWUTF8L]; | |
1189 char * word = w; | |
1190 | |
1191 // word reversing wrapper for complex prefixes | |
1192 if (complexprefixes) { | |
1193 strcpy(w2, w); | |
1194 if (utf8) reverseword_utf(w2); else reverseword(w2); | |
1195 word = w2; | |
1196 } | |
1197 | |
1198 char mw[MAXSWUTF8L]; | |
1199 w_char u8[MAXSWL]; | |
1200 int nc = strlen(word); | |
1201 int n = (utf8) ? u8_u16(u8, MAXSWL, word) : nc; | |
1202 | |
1203 // set character based ngram suggestion for words with non-BMP Unicode charact
ers | |
1204 if (n == -1) { | |
1205 utf8 = 0; // XXX not state-free | |
1206 n = nc; | |
1207 nonbmp = 1; | |
1208 low = 0; | |
1209 } | |
1210 | |
1211 struct hentry* hp = NULL; | |
1212 int col = -1; | |
1213 #ifdef HUNSPELL_CHROME_CLIENT | |
1214 ScopedHashEntryFactory hash_entry_factory; | |
1215 #endif | |
1216 phonetable * ph = (pAMgr) ? pAMgr->get_phonetable() : NULL; | |
1217 char target[MAXSWUTF8L]; | |
1218 char candidate[MAXSWUTF8L]; | |
1219 if (ph) { | |
1220 if (utf8) { | |
1221 w_char _w[MAXSWL]; | |
1222 int _wl = u8_u16(_w, MAXSWL, word); | |
1223 mkallcap_utf(_w, _wl, langnum); | |
1224 u16_u8(candidate, MAXSWUTF8L, _w, _wl); | |
1225 } else { | |
1226 strcpy(candidate, word); | |
1227 if (!nonbmp) mkallcap(candidate, csconv); | |
1228 } | |
1229 phonet(candidate, target, nc, *ph); // XXX phonet() is 8-bit (nc, not n) | |
1230 } | |
1231 | |
1232 FLAG forbiddenword = pAMgr ? pAMgr->get_forbiddenword() : FLAG_NULL; | |
1233 FLAG nosuggest = pAMgr ? pAMgr->get_nosuggest() : FLAG_NULL; | |
1234 FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL; | |
1235 FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL; | |
1236 | |
1237 for (i = 0; i < md; i++) { | |
1238 while (0 != (hp = (pHMgr[i])->walk_hashtable(col, hp))) { | |
1239 if ((hp->astr) && (pAMgr) && | |
1240 (TESTAFF(hp->astr, forbiddenword, hp->alen) || | |
1241 TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) || | |
1242 TESTAFF(hp->astr, nosuggest, hp->alen) || | |
1243 TESTAFF(hp->astr, nongramsuggest, hp->alen) || | |
1244 TESTAFF(hp->astr, onlyincompound, hp->alen))) continue; | |
1245 | |
1246 sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) + | |
1247 leftcommonsubstring(word, HENTRY_WORD(hp)); | |
1248 | |
1249 // check special pronounciation | |
1250 if ((hp->var & H_OPT_PHON) && copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) { | |
1251 int sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) + | |
1252 + leftcommonsubstring(word, f); | |
1253 if (sc2 > sc) sc = sc2; | |
1254 } | |
1255 | |
1256 scphon = -20000; | |
1257 if (ph && (sc > 2) && (abs(n - (int) hp->clen) <= 3)) { | |
1258 char target2[MAXSWUTF8L]; | |
1259 if (utf8) { | |
1260 w_char _w[MAXSWL]; | |
1261 int _wl = u8_u16(_w, MAXSWL, HENTRY_WORD(hp)); | |
1262 mkallcap_utf(_w, _wl, langnum); | |
1263 u16_u8(candidate, MAXSWUTF8L, _w, _wl); | |
1264 } else { | |
1265 strcpy(candidate, HENTRY_WORD(hp)); | |
1266 mkallcap(candidate, csconv); | |
1267 } | |
1268 phonet(candidate, target2, -1, *ph); | |
1269 scphon = 2 * ngram(3, target, target2, NGRAM_LONGER_WORSE); | |
1270 } | |
1271 | |
1272 if (sc > scores[lp]) { | |
1273 scores[lp] = sc; | |
1274 #ifdef HUNSPELL_CHROME_CLIENT | |
1275 roots[lp] = hash_entry_factory.CreateScopedHashEntry(lp, hp); | |
1276 #else | |
1277 roots[lp] = hp; | |
1278 #endif | |
1279 lval = sc; | |
1280 for (j=0; j < MAX_ROOTS; j++) | |
1281 if (scores[j] < lval) { | |
1282 lp = j; | |
1283 lval = scores[j]; | |
1284 } | |
1285 } | |
1286 | |
1287 | |
1288 if (scphon > scoresphon[lpphon]) { | |
1289 scoresphon[lpphon] = scphon; | |
1290 rootsphon[lpphon] = HENTRY_WORD(hp); | |
1291 lval = scphon; | |
1292 for (j=0; j < MAX_ROOTS; j++) | |
1293 if (scoresphon[j] < lval) { | |
1294 lpphon = j; | |
1295 lval = scoresphon[j]; | |
1296 } | |
1297 } | |
1298 }} | |
1299 | |
1300 // find minimum threshold for a passable suggestion | |
1301 // mangle original word three differnt ways | |
1302 // and score them to generate a minimum acceptable score | |
1303 int thresh = 0; | |
1304 for (int sp = 1; sp < 4; sp++) { | |
1305 if (utf8) { | |
1306 for (int k=sp; k < n; k+=4) *((unsigned short *) u8 + k) = '*'; | |
1307 u16_u8(mw, MAXSWUTF8L, u8, n); | |
1308 thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low); | |
1309 } else { | |
1310 strcpy(mw, word); | |
1311 for (int k=sp; k < n; k+=4) *(mw + k) = '*'; | |
1312 thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low); | |
1313 } | |
1314 } | |
1315 thresh = thresh / 3; | |
1316 thresh--; | |
1317 | |
1318 // now expand affixes on each of these root words and | |
1319 // and use length adjusted ngram scores to select | |
1320 // possible suggestions | |
1321 char * guess[MAX_GUESS]; | |
1322 char * guessorig[MAX_GUESS]; | |
1323 int gscore[MAX_GUESS]; | |
1324 for(i=0;i<MAX_GUESS;i++) { | |
1325 guess[i] = NULL; | |
1326 guessorig[i] = NULL; | |
1327 gscore[i] = -100 * i; | |
1328 } | |
1329 | |
1330 lp = MAX_GUESS - 1; | |
1331 | |
1332 struct guessword * glst; | |
1333 glst = (struct guessword *) calloc(MAX_WORDS,sizeof(struct guessword)); | |
1334 if (! glst) { | |
1335 if (nonbmp) utf8 = 1; | |
1336 return ns; | |
1337 } | |
1338 | |
1339 for (i = 0; i < MAX_ROOTS; i++) { | |
1340 if (roots[i]) { | |
1341 struct hentry * rp = roots[i]; | |
1342 int nw = pAMgr->expand_rootword(glst, MAX_WORDS, HENTRY_WORD(rp), rp->bl
en, | |
1343 rp->astr, rp->alen, word, nc, | |
1344 ((rp->var & H_OPT_PHON) ? copy_field(f, HENTRY_DATA(rp), MOR
PH_PHON) : NULL)); | |
1345 | |
1346 for (int k = 0; k < nw ; k++) { | |
1347 sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH + low) + | |
1348 leftcommonsubstring(word, glst[k].word); | |
1349 | |
1350 if (sc > thresh) { | |
1351 if (sc > gscore[lp]) { | |
1352 if (guess[lp]) { | |
1353 free (guess[lp]); | |
1354 if (guessorig[lp]) { | |
1355 free(guessorig[lp]); | |
1356 guessorig[lp] = NULL; | |
1357 } | |
1358 } | |
1359 gscore[lp] = sc; | |
1360 guess[lp] = glst[k].word; | |
1361 guessorig[lp] = glst[k].orig; | |
1362 lval = sc; | |
1363 for (j=0; j < MAX_GUESS; j++) | |
1364 if (gscore[j] < lval) { | |
1365 lp = j; | |
1366 lval = gscore[j]; | |
1367 } | |
1368 } else { | |
1369 free(glst[k].word); | |
1370 if (glst[k].orig) free(glst[k].orig); | |
1371 } | |
1372 } else { | |
1373 free(glst[k].word); | |
1374 if (glst[k].orig) free(glst[k].orig); | |
1375 } | |
1376 } | |
1377 } | |
1378 } | |
1379 free(glst); | |
1380 | |
1381 // now we are done generating guesses | |
1382 // sort in order of decreasing score | |
1383 | |
1384 | |
1385 bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS); | |
1386 if (ph) bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS); | |
1387 | |
1388 // weight suggestions with a similarity index, based on | |
1389 // the longest common subsequent algorithm and resort | |
1390 | |
1391 int is_swap = 0; | |
1392 int re = 0; | |
1393 double fact = 1.0; | |
1394 if (pAMgr) { | |
1395 int maxd = pAMgr->get_maxdiff(); | |
1396 if (maxd >= 0) fact = (10.0 - maxd)/5.0; | |
1397 } | |
1398 | |
1399 for (i=0; i < MAX_GUESS; i++) { | |
1400 if (guess[i]) { | |
1401 // lowering guess[i] | |
1402 char gl[MAXSWUTF8L]; | |
1403 int len; | |
1404 if (utf8) { | |
1405 w_char _w[MAXSWL]; | |
1406 len = u8_u16(_w, MAXSWL, guess[i]); | |
1407 mkallsmall_utf(_w, len, langnum); | |
1408 u16_u8(gl, MAXSWUTF8L, _w, len); | |
1409 } else { | |
1410 strcpy(gl, guess[i]); | |
1411 if (!nonbmp) mkallsmall(gl, csconv); | |
1412 len = strlen(guess[i]); | |
1413 } | |
1414 | |
1415 int _lcs = lcslen(word, gl); | |
1416 | |
1417 // same characters with different casing | |
1418 if ((n == len) && (n == _lcs)) { | |
1419 gscore[i] += 2000; | |
1420 break; | |
1421 } | |
1422 // using 2-gram instead of 3, and other weightening | |
1423 | |
1424 re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) + | |
1425 ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED); | |
1426 | |
1427 gscore[i] = | |
1428 // length of longest common subsequent minus length difference | |
1429 2 * _lcs - abs((int) (n - len)) + | |
1430 // weight length of the left common substring | |
1431 leftcommonsubstring(word, gl) + | |
1432 // weight equal character positions | |
1433 (!nonbmp && commoncharacterpositions(word, gl, &is_swap) ? 1: 0) + | |
1434 // swap character (not neighboring) | |
1435 ((is_swap) ? 10 : 0) + | |
1436 // ngram | |
1437 ngram(4, word, gl, NGRAM_ANY_MISMATCH + low) + | |
1438 // weighted ngrams | |
1439 re + | |
1440 // different limit for dictionaries with PHONE rules | |
1441 (ph ? (re < len * fact ? -1000 : 0) : (re < (n + len)*fact? -1000 : 0)
); | |
1442 } | |
1443 } | |
1444 | |
1445 bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS); | |
1446 | |
1447 // phonetic version | |
1448 if (ph) for (i=0; i < MAX_ROOTS; i++) { | |
1449 if (rootsphon[i]) { | |
1450 // lowering rootphon[i] | |
1451 char gl[MAXSWUTF8L]; | |
1452 int len; | |
1453 if (utf8) { | |
1454 w_char _w[MAXSWL]; | |
1455 len = u8_u16(_w, MAXSWL, rootsphon[i]); | |
1456 mkallsmall_utf(_w, len, langnum); | |
1457 u16_u8(gl, MAXSWUTF8L, _w, len); | |
1458 } else { | |
1459 strcpy(gl, rootsphon[i]); | |
1460 if (!nonbmp) mkallsmall(gl, csconv); | |
1461 len = strlen(rootsphon[i]); | |
1462 } | |
1463 | |
1464 // heuristic weigthing of ngram scores | |
1465 scoresphon[i] += 2 * lcslen(word, gl) - abs((int) (n - len)) + | |
1466 // weight length of the left common substring | |
1467 leftcommonsubstring(word, gl); | |
1468 } | |
1469 } | |
1470 | |
1471 if (ph) bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS); | |
1472 | |
1473 // copy over | |
1474 int oldns = ns; | |
1475 | |
1476 int same = 0; | |
1477 for (i=0; i < MAX_GUESS; i++) { | |
1478 if (guess[i]) { | |
1479 if ((ns < oldns + maxngramsugs) && (ns < maxSug) && (!same || (gscore[i] >
1000))) { | |
1480 int unique = 1; | |
1481 // leave only excellent suggestions, if exists | |
1482 if (gscore[i] > 1000) same = 1; else if (gscore[i] < -100) { | |
1483 same = 1; | |
1484 // keep the best ngram suggestions, unless in ONLYMAXDIFF mode | |
1485 if (ns > oldns || (pAMgr && pAMgr->get_onlymaxdiff())) { | |
1486 free(guess[i]); | |
1487 if (guessorig[i]) free(guessorig[i]); | |
1488 continue; | |
1489 } | |
1490 } | |
1491 for (j = 0; j < ns; j++) { | |
1492 // don't suggest previous suggestions or a previous suggestion with pr
efixes or affixes | |
1493 if ((!guessorig[i] && strstr(guess[i], wlst[j])) || | |
1494 (guessorig[i] && strstr(guessorig[i], wlst[j])) || | |
1495 // check forbidden words | |
1496 !checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) unique = 0; | |
1497 } | |
1498 if (unique) { | |
1499 wlst[ns++] = guess[i]; | |
1500 if (guessorig[i]) { | |
1501 free(guess[i]); | |
1502 wlst[ns-1] = guessorig[i]; | |
1503 } | |
1504 } else { | |
1505 free(guess[i]); | |
1506 if (guessorig[i]) free(guessorig[i]); | |
1507 } | |
1508 } else { | |
1509 free(guess[i]); | |
1510 if (guessorig[i]) free(guessorig[i]); | |
1511 } | |
1512 } | |
1513 } | |
1514 | |
1515 oldns = ns; | |
1516 if (ph) for (i=0; i < MAX_ROOTS; i++) { | |
1517 if (rootsphon[i]) { | |
1518 if ((ns < oldns + MAXPHONSUGS) && (ns < maxSug)) { | |
1519 int unique = 1; | |
1520 for (j = 0; j < ns; j++) { | |
1521 // don't suggest previous suggestions or a previous suggestion with pr
efixes or affixes | |
1522 if (strstr(rootsphon[i], wlst[j]) || | |
1523 // check forbidden words | |
1524 !checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) uniqu
e = 0; | |
1525 } | |
1526 if (unique) { | |
1527 wlst[ns++] = mystrdup(rootsphon[i]); | |
1528 if (!wlst[ns - 1]) return ns - 1; | |
1529 } | |
1530 } | |
1531 } | |
1532 } | |
1533 | |
1534 if (nonbmp) utf8 = 1; | |
1535 return ns; | |
1536 } | |
1537 | |
1538 | |
1539 // see if a candidate suggestion is spelled correctly | |
1540 // needs to check both root words and words with affixes | |
1541 | |
1542 // obsolote MySpell-HU modifications: | |
1543 // return value 2 and 3 marks compounding with hyphen (-) | |
1544 // `3' marks roots without suffix | |
1545 int SuggestMgr::checkword(const char * word, int len, int cpdsuggest, int * time
r, clock_t * timelimit) | |
1546 { | |
1547 struct hentry * rv=NULL; | |
1548 struct hentry * rv2=NULL; | |
1549 int nosuffix = 0; | |
1550 | |
1551 // check time limit | |
1552 if (timer) { | |
1553 (*timer)--; | |
1554 if (!(*timer) && timelimit) { | |
1555 if ((clock() - *timelimit) > TIMELIMIT) return 0; | |
1556 *timer = MAXPLUSTIMER; | |
1557 } | |
1558 } | |
1559 | |
1560 if (pAMgr) { | |
1561 if (cpdsuggest==1) { | |
1562 if (pAMgr->get_compound()) { | |
1563 rv = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 1, 0); //EX
T | |
1564 if (rv && (!(rv2 = pAMgr->lookup(word)) || !rv2->astr || | |
1565 !(TESTAFF(rv2->astr,pAMgr->get_forbiddenword(),rv2->alen) || | |
1566 TESTAFF(rv2->astr,pAMgr->get_nosuggest(),rv2->alen)))) return 3; //
XXX obsolote categorisation + only ICONV needs affix flag check? | |
1567 } | |
1568 return 0; | |
1569 } | |
1570 | |
1571 rv = pAMgr->lookup(word); | |
1572 | |
1573 if (rv) { | |
1574 if ((rv->astr) && (TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen) | |
1575 || TESTAFF(rv->astr,pAMgr->get_nosuggest(),rv->alen))) return 0; | |
1576 while (rv) { | |
1577 if (rv->astr && (TESTAFF(rv->astr,pAMgr->get_needaffix(),rv->alen) |
| | |
1578 TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || | |
1579 TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) { | |
1580 rv = rv->next_homonym; | |
1581 } else break; | |
1582 } | |
1583 } else rv = pAMgr->prefix_check(word, len, 0); // only prefix, and prefix +
suffix XXX | |
1584 | |
1585 if (rv) { | |
1586 nosuffix=1; | |
1587 } else { | |
1588 rv = pAMgr->suffix_check(word, len, 0, NULL, NULL, 0, NULL); // only suf
fix | |
1589 } | |
1590 | |
1591 if (!rv && pAMgr->have_contclass()) { | |
1592 rv = pAMgr->suffix_check_twosfx(word, len, 0, NULL, FLAG_NULL); | |
1593 if (!rv) rv = pAMgr->prefix_check_twosfx(word, len, 1, FLAG_NULL); | |
1594 } | |
1595 | |
1596 // check forbidden words | |
1597 if ((rv) && (rv->astr) && (TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->a
len) || | |
1598 TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || | |
1599 TESTAFF(rv->astr,pAMgr->get_nosuggest(),rv->alen) || | |
1600 TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) return 0; | |
1601 | |
1602 if (rv) { // XXX obsolote | |
1603 if ((pAMgr->get_compoundflag()) && | |
1604 TESTAFF(rv->astr, pAMgr->get_compoundflag(), rv->alen)) return 2 + nos
uffix; | |
1605 return 1; | |
1606 } | |
1607 } | |
1608 return 0; | |
1609 } | |
1610 | |
1611 int SuggestMgr::check_forbidden(const char * word, int len) | |
1612 { | |
1613 struct hentry * rv = NULL; | |
1614 | |
1615 if (pAMgr) { | |
1616 rv = pAMgr->lookup(word); | |
1617 if (rv && rv->astr && (TESTAFF(rv->astr,pAMgr->get_needaffix(),rv->alen) || | |
1618 TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) rv = NULL; | |
1619 if (!(pAMgr->prefix_check(word,len,1))) | |
1620 rv = pAMgr->suffix_check(word,len, 0, NULL, NULL, 0, NULL); // prefix+su
ffix, suffix | |
1621 // check forbidden words | |
1622 if ((rv) && (rv->astr) && TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->al
en)) return 1; | |
1623 } | |
1624 return 0; | |
1625 } | |
1626 | |
#ifdef HUNSPELL_EXPERIMENTAL
// suggest possible stems
//
// slst  in/out: suggestion list. If *slst is NULL a list of maxSug slots is
//       allocated here; either way *slst points at the list on return.
// w     the word to analyse.
// nsug  number of entries already in the list.
// Returns the entry count after suffix_check() has updated it, or -1 when
// the list could not be allocated.
int SuggestMgr::suggest_pos_stems(char*** slst, const char * w, int nsug)
{
  char ** wlst;

  char w2[MAXSWUTF8L];
  const char * word = w;

  // word reversing wrapper for complex prefixes
  if (complexprefixes) {
    strcpy(w2, w);
    if (utf8) reverseword_utf(w2); else reverseword(w2);
    word = w2;
  }

  int wl = strlen(word);

  if (*slst) {
    wlst = *slst;
  } else {
    wlst = (char **) calloc(maxSug, sizeof(char *));
    if (wlst == NULL) return -1;
  }

  // only the side effects on wlst / nsug are of interest here
  pAMgr->suffix_check(word, wl, 0, NULL, wlst, maxSug, &nsug);

  // delete dash from end of word
  for (int j = 0; j < nsug; j++) {
    // an empty entry has no last character to inspect
    size_t slen = strlen(wlst[j]);
    if (slen > 0 && wlst[j][slen - 1] == '-') wlst[j][slen - 1] = '\0';
  }

  *slst = wlst;
  return nsug;
}
#endif // END OF HUNSPELL_EXPERIMENTAL CODE
1668 | |
1669 | |
1670 char * SuggestMgr::suggest_morph(const char * w) | |
1671 { | |
1672 char result[MAXLNLEN]; | |
1673 char * r = (char *) result; | |
1674 char * st; | |
1675 | |
1676 struct hentry * rv = NULL; | |
1677 | |
1678 *result = '\0'; | |
1679 | |
1680 if (! pAMgr) return NULL; | |
1681 | |
1682 char w2[MAXSWUTF8L]; | |
1683 const char * word = w; | |
1684 | |
1685 // word reversing wrapper for complex prefixes | |
1686 if (complexprefixes) { | |
1687 strcpy(w2, w); | |
1688 if (utf8) reverseword_utf(w2); else reverseword(w2); | |
1689 word = w2; | |
1690 } | |
1691 | |
1692 rv = pAMgr->lookup(word); | |
1693 | |
1694 while (rv) { | |
1695 if ((!rv->astr) || !(TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->a
len) || | |
1696 TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) || | |
1697 TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) { | |
1698 if (!HENTRY_FIND(rv, MORPH_STEM)) { | |
1699 mystrcat(result, " ", MAXLNLEN);
| |
1700 mystrcat(result, MORPH_STEM, MAXLNLEN); | |
1701 mystrcat(result, word, MAXLNLEN); | |
1702 } | |
1703 if (HENTRY_DATA(rv)) { | |
1704 mystrcat(result, " ", MAXLNLEN);
| |
1705 mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN); | |
1706 } | |
1707 mystrcat(result, "\n", MAXLNLEN); | |
1708 } | |
1709 rv = rv->next_homonym; | |
1710 } | |
1711 | |
1712 st = pAMgr->affix_check_morph(word,strlen(word)); | |
1713 if (st) { | |
1714 mystrcat(result, st, MAXLNLEN); | |
1715 free(st); | |
1716 } | |
1717 | |
1718 if (pAMgr->get_compound() && (*result == '\0')) | |
1719 pAMgr->compound_check_morph(word, strlen(word), | |
1720 0, 0, 100, 0,NULL, 0, &r, NULL); | |
1721 | |
1722 return (*result) ? mystrdup(line_uniq(result, MSEP_REC)) : NULL; | |
1723 } | |
1724 | |
1725 #ifdef HUNSPELL_EXPERIMENTAL | |
1726 char * SuggestMgr::suggest_morph_for_spelling_error(const char * word) | |
1727 { | |
1728 char * p = NULL; | |
1729 char ** wlst = (char **) calloc(maxSug, sizeof(char *)); | |
1730 if (!**wlst) return NULL; | |
1731 // we will use only the first suggestion | |
1732 for (int i = 0; i < maxSug - 1; i++) wlst[i] = ""; | |
1733 int ns = suggest(&wlst, word, maxSug - 1, NULL); | |
1734 if (ns == maxSug) { | |
1735 p = suggest_morph(wlst[maxSug - 1]); | |
1736 free(wlst[maxSug - 1]); | |
1737 } | |
1738 if (wlst) free(wlst); | |
1739 return p; | |
1740 } | |
1741 #endif // END OF HUNSPELL_EXPERIMENTAL CODE | |
1742 | |
1743 /* affixation */ | |
1744 char * SuggestMgr::suggest_hentry_gen(hentry * rv, char * pattern) | |
1745 { | |
1746 char result[MAXLNLEN]; | |
1747 *result = '\0'; | |
1748 int sfxcount = get_sfxcount(pattern); | |
1749 | |
1750 if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount) return NULL; | |
1751 | |
1752 if (HENTRY_DATA(rv)) { | |
1753 char * aff = pAMgr->morphgen(HENTRY_WORD(rv), rv->blen, rv->astr, rv->al
en, | |
1754 HENTRY_DATA(rv), pattern, 0); | |
1755 if (aff) { | |
1756 mystrcat(result, aff, MAXLNLEN); | |
1757 mystrcat(result, "\n", MAXLNLEN); | |
1758 free(aff); | |
1759 } | |
1760 } | |
1761 | |
1762 // check all allomorphs | |
1763 char allomorph[MAXLNLEN]; | |
1764 char * p = NULL; | |
1765 if (HENTRY_DATA(rv)) p = (char *) strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH); | |
1766 while (p) { | |
1767 struct hentry * rv2 = NULL; | |
1768 p += MORPH_TAG_LEN; | |
1769 int plen = fieldlen(p); | |
1770 strncpy(allomorph, p, plen); | |
1771 allomorph[plen] = '\0'; | |
1772 rv2 = pAMgr->lookup(allomorph); | |
1773 while (rv2) { | |
1774 // if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <= sfxcount
) { | |
1775 if (HENTRY_DATA(rv2)) { | |
1776 char * st = (char *) strstr(HENTRY_DATA2(rv2), MORPH_STEM); | |
1777 if (st && (strncmp(st + MORPH_TAG_LEN, | |
1778 HENTRY_WORD(rv), fieldlen(st + MORPH_TAG_LEN)) == 0)) { | |
1779 char * aff = pAMgr->morphgen(HENTRY_WORD(rv2), rv2->blen, rv
2->astr, rv2->alen, | |
1780 HENTRY_DATA(rv2), pattern, 0); | |
1781 if (aff) { | |
1782 mystrcat(result, aff, MAXLNLEN); | |
1783 mystrcat(result, "\n", MAXLNLEN); | |
1784 free(aff); | |
1785 } | |
1786 } | |
1787 } | |
1788 rv2 = rv2->next_homonym; | |
1789 } | |
1790 p = strstr(p + plen, MORPH_ALLOMORPH); | |
1791 } | |
1792 | |
1793 return (*result) ? mystrdup(result) : NULL; | |
1794 } | |
1795 | |
1796 char * SuggestMgr::suggest_gen(char ** desc, int n, char * pattern) { | |
1797 char result[MAXLNLEN]; | |
1798 char result2[MAXLNLEN]; | |
1799 char newpattern[MAXLNLEN]; | |
1800 *newpattern = '\0'; | |
1801 if (n == 0) return 0; | |
1802 *result2 = '\0'; | |
1803 struct hentry * rv = NULL; | |
1804 if (!pAMgr) return NULL; | |
1805 | |
1806 // search affixed forms with and without derivational suffixes | |
1807 while(1) { | |
1808 | |
1809 for (int k = 0; k < n; k++) { | |
1810 *result = '\0'; | |
1811 // add compound word parts (except the last one) | |
1812 char * s = (char *) desc[k]; | |
1813 char * part = strstr(s, MORPH_PART); | |
1814 if (part) { | |
1815 char * nextpart = strstr(part + 1, MORPH_PART); | |
1816 while (nextpart) { | |
1817 copy_field(result + strlen(result), part, MORPH_PART); | |
1818 part = nextpart; | |
1819 nextpart = strstr(part + 1, MORPH_PART); | |
1820 } | |
1821 s = part; | |
1822 } | |
1823 | |
1824 char **pl; | |
1825 char tok[MAXLNLEN]; | |
1826 strcpy(tok, s); | |
1827 char * alt = strstr(tok, " | "); | |
1828 while (alt) { | |
1829 alt[1] = MSEP_ALT; | |
1830 alt = strstr(alt, " | "); | |
1831 } | |
1832 int pln = line_tok(tok, &pl, MSEP_ALT); | |
1833 for (int i = 0; i < pln; i++) { | |
1834 // remove inflectional and terminal suffixes | |
1835 char * is = strstr(pl[i], MORPH_INFL_SFX); | |
1836 if (is) *is = '\0'; | |
1837 char * ts = strstr(pl[i], MORPH_TERM_SFX); | |
1838 while (ts) { | |
1839 *ts = '_'; | |
1840 ts = strstr(pl[i], MORPH_TERM_SFX); | |
1841 } | |
1842 char * st = strstr(s, MORPH_STEM); | |
1843 if (st) { | |
1844 copy_field(tok, st, MORPH_STEM); | |
1845 rv = pAMgr->lookup(tok); | |
1846 while (rv) { | |
1847 char newpat[MAXLNLEN]; | |
1848 strcpy(newpat, pl[i]); | |
1849 strcat(newpat, pattern); | |
1850 char * sg = suggest_hentry_gen(rv, newpat); | |
1851 if (!sg) sg = suggest_hentry_gen(rv, pattern); | |
1852 if (sg) { | |
1853 char ** gen; | |
1854 int genl = line_tok(sg, &gen, MSEP_REC); | |
1855 free(sg); | |
1856 sg = NULL; | |
1857 for (int j = 0; j < genl; j++) { | |
1858 if (strstr(pl[i], MORPH_SURF_PFX)) { | |
1859 int r2l = strlen(result2); | |
1860 result2[r2l] = MSEP_REC; | |
1861 strcpy(result2 + r2l + 1, result); | |
1862 copy_field(result2 + strlen(result2), pl[i], MOR
PH_SURF_PFX); | |
1863 mystrcat(result2, gen[j], MAXLNLEN); | |
1864 } else { | |
1865 sprintf(result2 + strlen(result2), "%c%s%s", | |
1866 MSEP_REC, result, gen[j]); | |
1867 } | |
1868 } | |
1869 freelist(&gen, genl); | |
1870 } | |
1871 rv = rv->next_homonym; | |
1872 } | |
1873 } | |
1874 } | |
1875 freelist(&pl, pln); | |
1876 } | |
1877 | |
1878 if (*result2 || !strstr(pattern, MORPH_DERI_SFX)) break; | |
1879 strcpy(newpattern, pattern); | |
1880 pattern = newpattern; | |
1881 char * ds = strstr(pattern, MORPH_DERI_SFX); | |
1882 while (ds) { | |
1883 strncpy(ds, MORPH_TERM_SFX, MORPH_TAG_LEN); | |
1884 ds = strstr(pattern, MORPH_DERI_SFX); | |
1885 } | |
1886 } | |
1887 return (*result2 ? mystrdup(result2) : NULL); | |
1888 } | |
1889 | |
1890 | |
1891 // generate an n-gram score comparing s1 and s2 | |
1892 int SuggestMgr::ngram(int n, char * s1, const char * s2, int opt) | |
1893 { | |
1894 int nscore = 0; | |
1895 int ns; | |
1896 int l1; | |
1897 int l2; | |
1898 int test = 0; | |
1899 | |
1900 if (utf8) { | |
1901 w_char su1[MAXSWL]; | |
1902 w_char su2[MAXSWL]; | |
1903 l1 = u8_u16(su1, MAXSWL, s1); | |
1904 l2 = u8_u16(su2, MAXSWL, s2); | |
1905 if ((l2 <= 0) || (l1 == -1)) return 0; | |
1906 // lowering dictionary word | |
1907 if (opt & NGRAM_LOWERING) mkallsmall_utf(su2, l2, langnum); | |
1908 for (int j = 1; j <= n; j++) { | |
1909 ns = 0; | |
1910 for (int i = 0; i <= (l1-j); i++) { | |
1911 int k = 0; | |
1912 for (int l = 0; l <= (l2-j); l++) { | |
1913 for (k = 0; k < j; k++) { | |
1914 w_char * c1 = su1 + i + k; | |
1915 w_char * c2 = su2 + l + k; | |
1916 if ((c1->l != c2->l) || (c1->h != c2->h)) break; | |
1917 } | |
1918 if (k == j) { | |
1919 ns++; | |
1920 break; | |
1921 } | |
1922 } | |
1923 if (k != j && opt & NGRAM_WEIGHTED) { | |
1924 ns--; | |
1925 test++; | |
1926 if (i == 0 || i == l1-j) ns--; // side weight | |
1927 } | |
1928 } | |
1929 nscore = nscore + ns; | |
1930 if (ns < 2 && !(opt & NGRAM_WEIGHTED)) break; | |
1931 } | |
1932 } else { | |
1933 l2 = strlen(s2); | |
1934 if (l2 == 0) return 0; | |
1935 l1 = strlen(s1); | |
1936 char *t = mystrdup(s2); | |
1937 if (opt & NGRAM_LOWERING) mkallsmall(t, csconv); | |
1938 for (int j = 1; j <= n; j++) { | |
1939 ns = 0; | |
1940 for (int i = 0; i <= (l1-j); i++) { | |
1941 char c = *(s1 + i + j); | |
1942 *(s1 + i + j) = '\0'; | |
1943 if (strstr(t,(s1+i))) { | |
1944 ns++; | |
1945 } else if (opt & NGRAM_WEIGHTED) { | |
1946 ns--; | |
1947 test++; | |
1948 if (i == 0 || i == l1-j) ns--; // side weight | |
1949 } | |
1950 *(s1 + i + j ) = c; | |
1951 } | |
1952 nscore = nscore + ns; | |
1953 if (ns < 2 && !(opt & NGRAM_WEIGHTED)) break; | |
1954 } | |
1955 free(t); | |
1956 } | |
1957 | |
1958 ns = 0; | |
1959 if (opt & NGRAM_LONGER_WORSE) ns = (l2-l1)-2; | |
1960 if (opt & NGRAM_ANY_MISMATCH) ns = abs(l2-l1)-2; | |
1961 ns = (nscore - ((ns > 0) ? ns : 0)); | |
1962 return ns; | |
1963 } | |
1964 | |
1965 // length of the left common substring of s1 and (decapitalised) s2 | |
1966 int SuggestMgr::leftcommonsubstring(char * s1, const char * s2) { | |
1967 if (utf8) { | |
1968 w_char su1[MAXSWL]; | |
1969 w_char su2[MAXSWL]; | |
1970 su1[0].l = su2[0].l = su1[0].h = su2[0].h = 0; | |
1971 // decapitalize dictionary word | |
1972 if (complexprefixes) { | |
1973 int l1 = u8_u16(su1, MAXSWL, s1); | |
1974 int l2 = u8_u16(su2, MAXSWL, s2); | |
1975 if (*((short *)su1+l1-1) == *((short *)su2+l2-1)) return 1; | |
1976 } else { | |
1977 int i; | |
1978 u8_u16(su1, 1, s1); | |
1979 u8_u16(su2, 1, s2); | |
1980 unsigned short idx = (su2->h << 8) + su2->l; | |
1981 unsigned short otheridx = (su1->h << 8) + su1->l; | |
1982 if (otheridx != idx && | |
1983 (otheridx != unicodetolower(idx, langnum))) return 0; | |
1984 int l1 = u8_u16(su1, MAXSWL, s1); | |
1985 int l2 = u8_u16(su2, MAXSWL, s2); | |
1986 for(i = 1; (i < l1) && (i < l2) && | |
1987 (su1[i].l == su2[i].l) && (su1[i].h == su2[i].h); i++); | |
1988 return i; | |
1989 } | |
1990 } else { | |
1991 if (complexprefixes) { | |
1992 int l1 = strlen(s1); | |
1993 int l2 = strlen(s2); | |
1994 if (*(s2+l1-1) == *(s2+l2-1)) return 1; | |
1995 } else { | |
1996 char * olds = s1; | |
1997 // decapitalise dictionary word | |
1998 if ((*s1 != *s2) && (*s1 != csconv[((unsigned char)*s2)].clower)) return 0
; | |
1999 do { | |
2000 s1++; s2++; | |
2001 } while ((*s1 == *s2) && (*s1 != '\0')); | |
2002 return (int)(s1 - olds); | |
2003 } | |
2004 } | |
2005 return 0; | |
2006 } | |
2007 | |
2008 int SuggestMgr::commoncharacterpositions(char * s1, const char * s2, int * is_sw
ap) { | |
2009 int num = 0; | |
2010 int diff = 0; | |
2011 int diffpos[2]; | |
2012 *is_swap = 0; | |
2013 if (utf8) { | |
2014 w_char su1[MAXSWL]; | |
2015 w_char su2[MAXSWL]; | |
2016 int l1 = u8_u16(su1, MAXSWL, s1); | |
2017 int l2 = u8_u16(su2, MAXSWL, s2); | |
2018 // decapitalize dictionary word | |
2019 if (complexprefixes) { | |
2020 mkallsmall_utf(su2+l2-1, 1, langnum); | |
2021 } else { | |
2022 mkallsmall_utf(su2, 1, langnum); | |
2023 } | |
2024 for (int i = 0; (i < l1) && (i < l2); i++) { | |
2025 if (((short *) su1)[i] == ((short *) su2)[i]) { | |
2026 num++; | |
2027 } else { | |
2028 if (diff < 2) diffpos[diff] = i; | |
2029 diff++; | |
2030 } | |
2031 } | |
2032 if ((diff == 2) && (l1 == l2) && | |
2033 (((short *) su1)[diffpos[0]] == ((short *) su2)[diffpos[1]]) && | |
2034 (((short *) su1)[diffpos[1]] == ((short *) su2)[diffpos[0]])) *is_swap =
1; | |
2035 } else { | |
2036 int i; | |
2037 char t[MAXSWUTF8L]; | |
2038 strcpy(t, s2); | |
2039 // decapitalize dictionary word | |
2040 if (complexprefixes) { | |
2041 int l2 = strlen(t); | |
2042 *(t+l2-1) = csconv[((unsigned char)*(t+l2-1))].clower; | |
2043 } else { | |
2044 mkallsmall(t, csconv); | |
2045 } | |
2046 for (i = 0; (*(s1+i) != 0) && (*(t+i) != 0); i++) { | |
2047 if (*(s1+i) == *(t+i)) { | |
2048 num++; | |
2049 } else { | |
2050 if (diff < 2) diffpos[diff] = i; | |
2051 diff++; | |
2052 } | |
2053 } | |
2054 if ((diff == 2) && (*(s1+i) == 0) && (*(t+i) == 0) && | |
2055 (*(s1+diffpos[0]) == *(t+diffpos[1])) && | |
2056 (*(s1+diffpos[1]) == *(t+diffpos[0]))) *is_swap = 1; | |
2057 } | |
2058 return num; | |
2059 } | |
2060 | |
2061 int SuggestMgr::mystrlen(const char * word) { | |
2062 if (utf8) { | |
2063 w_char w[MAXSWL]; | |
2064 return u8_u16(w, MAXSWL, word); | |
2065 } else return strlen(word); | |
2066 } | |
2067 | |
2068 // sort in decreasing order of score | |
2069 void SuggestMgr::bubblesort(char** rword, char** rword2, int* rsc, int n ) | |
2070 { | |
2071 int m = 1; | |
2072 while (m < n) { | |
2073 int j = m; | |
2074 while (j > 0) { | |
2075 if (rsc[j-1] < rsc[j]) { | |
2076 int sctmp = rsc[j-1]; | |
2077 char * wdtmp = rword[j-1]; | |
2078 rsc[j-1] = rsc[j]; | |
2079 rword[j-1] = rword[j]; | |
2080 rsc[j] = sctmp; | |
2081 rword[j] = wdtmp; | |
2082 if (rword2) { | |
2083 wdtmp = rword2[j-1]; | |
2084 rword2[j-1] = rword2[j]; | |
2085 rword2[j] = wdtmp; | |
2086 } | |
2087 j--; | |
2088 } else break; | |
2089 } | |
2090 m++; | |
2091 } | |
2092 return; | |
2093 } | |
2094 | |
2095 // longest common subsequence | |
2096 void SuggestMgr::lcs(const char * s, const char * s2, int * l1, int * l2, char *
* result) { | |
2097 int n, m; | |
2098 w_char su[MAXSWL]; | |
2099 w_char su2[MAXSWL]; | |
2100 char * b; | |
2101 char * c; | |
2102 int i; | |
2103 int j; | |
2104 if (utf8) { | |
2105 m = u8_u16(su, MAXSWL, s); | |
2106 n = u8_u16(su2, MAXSWL, s2); | |
2107 } else { | |
2108 m = strlen(s); | |
2109 n = strlen(s2); | |
2110 } | |
2111 c = (char *) calloc(m + 1, n + 1); | |
2112 b = (char *) calloc(m + 1, n + 1); | |
2113 if (!c || !b) { | |
2114 if (c) free(c); | |
2115 if (b) free(b); | |
2116 *result = NULL; | |
2117 return; | |
2118 } | |
2119 for (i = 1; i <= m; i++) { | |
2120 for (j = 1; j <= n; j++) { | |
2121 if ( ((utf8) && (*((short *) su+i-1) == *((short *)su2+j-1))) | |
2122 || ((!utf8) && ((*(s+i-1)) == (*(s2+j-1))))) { | |
2123 c[i*(n+1) + j] = c[(i-1)*(n+1) + j-1]+1; | |
2124 b[i*(n+1) + j] = LCS_UPLEFT; | |
2125 } else if (c[(i-1)*(n+1) + j] >= c[i*(n+1) + j-1]) { | |
2126 c[i*(n+1) + j] = c[(i-1)*(n+1) + j]; | |
2127 b[i*(n+1) + j] = LCS_UP; | |
2128 } else { | |
2129 c[i*(n+1) + j] = c[i*(n+1) + j-1]; | |
2130 b[i*(n+1) + j] = LCS_LEFT; | |
2131 } | |
2132 } | |
2133 } | |
2134 *result = b; | |
2135 free(c); | |
2136 *l1 = m; | |
2137 *l2 = n; | |
2138 } | |
2139 | |
2140 int SuggestMgr::lcslen(const char * s, const char* s2) { | |
2141 int m; | |
2142 int n; | |
2143 int i; | |
2144 int j; | |
2145 char * result; | |
2146 int len = 0; | |
2147 lcs(s, s2, &m, &n, &result); | |
2148 if (!result) return 0; | |
2149 i = m; | |
2150 j = n; | |
2151 while ((i != 0) && (j != 0)) { | |
2152 if (result[i*(n+1) + j] == LCS_UPLEFT) { | |
2153 len++; | |
2154 i--; | |
2155 j--; | |
2156 } else if (result[i*(n+1) + j] == LCS_UP) { | |
2157 i--; | |
2158 } else j--; | |
2159 } | |
2160 free(result); | |
2161 return len; | |
2162 } | |
OLD | NEW |