Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(265)

Side by Side Diff: third_party/hunspell_new/src/hunspell/affentry.cxx

Issue 1135173004: Rename third_party/hunspell_new back to third_party/hunspell. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #include "license.hunspell"
2 #include "license.myspell"
3
4 #include <stdlib.h>
5 #include <string.h>
6 #include <stdio.h>
7 #include <ctype.h>
8
9 #include "affentry.hxx"
10 #include "csutil.hxx"
11
12 PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
13 {
14 // register affix manager
15 pmyMgr = pmgr;
16
17 // set up its initial values
18
19 aflag = dp->aflag; // flag
20 strip = dp->strip; // string to strip
21 appnd = dp->appnd; // string to append
22 stripl = dp->stripl; // length of strip string
23 appndl = dp->appndl; // length of append string
24 numconds = dp->numconds; // length of the condition
25 opts = dp->opts; // cross product flag
26 // then copy over all of the conditions
27 if (opts & aeLONGCOND) {
28 memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1);
29 c.l.conds2 = dp->c.l.conds2;
30 } else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
31 next = NULL;
32 nextne = NULL;
33 nexteq = NULL;
34 morphcode = dp->morphcode;
35 contclass = dp->contclass;
36 contclasslen = dp->contclasslen;
37 }
38
39
40 PfxEntry::~PfxEntry()
41 {
42 aflag = 0;
43 if (appnd) free(appnd);
44 if (strip) free(strip);
45 pmyMgr = NULL;
46 appnd = NULL;
47 strip = NULL;
48 if (opts & aeLONGCOND) free(c.l.conds2);
49 if (morphcode && !(opts & aeALIASM)) free(morphcode);
50 if (contclass && !(opts & aeALIASF)) free(contclass);
51 }
52
53 // add prefix to this word assuming conditions hold
54 char * PfxEntry::add(const char * word, int len)
55 {
56 char tword[MAXWORDUTF8LEN + 4];
57
58 if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
59 (len >= numconds) && test_condition(word) &&
60 (!stripl || (strncmp(word, strip, stripl) == 0)) &&
61 ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
62 /* we have a match so add prefix */
63 char * pp = tword;
64 if (appndl) {
65 strcpy(tword,appnd);
66 pp += appndl;
67 }
68 strcpy(pp, (word + stripl));
69 return mystrdup(tword);
70 }
71 return NULL;
72 }
73
74 inline char * PfxEntry::nextchar(char * p) {
75 if (p) {
76 p++;
77 if (opts & aeLONGCOND) {
78 // jump to the 2nd part of the condition
79 if (p == c.conds + MAXCONDLEN_1) return c.l.conds2;
80 // end of the MAXCONDLEN length condition
81 } else if (p == c.conds + MAXCONDLEN) return NULL;
82 return *p ? p : NULL;
83 }
84 return NULL;
85 }
86
// Test whether the beginning of st satisfies this entry's condition
// pattern.  Returns 1 on a match and 0 otherwise; an entry with
// numconds == 0 always matches.
//
// The pattern is walked with nextchar() while st is advanced through the
// word.  Pattern syntax handled here:
//   [ ... ]   character group
//   [^ ... ]  negated group
//   .         any single character (a literal dot inside a group)
//   other     literal byte
// When the aeUTF8 option bit is set, UTF-8 continuation bytes
// ((byte & 0xc0) == 0x80) are consumed together with their lead byte.
87 inline int PfxEntry::test_condition(const char * st)
88 {
// pos is non-NULL only while inside a [...] group; it remembers where in
// the word the group started so a failed alternative can rewind to it
89 const char * pos = NULL; // group with pos input position
90 bool neg = false; // complementer
91 bool ingroup = false; // character in the group
92 if (numconds == 0) return 1;
93 char * p = c.conds;
94 while (1) {
95 switch (*p) {
96 case '\0': return 1;
// group start: reset the per-group state and remember the word position
97 case '[': {
98 neg = false;
99 ingroup = false;
100 p = nextchar(p);
101 pos = st; break;
102 }
103 case '^': { p = nextchar(p); neg = true; break; }
// group end: fail if a negated group matched or a plain group did not
104 case ']': {
105 if ((neg && ingroup) || (!neg && !ingroup)) return 0;
106 pos = NULL;
107 p = nextchar(p);
108 // skip the next character
// (only needed when no member matched, i.e. a negated group succeeded;
// a matching member has already advanced st)
109 if (!ingroup && *st) for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
110 if (*st == '\0' && p) return 0; // word <= condition
111 break;
112 }
// outside a group a dot consumes one character; inside a group the
// if-body is skipped and control falls through to the default case, so
// the dot is compared literally
113 case '.': if (!pos) { // dots are not metacharacters in groups: [.]
114 p = nextchar(p);
115 // skip the next character
116 for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
117 if (*st == '\0' && p) return 0; // word <= condition
118 break;
119 }
// literal byte (or group member)
120 default: {
121 if (*st == *p) {
122 st++;
123 p = nextchar(p);
124 if ((opts & aeUTF8) && (*(st - 1) & 0x80)) { // multibyte
// compare the remaining continuation bytes of this pattern character
125 while (p && (*p & 0xc0) == 0x80) { // character
126 if (*p != *st) {
// mismatch: fatal outside a group, otherwise rewind to the group start
127 if (!pos) return 0;
128 st = pos;
129 break;
130 }
131 p = nextchar(p);
132 st++;
133 }
// st moved past pos, so a group member matched: skip to the closing ']'
134 if (pos && st != pos) {
135 ingroup = true;
136 while (p && *p != ']' && ((p = nextchar(p)) != NULL) );
137 }
138 } else if (pos) {
// single-byte group member matched: skip to the closing ']'
139 ingroup = true;
140 while (p && *p != ']' && ((p = nextchar(p)) != NULL));
141 }
142 } else if (pos) { // group
// this member did not match; try the next one
143 p = nextchar(p);
144 } else return 0;
145 }
146 }
// pattern exhausted without a failure
147 if (!p) return 1;
148 }
149 }
150
151 // check if this prefix entry matches
152 struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound , const FLAG needflag)
153 {
154 int tmpl; // length of tmpword
155 struct hentry * he; // hash entry of root word or NULL
156 char tmpword[MAXWORDUTF8LEN + 4];
157
158 // on entry prefix is 0 length or already matches the beginning of the word.
159 // So if the remaining root word has positive length
160 // and if there are enough chars in root word and added back strip chars
161 // to meet the number of characters conditions, then test it
162
163 tmpl = len - appndl;
164
165 if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) {
166
167 // generate new root word by removing prefix and adding
168 // back any characters that would have been stripped
169
170 if (stripl) strcpy (tmpword, strip);
171 strcpy ((tmpword + stripl), (word + appndl));
172
173 // now make sure all of the conditions on characters
174 // are met. Please see the appendix at the end of
175 // this file for more info on exactly what is being
176 // tested
177
178 // if all conditions are met then check if resulting
179 // root word in the dictionary
180
181 if (test_condition(tmpword)) {
182 tmpl += stripl;
183 if ((he = pmyMgr->lookup(tmpword)) != NULL) {
184 do {
185 if (TESTAFF(he->astr, aflag, he->alen) &&
186 // forbid single prefixes with needaffix flag
187 ! TESTAFF(contclass, pmyMgr->get_needaffix(), contclassl en) &&
188 // needflag
189 ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
190 (contclass && TESTAFF(contclass, needflag, contclasslen ))))
191 return he;
192 he = he->next_homonym; // check homonyms
193 } while (he);
194 }
195
196 // prefix matched but no root word was found
197 // if aeXPRODUCT is allowed, try again but now
198 // ross checked combined with a suffix
199
200 //if ((opts & aeXPRODUCT) && in_compound) {
201 if ((opts & aeXPRODUCT)) {
202 he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, this, NU LL,
203 0, NULL, FLAG_NULL, needflag, in_compound);
204 if (he) return he;
205 }
206 }
207 }
208 return NULL;
209 }
210
211 // check if this prefix entry matches
212 struct hentry * PfxEntry::check_twosfx(const char * word, int len,
213 char in_compound, const FLAG needflag)
214 {
215 int tmpl; // length of tmpword
216 struct hentry * he; // hash entry of root word or NULL
217 char tmpword[MAXWORDUTF8LEN + 4];
218
219 // on entry prefix is 0 length or already matches the beginning of the word.
220 // So if the remaining root word has positive length
221 // and if there are enough chars in root word and added back strip chars
222 // to meet the number of characters conditions, then test it
223
224 tmpl = len - appndl;
225
226 if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
227 (tmpl + stripl >= numconds)) {
228
229 // generate new root word by removing prefix and adding
230 // back any characters that would have been stripped
231
232 if (stripl) strcpy (tmpword, strip);
233 strcpy ((tmpword + stripl), (word + appndl));
234
235 // now make sure all of the conditions on characters
236 // are met. Please see the appendix at the end of
237 // this file for more info on exactly what is being
238 // tested
239
240 // if all conditions are met then check if resulting
241 // root word in the dictionary
242
243 if (test_condition(tmpword)) {
244 tmpl += stripl;
245
246 // prefix matched but no root word was found
247 // if aeXPRODUCT is allowed, try again but now
248 // cross checked combined with a suffix
249
250 if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
251 he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, t his, needflag);
252 if (he) return he;
253 }
254 }
255 }
256 return NULL;
257 }
258
259 // check if this prefix entry matches
260 char * PfxEntry::check_twosfx_morph(const char * word, int len,
261 char in_compound, const FLAG needflag)
262 {
263 int tmpl; // length of tmpword
264 char tmpword[MAXWORDUTF8LEN + 4];
265
266 // on entry prefix is 0 length or already matches the beginning of the word.
267 // So if the remaining root word has positive length
268 // and if there are enough chars in root word and added back strip chars
269 // to meet the number of characters conditions, then test it
270
271 tmpl = len - appndl;
272
273 if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
274 (tmpl + stripl >= numconds)) {
275
276 // generate new root word by removing prefix and adding
277 // back any characters that would have been stripped
278
279 if (stripl) strcpy (tmpword, strip);
280 strcpy ((tmpword + stripl), (word + appndl));
281
282 // now make sure all of the conditions on characters
283 // are met. Please see the appendix at the end of
284 // this file for more info on exactly what is being
285 // tested
286
287 // if all conditions are met then check if resulting
288 // root word in the dictionary
289
290 if (test_condition(tmpword)) {
291 tmpl += stripl;
292
293 // prefix matched but no root word was found
294 // if aeXPRODUCT is allowed, try again but now
295 // ross checked combined with a suffix
296
297 if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
298 return pmyMgr->suffix_check_twosfx_morph(tmpword, tmpl,
299 aeXPRODUCT, this, needflag);
300 }
301 }
302 }
303 return NULL;
304 }
305
306 // check if this prefix entry matches
307 char * PfxEntry::check_morph(const char * word, int len, char in_compound, const FLAG needflag)
308 {
309 int tmpl; // length of tmpword
310 struct hentry * he; // hash entry of root word or NULL
311 char tmpword[MAXWORDUTF8LEN + 4];
312 char result[MAXLNLEN];
313 char * st;
314
315 *result = '\0';
316
317 // on entry prefix is 0 length or already matches the beginning of the word.
318 // So if the remaining root word has positive length
319 // and if there are enough chars in root word and added back strip chars
320 // to meet the number of characters conditions, then test it
321
322 tmpl = len - appndl;
323
324 if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
325 (tmpl + stripl >= numconds)) {
326
327 // generate new root word by removing prefix and adding
328 // back any characters that would have been stripped
329
330 if (stripl) strcpy (tmpword, strip);
331 strcpy ((tmpword + stripl), (word + appndl));
332
333 // now make sure all of the conditions on characters
334 // are met. Please see the appendix at the end of
335 // this file for more info on exactly what is being
336 // tested
337
338 // if all conditions are met then check if resulting
339 // root word in the dictionary
340
341 if (test_condition(tmpword)) {
342 tmpl += stripl;
343 if ((he = pmyMgr->lookup(tmpword)) != NULL) {
344 do {
345 if (TESTAFF(he->astr, aflag, he->alen) &&
346 // forbid single prefixes with needaffix flag
347 ! TESTAFF(contclass, pmyMgr->get_needaffix(), contclassl en) &&
348 // needflag
349 ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
350 (contclass && TESTAFF(contclass, needflag, contclasslen )))) {
351 if (morphcode) {
352 mystrcat(result, " ", MAXLNLEN);
353 mystrcat(result, morphcode, MAXLNLEN);
354 } else mystrcat(result,getKey(), MAXLNLEN);
355 if (!HENTRY_FIND(he, MORPH_STEM)) {
356 mystrcat(result, " ", MAXLNLEN);
357 mystrcat(result, MORPH_STEM, MAXLNLEN);
358 mystrcat(result, HENTRY_WORD(he), MAXLNLEN);
359 }
360 // store the pointer of the hash entry
361 if (HENTRY_DATA(he)) {
362 mystrcat(result, " ", MAXLNLEN);
363 mystrcat(result, HENTRY_DATA2(he), MAXLNLEN);
364 } else {
365 // return with debug information
366 char * flag = pmyMgr->encode_flag(getFlag());
367 mystrcat(result, " ", MAXLNLEN);
368 mystrcat(result, MORPH_FLAG, MAXLNLEN);
369 mystrcat(result, flag, MAXLNLEN);
370 free(flag);
371 }
372 mystrcat(result, "\n", MAXLNLEN);
373 }
374 he = he->next_homonym;
375 } while (he);
376 }
377
378 // prefix matched but no root word was found
379 // if aeXPRODUCT is allowed, try again but now
380 // ross checked combined with a suffix
381
382 if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
383 st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, th is,
384 FLAG_NULL, needflag);
385 if (st) {
386 mystrcat(result, st, MAXLNLEN);
387 free(st);
388 }
389 }
390 }
391 }
392
393 if (*result) return mystrdup(result);
394 return NULL;
395 }
396
397 SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
398 {
399 // register affix manager
400 pmyMgr = pmgr;
401
402 // set up its initial values
403 aflag = dp->aflag; // char flag
404 strip = dp->strip; // string to strip
405 appnd = dp->appnd; // string to append
406 stripl = dp->stripl; // length of strip string
407 appndl = dp->appndl; // length of append string
408 numconds = dp->numconds; // length of the condition
409 opts = dp->opts; // cross product flag
410
411 // then copy over all of the conditions
412 if (opts & aeLONGCOND) {
413 memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1);
414 c.l.conds2 = dp->c.l.conds2;
415 } else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
416 next = NULL;
417 nextne = NULL;
418 nexteq = NULL;
419 rappnd = myrevstrdup(appnd);
420 morphcode = dp->morphcode;
421 contclass = dp->contclass;
422 contclasslen = dp->contclasslen;
423 }
424
425
426 SfxEntry::~SfxEntry()
427 {
428 aflag = 0;
429 if (appnd) free(appnd);
430 if (rappnd) free(rappnd);
431 if (strip) free(strip);
432 pmyMgr = NULL;
433 appnd = NULL;
434 strip = NULL;
435 if (opts & aeLONGCOND) free(c.l.conds2);
436 if (morphcode && !(opts & aeALIASM)) free(morphcode);
437 if (contclass && !(opts & aeALIASF)) free(contclass);
438 }
439
440 // add suffix to this word assuming conditions hold
441 char * SfxEntry::add(const char * word, int len)
442 {
443 char tword[MAXWORDUTF8LEN + 4];
444
445 /* make sure all conditions match */
446 if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
447 (len >= numconds) && test_condition(word + len, word) &&
448 (!stripl || (strcmp(word + len - stripl, strip) == 0)) &&
449 ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
450 /* we have a match so add suffix */
451 strcpy(tword,word);
452 if (appndl) {
453 strcpy(tword + len - stripl, appnd);
454 } else {
455 *(tword + len - stripl) = '\0';
456 }
457 return mystrdup(tword);
458 }
459 return NULL;
460 }
461
462 inline char * SfxEntry::nextchar(char * p) {
463 if (p) {
464 p++;
465 if (opts & aeLONGCOND) {
466 // jump to the 2nd part of the condition
467 if (p == c.l.conds1 + MAXCONDLEN_1) return c.l.conds2;
468 // end of the MAXCONDLEN length condition
469 } else if (p == c.conds + MAXCONDLEN) return NULL;
470 return *p ? p : NULL;
471 }
472 return NULL;
473 }
474
475 inline int SfxEntry::test_condition(const char * st, const char * beg)
476 {
477 const char * pos = NULL; // group with pos input position
478 bool neg = false; // complementer
479 bool ingroup = false; // character in the group
480 if (numconds == 0) return 1;
481 char * p = c.conds;
482 st--;
483 int i = 1;
484 while (1) {
485 switch (*p) {
486 case '\0': return 1;
487 case '[': { p = nextchar(p); pos = st; break; }
488 case '^': { p = nextchar(p); neg = true; break; }
489 case ']': { if (!neg && !ingroup) return 0;
490 i++;
491 // skip the next character
492 if (!ingroup) {
493 for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x8 0; st--);
494 st--;
495 }
496 pos = NULL;
497 neg = false;
498 ingroup = false;
499 p = nextchar(p);
500 if (st < beg && p) return 0; // word <= condition
501 break;
502 }
503 case '.': if (!pos) { // dots are not metacharacters in groups: [.]
504 p = nextchar(p);
505 // skip the next character
506 for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x8 0; st--);
507 if (st < beg) { // word <= condition
508 if (p) return 0; else return 1;
509 }
510 if ((opts & aeUTF8) && (*st & 0x80)) { // head of the UTF-8 char acter
511 st--;
512 if (st < beg) { // word <= condition
513 if (p) return 0; else return 1;
514 }
515 }
516 break;
517 }
518 default: {
519 if (*st == *p) {
520 p = nextchar(p);
521 if ((opts & aeUTF8) && (*st & 0x80)) {
522 st--;
523 while (p && (st >= beg)) {
524 if (*p != *st) {
525 if (!pos) return 0;
526 st = pos;
527 break;
528 }
529 // first byte of the UTF-8 multibyte character
530 if ((*p & 0xc0) != 0x80) break;
531 p = nextchar(p);
532 st--;
533 }
534 if (pos && st != pos) {
535 if (neg) return 0;
536 else if (i == numconds) return 1;
537 ingroup = true;
538 while (p && *p != ']' && ((p = nextchar(p)) != NULL) );
539 st--;
540 }
541 if (p && *p != ']') p = nextchar(p);
542 } else if (pos) {
543 if (neg) return 0;
544 else if (i == numconds) return 1;
545 ingroup = true;
546 while (p && *p != ']' && ((p = nextchar(p)) != NULL));
547 // if (p && *p != ']') p = nextchar(p);
548 st--;
549 }
550 if (!pos) {
551 i++;
552 st--;
553 }
554 if (st < beg && p && *p != ']') return 0; // word <= conditi on
555 } else if (pos) { // group
556 p = nextchar(p);
557 } else return 0;
558 }
559 }
560 if (!p) return 1;
561 }
562 }
563
564 // see if this suffix is present in the word
565 struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
566 PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag,
567 const FLAG badflag)
568 {
569 int tmpl; // length of tmpword
570 struct hentry * he; // hash entry pointer
571 unsigned char * cp;
572 char tmpword[MAXWORDUTF8LEN + 4];
573 PfxEntry* ep = ppfx;
574
575 // if this suffix is being cross checked with a prefix
576 // but it does not support cross products skip it
577
578 if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0))
579 return NULL;
580
581 // upon entry suffix is 0 length or already matches the end of the word.
582 // So if the remaining root word has positive length
583 // and if there are enough chars in root word and added back strip chars
584 // to meet the number of characters conditions, then test it
585
586 tmpl = len - appndl;
587 // the second condition is not enough for UTF-8 strings
588 // it checked in test_condition()
589
590 if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
591 (tmpl + stripl >= numconds)) {
592
593 // generate new root word by removing suffix and adding
594 // back any characters that would have been stripped or
595 // or null terminating the shorter string
596
597 strcpy (tmpword, word);
598 cp = (unsigned char *)(tmpword + tmpl);
599 if (stripl) {
600 strcpy ((char *)cp, strip);
601 tmpl += stripl;
602 cp = (unsigned char *)(tmpword + tmpl);
603 } else *cp = '\0';
604
605 // now make sure all of the conditions on characters
606 // are met. Please see the appendix at the end of
607 // this file for more info on exactly what is being
608 // tested
609
610 // if all conditions are met then check if resulting
611 // root word in the dictionary
612
613 if (test_condition((char *) cp, (char *) tmpword)) {
614
615 #ifdef SZOSZABLYA_POSSIBLE_ROOTS
616 fprintf(stdout,"%s %s %c\n", word, tmpword, aflag);
617 #endif
618 if ((he = pmyMgr->lookup(tmpword)) != NULL) {
619 do {
620 // check conditional suffix (enabled by prefix)
621 if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->ge tCont() &&
622 TESTAFF(ep->getCont(), aflag, ep->getContLen ()))) &&
623 (((optflags & aeXPRODUCT) == 0) ||
624 (ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) | |
625 // enabled by prefix
626 ((contclass) && (ep && TESTAFF(contclass, ep->getFla g(), contclasslen)))
627 ) &&
628 // handle cont. class
629 ((!cclass) ||
630 ((contclass) && TESTAFF(contclass, cclass, contc lasslen))
631 ) &&
632 // check only in compound homonyms (bad flags)
633 (!badflag || !TESTAFF(he->astr, badflag, he->alen)
634 ) &&
635 // handle required flag
636 ((!needflag) ||
637 (TESTAFF(he->astr, needflag, he->alen) ||
638 ((contclass) && TESTAFF(contclass, needflag, contc lasslen)))
639 )
640 ) return he;
641 he = he->next_homonym; // check homonyms
642 } while (he);
643
644 // obsolote stemming code (used only by the
645 // experimental SuffixMgr:suggest_pos_stems)
646 // store resulting root in wlst
647 } else if (wlst && (*ns < maxSug)) {
648 int cwrd = 1;
649 for (int k=0; k < *ns; k++)
650 if (strcmp(tmpword, wlst[k]) == 0) cwrd = 0;
651 if (cwrd) {
652 wlst[*ns] = mystrdup(tmpword);
653 if (wlst[*ns] == NULL) {
654 for (int j=0; j<*ns; j++) free(wlst[j]);
655 *ns = -1;
656 return NULL;
657 }
658 (*ns)++;
659 }
660 }
661 }
662 }
663 return NULL;
664 }
665
666 // see if two-level suffix is present in the word
667 struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
668 PfxEntry* ppfx, const FLAG needflag)
669 {
670 int tmpl; // length of tmpword
671 struct hentry * he; // hash entry pointer
672 unsigned char * cp;
673 char tmpword[MAXWORDUTF8LEN + 4];
674 PfxEntry* ep = ppfx;
675
676
677 // if this suffix is being cross checked with a prefix
678 // but it does not support cross products skip it
679
680 if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
681 return NULL;
682
683 // upon entry suffix is 0 length or already matches the end of the word.
684 // So if the remaining root word has positive length
685 // and if there are enough chars in root word and added back strip chars
686 // to meet the number of characters conditions, then test it
687
688 tmpl = len - appndl;
689
690 if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
691 (tmpl + stripl >= numconds)) {
692
693 // generate new root word by removing suffix and adding
694 // back any characters that would have been stripped or
695 // or null terminating the shorter string
696
697 strcpy (tmpword, word);
698 cp = (unsigned char *)(tmpword + tmpl);
699 if (stripl) {
700 strcpy ((char *)cp, strip);
701 tmpl += stripl;
702 cp = (unsigned char *)(tmpword + tmpl);
703 } else *cp = '\0';
704
705 // now make sure all of the conditions on characters
706 // are met. Please see the appendix at the end of
707 // this file for more info on exactly what is being
708 // tested
709
710 // if all conditions are met then recall suffix_check
711
712 if (test_condition((char *) cp, (char *) tmpword)) {
713 if (ppfx) {
714 // handle conditional suffix
715 if ((contclass) && TESTAFF(contclass, ep->getFlag(), contcla sslen))
716 he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, (FLAG) aflag, needflag);
717 else
718 he = pmyMgr->suffix_check(tmpword, tmpl, optflags, ppfx, NULL, 0, NULL, (FLAG) aflag, needflag);
719 } else {
720 he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, N ULL, (FLAG) aflag, needflag);
721 }
722 if (he) return he;
723 }
724 }
725 return NULL;
726 }
727
728 // see if two-level suffix is present in the word
729 char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
730 PfxEntry* ppfx, const FLAG needflag)
731 {
732 int tmpl; // length of tmpword
733 unsigned char * cp;
734 char tmpword[MAXWORDUTF8LEN + 4];
735 PfxEntry* ep = ppfx;
736 char * st;
737
738 char result[MAXLNLEN];
739
740 *result = '\0';
741
742 // if this suffix is being cross checked with a prefix
743 // but it does not support cross products skip it
744
745 if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
746 return NULL;
747
748 // upon entry suffix is 0 length or already matches the end of the word.
749 // So if the remaining root word has positive length
750 // and if there are enough chars in root word and added back strip chars
751 // to meet the number of characters conditions, then test it
752
753 tmpl = len - appndl;
754
755 if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
756 (tmpl + stripl >= numconds)) {
757
758 // generate new root word by removing suffix and adding
759 // back any characters that would have been stripped or
760 // or null terminating the shorter string
761
762 strcpy (tmpword, word);
763 cp = (unsigned char *)(tmpword + tmpl);
764 if (stripl) {
765 strcpy ((char *)cp, strip);
766 tmpl += stripl;
767 cp = (unsigned char *)(tmpword + tmpl);
768 } else *cp = '\0';
769
770 // now make sure all of the conditions on characters
771 // are met. Please see the appendix at the end of
772 // this file for more info on exactly what is being
773 // tested
774
775 // if all conditions are met then recall suffix_check
776
777 if (test_condition((char *) cp, (char *) tmpword)) {
778 if (ppfx) {
779 // handle conditional suffix
780 if ((contclass) && TESTAFF(contclass, ep->getFlag(), contcla sslen)) {
781 st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
782 if (st) {
783 if (ppfx->getMorph()) {
784 mystrcat(result, ppfx->getMorph(), MAXLNLEN);
785 mystrcat(result, " ", MAXLNLEN);
786 }
787 mystrcat(result,st, MAXLNLEN);
788 free(st);
789 mychomp(result);
790 }
791 } else {
792 st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag, needflag);
793 if (st) {
794 mystrcat(result, st, MAXLNLEN);
795 free(st);
796 mychomp(result);
797 }
798 }
799 } else {
800 st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
801 if (st) {
802 mystrcat(result, st, MAXLNLEN);
803 free(st);
804 mychomp(result);
805 }
806 }
807 if (*result) return mystrdup(result);
808 }
809 }
810 return NULL;
811 }
812
813 // get next homonym with same affix
814 struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, Pfx Entry* ppfx,
815 const FLAG cclass, const FLAG needflag)
816 {
817 PfxEntry* ep = ppfx;
818 FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL;
819
820 while (he->next_homonym) {
821 he = he->next_homonym;
822 if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTA FF(ep->getCont(), aflag, ep->getContLen()))) &&
823 ((optflags & aeXPRODUCT) == 0 ||
824 TESTAFF(he->astr, eFlag, he->alen) ||
825 // handle conditional suffix
826 ((contclass) && TESTAFF(contclass, eFlag, contclassl en))
827 ) &&
828 // handle cont. class
829 ((!cclass) ||
830 ((contclass) && TESTAFF(contclass, cclass, contc lasslen))
831 ) &&
832 // handle required flag
833 ((!needflag) ||
834 (TESTAFF(he->astr, needflag, he->alen) ||
835 ((contclass) && TESTAFF(contclass, needflag, contc lasslen)))
836 )
837 ) return he;
838 }
839 return NULL;
840 }
841
842
843 #if 0
844
845 Appendix: Understanding Affix Code
846
847
848 An affix is either a prefix or a suffix attached to root words to make
849 other words.
850
851 Basically a Prefix or a Suffix is set of AffEntry objects
852 which store information about the prefix or suffix along
853 with supporting routines to check if a word has a particular
854 prefix or suffix or a combination.
855
856 The structure affentry is defined as follows:
857
858 struct affentry
859 {
860 unsigned short aflag; // ID used to represent the affix
861 char * strip; // string to strip before adding affix
862 char * appnd; // the affix string to add
863 unsigned char stripl; // length of the strip string
864 unsigned char appndl; // length of the affix string
865 char numconds; // the number of conditions that must be met
866 char opts; // flag: aeXPRODUCT- combine both prefix and suffix
867 char conds[SETSIZE]; // array which encodes the conditions to be met
868 };
869
870
871 Here is a suffix borrowed from the en_US.aff file. This file
872 is whitespace delimited.
873
874 SFX D Y 4
875 SFX D 0 d e
876 SFX D y ied [^aeiou]y
877 SFX D 0 ed [^ey]
878 SFX D 0 ed [aeiou]y
879
880 This information can be interpreted as follows:
881
882 The first line has 4 fields
883
884 Field
885 -----
886 1 SFX - indicates this is a suffix
887 2 D - is the name of the character flag which represents this suffix
888 3 Y - indicates it can be combined with prefixes (cross product)
889 4 4 - indicates that sequence of 4 affentry structures are needed to
890 properly store the affix information
891
892 The remaining lines describe the unique information for the 4 SfxEntry
893 objects that make up this affix. Each line can be interpreted
894 as follows: (note fields 1 and 2 are as a check against line 1 info)
895
896 Field
897 -----
898 1 SFX - indicates this is a suffix
899 2 D - is the name of the character flag for this affix
900 3 y - the string of chars to strip off before adding affix
901 (a 0 here indicates the NULL string)
902 4 ied - the string of affix characters to add
903 5 [^aeiou]y - the conditions which must be met before the affix
904 can be applied
905
906 Field 5 is interesting. Since this is a suffix, field 5 tells us that
907 there are 2 conditions that must be met. The first condition is that
908 the next to the last character in the word must *NOT* be any of the
909 following "a", "e", "i", "o" or "u". The second condition is that
910 the last character of the word must end in "y".
911
912 So how can we encode this information concisely and be able to
913 test for both conditions in a fast manner? The answer is found
914 by studying the wonderful ispell code of Geoff Kuenning, et al.
915 (now available under a normal BSD license).
916
917 If we set up a conds array of 256 bytes indexed (0 to 255) and access it
918 using a character (cast to an unsigned char) of a string, we have 8 bits
919 of information we can store about that character. Specifically we
920 could use each bit to say if that character is allowed in any of the
921 last (or first for prefixes) 8 characters of the word.
922
923 Basically, each character at one end of the word (up to the number
924 of conditions) is used to index into the conds array and the resulting
925 value found there says whether that character is valid for a
926 specific character position in the word.
927
928 For prefixes, it does this by setting bit 0 if that char is valid
929 in the first position, bit 1 if valid in the second position, and so on.
930
931 If a bit is not set, then that char is not valid for that position in the
932 word.
933
934 If working with suffixes bit 0 is used for the character closest
935 to the front, bit 1 for the next character towards the end, ...,
936 with bit numconds-1 representing the last char at the end of the string.
937
938 Note: since entries in the conds[] are 8 bits, only 8 conditions
939 (read that only 8 character positions) can be examined at one
940 end of a word (the beginning for prefixes and the end for suffixes).
941
942 So to make this clearer, let's encode the conds array values for the
943 first two affentries for the suffix D described earlier.
944
945
946 For the first affentry:
947 numconds = 1 (only examine the last character)
948
949 conds['e'] = (1 << 0) (the word must end in an E)
950 all others are all 0
951
952 For the second affentry:
953 numconds = 2 (only examine the last two characters)
954
955 conds[X] = conds[X] | (1 << 0) (aeiou are not allowed)
956 where X is all characters *but* a, e, i, o, or u
957
958
959 conds['y'] = (1 << 1) (the last char must be a y)
960 all other bits for all other entries in the conds array are zero
961
962
963 #endif
964
OLDNEW
« no previous file with comments | « third_party/hunspell_new/src/hunspell/affentry.hxx ('k') | third_party/hunspell_new/src/hunspell/affixmgr.hxx » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698