Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4875)

Unified Diff: chrome/third_party/hunspell/src/hunspell/affentry.cxx

Issue 155841: Update Hunspell to the latest stable version to use the latest dictionary for... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: '' Created 11 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/third_party/hunspell/src/hunspell/affentry.cxx
===================================================================
--- chrome/third_party/hunspell/src/hunspell/affentry.cxx (revision 21721)
+++ chrome/third_party/hunspell/src/hunspell/affentry.cxx (working copy)
@@ -7,9 +7,9 @@
#include <cctype>
#include <cstdio>
#else
-#include <stdlib.h>
+#include <stdlib.h>
#include <string.h>
-#include <stdio.h>
+#include <stdio.h>
#include <ctype.h>
#endif
@@ -17,7 +17,7 @@
#include "csutil.hxx"
#ifndef MOZILLA_CLIENT
-#ifndef W32
+#ifndef WIN32
using namespace std;
#endif
#endif
@@ -29,22 +29,23 @@
pmyMgr = pmgr;
// set up its intial values
-
- aflag = dp->aflag; // flag
+
+ aflag = dp->aflag; // flag
strip = dp->strip; // string to strip
appnd = dp->appnd; // string to append
stripl = dp->stripl; // length of strip string
appndl = dp->appndl; // length of append string
- numconds = dp->numconds; // number of conditions to match
- opts = dp->opts; // cross product flag
+ numconds = dp->numconds; // length of the condition
+ opts = dp->opts; // cross product flag
// then copy over all of the conditions
- memcpy(&conds.base[0],&dp->conds.base[0],SETSIZE*sizeof(conds.base[0]));
+ if (opts & aeLONGCOND) {
+ memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1);
+ c.l.conds2 = dp->c.l.conds2;
+ } else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
next = NULL;
nextne = NULL;
nexteq = NULL;
-#ifdef HUNSPELL_EXPERIMENTAL
morphcode = dp->morphcode;
-#endif
contclass = dp->contclass;
contclasslen = dp->contclasslen;
}
@@ -58,15 +59,8 @@
pmyMgr = NULL;
appnd = NULL;
strip = NULL;
- if (opts & aeUTF8) {
- for (int i = 0; i < numconds; i++) {
- if (conds.utf8.wchars[i])
- free(conds.utf8.wchars[i]);
- }
- }
-#ifdef HUNSPELL_EXPERIMENTAL
+ if (opts & aeLONGCOND) free(c.l.conds2);
if (morphcode && !(opts & aeALIASM)) free(morphcode);
-#endif
if (contclass && !(opts & aeALIASF)) free(contclass);
}
@@ -75,8 +69,9 @@
{
char tword[MAXWORDUTF8LEN + 4];
- if ((len > stripl) && (len >= numconds) && test_condition(word) &&
- (!stripl || (strncmp(word, strip, stripl) == 0)) &&
+ if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
+ (len >= numconds) && test_condition(word) &&
+ (!stripl || (strncmp(word, strip, stripl) == 0)) &&
((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
/* we have a match so add prefix */
char * pp = tword;
@@ -87,51 +82,87 @@
strcpy(pp, (word + stripl));
return mystrdup(tword);
}
- return NULL;
+ return NULL;
}
+inline char * PfxEntry::nextchar(char * p) {
+ if (p) {
+ p++;
+ if (opts & aeLONGCOND) {
+ // jump to the 2nd part of the condition
+ if (p == c.conds + MAXCONDLEN_1) return c.l.conds2;
+ // end of the MAXCONDLEN length condition
+ } else if (p == c.conds + MAXCONDLEN) return NULL;
+ return *p ? p : NULL;
+ }
+ return NULL;
+}
inline int PfxEntry::test_condition(const char * st)
{
- int cond;
- unsigned char * cp = (unsigned char *)st;
- if (!(opts & aeUTF8)) { // 256-character codepage
- for (cond = 0; cond < numconds; cond++) {
- if ((conds.base[*cp++] & (1 << cond)) == 0) return 0;
- }
- } else { // UTF-8 encoding
- unsigned short wc;
- for (cond = 0; cond < numconds; cond++) {
- // a simple 7-bit ASCII character in UTF-8
- if ((*cp >> 7) == 0) {
- // also check limit (end of word)
- if ((!*cp) || ((conds.utf8.ascii[*cp++] & (1 << cond)) == 0)) return 0;
- // UTF-8 multibyte character
- } else {
- // not dot wildcard in rule
- if (!conds.utf8.all[cond]) {
- if (conds.utf8.neg[cond]) {
- u8_u16((w_char *) &wc, 1, (char *) cp);
- if (conds.utf8.wchars[cond] &&
- flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
- wc, (short) conds.utf8.wlen[cond])) return 0;
- } else {
- if (!conds.utf8.wchars[cond]) return 0;
- u8_u16((w_char *) &wc, 1, (char *) cp);
- if (!flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
- wc, (short)conds.utf8.wlen[cond])) return 0;
- }
+ const char * pos = NULL; // group with pos input position
+ bool neg = false; // complementer
+ bool ingroup = false; // character in the group
+ if (numconds == 0) return 1;
+ char * p = c.conds;
+ while (1) {
+ switch (*p) {
+ case '\0': return 1;
+ case '[': {
+ neg = false;
+ ingroup = false;
+ p = nextchar(p);
+ pos = st; break;
}
- // jump to next UTF-8 character
- for(cp++; (*cp & 0xc0) == 0x80; cp++);
- }
+ case '^': { p = nextchar(p); neg = true; break; }
+ case ']': {
+ if ((neg && ingroup) || (!neg && !ingroup)) return 0;
+ pos = NULL;
+ p = nextchar(p);
+ // skip the next character
+ if (!ingroup) for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
+ if (*st == '\0' && p) return 0; // word <= condition
+ break;
+ }
+ case '.': if (!pos) { // dots are not metacharacters in groups: [.]
+ p = nextchar(p);
+ // skip the next character
+ for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
+ if (*st == '\0' && p) return 0; // word <= condition
+ break;
+ }
+ default: {
+ if (*st == *p) {
+ st++;
+ p = nextchar(p);
+ if ((opts & aeUTF8) && (*(st - 1) & 0x80)) { // multibyte
+ while (p && (*p & 0xc0) == 0x80) { // character
+ if (*p != *st) {
+ if (!pos) return 0;
+ st = pos;
+ break;
+ }
+ p = nextchar(p);
+ st++;
+ }
+ if (pos && st != pos) {
+ ingroup = true;
+ while (p && *p != ']' && (p = nextchar(p)));
+ }
+ } else if (pos) {
+ ingroup = true;
+ while (p && *p != ']' && (p = nextchar(p)));
+ }
+ } else if (pos) { // group
+ p = nextchar(p);
+ } else return 0;
+ }
}
+ if (!p) return 1;
}
- return 1;
}
-
-// check if this prefix entry matches
+// check if this prefix entry matches
struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound, const FLAG needflag)
{
int tmpl; // length of tmpword
@@ -145,7 +176,7 @@
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
@@ -166,8 +197,8 @@
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
do {
if (TESTAFF(he->astr, aflag, he->alen) &&
- // forbid single prefixes with pseudoroot flag
- ! TESTAFF(contclass, pmyMgr->get_pseudoroot(), contclasslen) &&
+ // forbid single prefixes with needaffix flag
+ ! TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
// needflag
((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
(contclass && TESTAFF(contclass, needflag, contclasslen))))
@@ -175,14 +206,14 @@
he = he->next_homonym; // check homonyms
} while (he);
}
-
- // prefix matched but no root word was found
- // if aeXPRODUCT is allowed, try again but now
+
+ // prefix matched but no root word was found
+ // if aeXPRODUCT is allowed, try again but now
// ross checked combined with a suffix
//if ((opts & aeXPRODUCT) && in_compound) {
if ((opts & aeXPRODUCT)) {
- he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, NULL,
+ he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, NULL,
0, NULL, FLAG_NULL, needflag, in_compound);
if (he) return he;
}
@@ -191,7 +222,7 @@
return NULL;
}
-// check if this prefix entry matches
+// check if this prefix entry matches
struct hentry * PfxEntry::check_twosfx(const char * word, int len,
char in_compound, const FLAG needflag)
{
@@ -206,7 +237,8 @@
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + stripl >= numconds)) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
@@ -225,8 +257,8 @@
if (test_condition(tmpword)) {
tmpl += stripl;
- // prefix matched but no root word was found
- // if aeXPRODUCT is allowed, try again but now
+ // prefix matched but no root word was found
+ // if aeXPRODUCT is allowed, try again but now
// cross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
@@ -238,8 +270,7 @@
return NULL;
}
-#ifdef HUNSPELL_EXPERIMENTAL
-// check if this prefix entry matches
+// check if this prefix entry matches
char * PfxEntry::check_twosfx_morph(const char * word, int len,
char in_compound, const FLAG needflag)
{
@@ -253,7 +284,8 @@
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + stripl >= numconds)) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
@@ -272,8 +304,8 @@
if (test_condition(tmpword)) {
tmpl += stripl;
- // prefix matched but no root word was found
- // if aeXPRODUCT is allowed, try again but now
+ // prefix matched but no root word was found
+ // if aeXPRODUCT is allowed, try again but now
// ross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
@@ -285,7 +317,7 @@
return NULL;
}
-// check if this prefix entry matches
+// check if this prefix entry matches
char * PfxEntry::check_morph(const char * word, int len, char in_compound, const FLAG needflag)
{
int tmpl; // length of tmpword
@@ -293,7 +325,7 @@
char tmpword[MAXWORDUTF8LEN + 4];
char result[MAXLNLEN];
char * st;
-
+
*result = '\0';
// on entry prefix is 0 length or already matches the beginning of the word.
@@ -303,7 +335,8 @@
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + stripl >= numconds)) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
@@ -324,41 +357,56 @@
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
do {
if (TESTAFF(he->astr, aflag, he->alen) &&
- // forbid single prefixes with pseudoroot flag
- ! TESTAFF(contclass, pmyMgr->get_pseudoroot(), contclasslen) &&
+ // forbid single prefixes with needaffix flag
+ ! TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
// needflag
((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
(contclass && TESTAFF(contclass, needflag, contclasslen)))) {
- if (morphcode) strcat(result, morphcode); else strcat(result,getKey());
- if (he->description) {
- if ((*(he->description)=='[')||(*(he->description)=='<')) strcat(result,he->word);
- strcat(result,he->description);
+ if (morphcode) {
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, morphcode, MAXLNLEN);
+ } else mystrcat(result,getKey(), MAXLNLEN);
+ if (!HENTRY_FIND(he, MORPH_STEM)) {
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, MORPH_STEM, MAXLNLEN);
+ mystrcat(result, HENTRY_WORD(he), MAXLNLEN);
}
- strcat(result, "\n");
+ // store the pointer of the hash entry
+ if (HENTRY_DATA(he)) {
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, HENTRY_DATA2(he), MAXLNLEN);
+ } else {
+ // return with debug information
+ char * flag = pmyMgr->encode_flag(getFlag());
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, MORPH_FLAG, MAXLNLEN);
+ mystrcat(result, flag, MAXLNLEN);
+ free(flag);
+ }
+ mystrcat(result, "\n", MAXLNLEN);
}
he = he->next_homonym;
} while (he);
}
- // prefix matched but no root word was found
- // if aeXPRODUCT is allowed, try again but now
+ // prefix matched but no root word was found
+ // if aeXPRODUCT is allowed, try again but now
// ross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
- st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this,
+ st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this,
FLAG_NULL, needflag);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
}
}
}
-
+
if (*result) return mystrdup(result);
return NULL;
}
-#endif // END OF HUNSPELL_EXPERIMENTAL CODE
SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
{
@@ -366,22 +414,22 @@
pmyMgr = pmgr;
// set up its intial values
- aflag = dp->aflag; // char flag
+ aflag = dp->aflag; // char flag
strip = dp->strip; // string to strip
appnd = dp->appnd; // string to append
stripl = dp->stripl; // length of strip string
appndl = dp->appndl; // length of append string
- numconds = dp->numconds; // number of conditions to match
- opts = dp->opts; // cross product flag
+ numconds = dp->numconds; // length of the condition
+ opts = dp->opts; // cross product flag
// then copy over all of the conditions
- memcpy(&conds.base[0],&dp->conds.base[0],SETSIZE*sizeof(conds.base[0]));
+ if (opts & aeLONGCOND) {
+ memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1);
+ c.l.conds2 = dp->c.l.conds2;
+ } else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
rappnd = myrevstrdup(appnd);
-
-#ifdef HUNSPELL_EXPERIMENTAL
morphcode = dp->morphcode;
-#endif
contclass = dp->contclass;
contclasslen = dp->contclasslen;
}
@@ -395,15 +443,9 @@
if (strip) free(strip);
pmyMgr = NULL;
appnd = NULL;
- strip = NULL;
- if (opts & aeUTF8) {
- for (int i = 0; i < numconds; i++) {
- if (conds.utf8.wchars[i]) free(conds.utf8.wchars[i]);
- }
- }
-#ifdef HUNSPELL_EXPERIMENTAL
+ strip = NULL;
+ if (opts & aeLONGCOND) free(c.l.conds2);
if (morphcode && !(opts & aeALIASM)) free(morphcode);
-#endif
if (contclass && !(opts & aeALIASF)) free(contclass);
}
@@ -413,7 +455,8 @@
char tword[MAXWORDUTF8LEN + 4];
/* make sure all conditions match */
- if ((len > stripl) && (len >= numconds) && test_condition(word + len, word) &&
+ if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
+ (len >= numconds) && test_condition(word + len, word) &&
(!stripl || (strcmp(word + len - stripl, strip) == 0)) &&
((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
/* we have a match so add suffix */
@@ -428,56 +471,114 @@
return NULL;
}
+inline char * SfxEntry::nextchar(char * p) {
+ if (p) {
+ p++;
+ if (opts & aeLONGCOND) {
+ // jump to the 2nd part of the condition
+ if (p == c.l.conds1 + MAXCONDLEN_1) return c.l.conds2;
+ // end of the MAXCONDLEN length condition
+ } else if (p == c.conds + MAXCONDLEN) return NULL;
+ return *p ? p : NULL;
+ }
+ return NULL;
+}
inline int SfxEntry::test_condition(const char * st, const char * beg)
{
- int cond;
- unsigned char * cp = (unsigned char *) st;
- if (!(opts & aeUTF8)) { // 256-character codepage
- // D\xf6m\xf6lki affix algorithm
- for (cond = numconds; --cond >= 0; ) {
- if ((conds.base[*--cp] & (1 << cond)) == 0) return 0;
- }
- } else { // UTF-8 encoding
- unsigned short wc;
- for (cond = numconds; --cond >= 0; ) {
- // go to next character position and check limit
- if ((char *) --cp < beg) return 0;
- // a simple 7-bit ASCII character in UTF-8
- if ((*cp >> 7) == 0) {
- if ((conds.utf8.ascii[*cp] & (1 << cond)) == 0) return 0;
- // UTF-8 multibyte character
- } else {
- // go to first character of UTF-8 multibyte character
- for (; (*cp & 0xc0) == 0x80; cp--);
- // not dot wildcard in rule
- if (!conds.utf8.all[cond]) {
- if (conds.utf8.neg[cond]) {
- u8_u16((w_char *) &wc, 1, (char *) cp);
- if (conds.utf8.wchars[cond] &&
- flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
- wc, (short) conds.utf8.wlen[cond])) return 0;
- } else {
- if (!conds.utf8.wchars[cond]) return 0;
- u8_u16((w_char *) &wc, 1, (char *) cp);
- if (!flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
- wc, (short)conds.utf8.wlen[cond])) return 0;
+ const char * pos = NULL; // group with pos input position
+ bool neg = false; // complementer
+ bool ingroup = false; // character in the group
+ if (numconds == 0) return 1;
+ char * p = c.conds;
+ st--;
+ int i = 1;
+ while (1) {
+ switch (*p) {
+ case '\0': return 1;
+ case '[': { p = nextchar(p); pos = st; break; }
+ case '^': { p = nextchar(p); neg = true; break; }
+ case ']': { if (!neg && !ingroup) return 0;
+ i++;
+ // skip the next character
+ if (!ingroup) {
+ for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
+ st--;
+ }
+ pos = NULL;
+ neg = false;
+ ingroup = false;
+ p = nextchar(p);
+ if (st < beg && p) return 0; // word <= condition
+ break;
+ }
+ case '.': if (!pos) { // dots are not metacharacters in groups: [.]
+ p = nextchar(p);
+ // skip the next character
+ for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
+ if (st < beg) { // word <= condition
+ if (p) return 0; else return 1;
+ }
+ if ((opts & aeUTF8) && (*st & 0x80)) { // head of the UTF-8 character
+ st--;
+ if (st < beg) { // word <= condition
+ if (p) return 0; else return 1;
+ }
}
+ break;
}
- }
+ default: {
+ if (*st == *p) {
+ p = nextchar(p);
+ if ((opts & aeUTF8) && (*st & 0x80)) {
+ st--;
+ while (p && (st >= beg)) {
+ if (*p != *st) {
+ if (!pos) return 0;
+ st = pos;
+ break;
+ }
+ // first byte of the UTF-8 multibyte character
+ if ((*p & 0xc0) != 0x80) break;
+ p = nextchar(p);
+ st--;
+ }
+ if (pos && st != pos) {
+ if (neg) return 0;
+ else if (i == numconds) return 1;
+ ingroup = true;
+ while (p && *p != ']' && (p = nextchar(p)));
+ st--;
+ }
+ if (p && *p != ']') p = nextchar(p);
+ } else if (pos) {
+ if (neg) return 0;
+ else if (i == numconds) return 1;
+ ingroup = true;
+ while (p && *p != ']' && (p = nextchar(p)));
+// if (p && *p != ']') p = nextchar(p);
+ st--;
+ }
+ if (!pos) {
+ i++;
+ st--;
+ }
+ if (st < beg && p && *p != ']') return 0; // word <= condition
+ } else if (pos) { // group
+ p = nextchar(p);
+ } else return 0;
+ }
}
+ if (!p) return 1;
}
- return 1;
}
-
-
-// see if this suffix is present in the word
+// see if this suffix is present in the word
struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
AffEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag,
const FLAG badflag)
{
- int tmpl; // length of tmpword
+ int tmpl; // length of tmpword
struct hentry * he; // hash entry pointer
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
@@ -497,9 +598,10 @@
tmpl = len - appndl;
// the second condition is not enough for UTF-8 strings
// it checked in test_condition()
-
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + stripl >= numconds)) {
+
// generate new root word by removing suffix and adding
// back any characters that would have been stripped or
// or null terminating the shorter string
@@ -514,7 +616,8 @@
// now make sure all of the conditions on characters
// are met. Please see the appendix at the end of
- // this file for more info on exactly what is being // tested
+ // this file for more info on exactly what is being
+ // tested
// if all conditions are met then check if resulting
// root word in the dictionary
@@ -528,21 +631,21 @@
do {
// check conditional suffix (enabled by prefix)
if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() &&
- TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
- (((optflags & aeXPRODUCT) == 0) ||
+ TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
+ (((optflags & aeXPRODUCT) == 0) ||
TESTAFF(he->astr, ep->getFlag(), he->alen) ||
// enabled by prefix
((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
) &&
// handle cont. class
- ((!cclass) ||
+ ((!cclass) ||
((contclass) && TESTAFF(contclass, cclass, contclasslen))
) &&
// check only in compound homonyms (bad flags)
(!badflag || !TESTAFF(he->astr, badflag, he->alen)
- ) &&
+ ) &&
// handle required flag
- ((!needflag) ||
+ ((!needflag) ||
(TESTAFF(he->astr, needflag, he->alen) ||
((contclass) && TESTAFF(contclass, needflag, contclasslen)))
)
@@ -550,12 +653,12 @@
he = he->next_homonym; // check homonyms
} while (he);
- // obsolote stemming code (used only by the
+ // obsolote stemming code (used only by the
// experimental SuffixMgr:suggest_pos_stems)
// store resulting root in wlst
} else if (wlst && (*ns < maxSug)) {
int cwrd = 1;
- for (int k=0; k < *ns; k++)
+ for (int k=0; k < *ns; k++)
if (strcmp(tmpword, wlst[k]) == 0) cwrd = 0;
if (cwrd) {
wlst[*ns] = mystrdup(tmpword);
@@ -572,11 +675,11 @@
return NULL;
}
-// see if two-level suffix is present in the word
+// see if two-level suffix is present in the word
struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
AffEntry* ppfx, const FLAG needflag)
{
- int tmpl; // length of tmpword
+ int tmpl; // length of tmpword
struct hentry * he; // hash entry pointer
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
@@ -596,7 +699,8 @@
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + stripl >= numconds)) {
// generate new root word by removing suffix and adding
// back any characters that would have been stripped or
@@ -620,7 +724,7 @@
if (test_condition((char *) cp, (char *) tmpword)) {
if (ppfx) {
// handle conditional suffix
- if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
+ if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, (FLAG) aflag, needflag);
else
he = pmyMgr->suffix_check(tmpword, tmpl, optflags, ppfx, NULL, 0, NULL, (FLAG) aflag, needflag);
@@ -633,19 +737,18 @@
return NULL;
}
-#ifdef HUNSPELL_EXPERIMENTAL
-// see if two-level suffix is present in the word
+// see if two-level suffix is present in the word
char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
AffEntry* ppfx, const FLAG needflag)
{
- int tmpl; // length of tmpword
+ int tmpl; // length of tmpword
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
PfxEntry* ep = (PfxEntry *) ppfx;
char * st;
char result[MAXLNLEN];
-
+
*result = '\0';
// if this suffix is being cross checked with a prefix
@@ -661,7 +764,8 @@
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + stripl >= numconds)) {
// generate new root word by removing suffix and adding
// back any characters that would have been stripped or
@@ -689,16 +793,17 @@
st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
if (st) {
if (((PfxEntry *) ppfx)->getMorph()) {
- strcat(result, ((PfxEntry *) ppfx)->getMorph());
+ mystrcat(result, ((PfxEntry *) ppfx)->getMorph(), MAXLNLEN);
+ mystrcat(result, " ", MAXLNLEN);
}
- strcat(result,st);
+ mystrcat(result,st, MAXLNLEN);
free(st);
mychomp(result);
}
} else {
st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag, needflag);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
mychomp(result);
}
@@ -706,7 +811,7 @@
} else {
st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
mychomp(result);
}
@@ -716,28 +821,28 @@
}
return NULL;
}
-#endif // END OF HUNSPELL_EXPERIMENTAL CODE
// get next homonym with same affix
-struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, AffEntry* ppfx,
+struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, AffEntry* ppfx,
const FLAG cclass, const FLAG needflag)
{
PfxEntry* ep = (PfxEntry *) ppfx;
+ FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL;
while (he->next_homonym) {
he = he->next_homonym;
- if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
- ((optflags & aeXPRODUCT) == 0 ||
- TESTAFF(he->astr, ep->getFlag(), he->alen) ||
+ if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
+ ((optflags & aeXPRODUCT) == 0 ||
+ TESTAFF(he->astr, eFlag, he->alen) ||
// handle conditional suffix
- ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
+ ((contclass) && TESTAFF(contclass, eFlag, contclasslen))
) &&
// handle cont. class
- ((!cclass) ||
+ ((!cclass) ||
((contclass) && TESTAFF(contclass, cclass, contclasslen))
) &&
// handle required flag
- ((!needflag) ||
+ ((!needflag) ||
(TESTAFF(he->astr, needflag, he->alen) ||
((contclass) && TESTAFF(contclass, needflag, contclasslen)))
)
Property changes on: chrome\third_party\hunspell\src\hunspell\affentry.cxx
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « chrome/third_party/hunspell/src/hunspell/affentry.hxx ('k') | chrome/third_party/hunspell/src/hunspell/affixmgr.hxx » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698