| Index: chrome/third_party/hunspell/src/hunspell/affentry.cxx
|
| ===================================================================
|
| --- chrome/third_party/hunspell/src/hunspell/affentry.cxx (revision 21721)
|
| +++ chrome/third_party/hunspell/src/hunspell/affentry.cxx (working copy)
|
| @@ -7,9 +7,9 @@
|
| #include <cctype>
|
| #include <cstdio>
|
| #else
|
| -#include <stdlib.h>
|
| +#include <stdlib.h>
|
| #include <string.h>
|
| -#include <stdio.h>
|
| +#include <stdio.h>
|
| #include <ctype.h>
|
| #endif
|
|
|
| @@ -17,7 +17,7 @@
|
| #include "csutil.hxx"
|
|
|
| #ifndef MOZILLA_CLIENT
|
| -#ifndef W32
|
| +#ifndef WIN32
|
| using namespace std;
|
| #endif
|
| #endif
|
| @@ -29,22 +29,23 @@
|
| pmyMgr = pmgr;
|
|
|
| // set up its intial values
|
| -
|
| - aflag = dp->aflag; // flag
|
| +
|
| + aflag = dp->aflag; // flag
|
| strip = dp->strip; // string to strip
|
| appnd = dp->appnd; // string to append
|
| stripl = dp->stripl; // length of strip string
|
| appndl = dp->appndl; // length of append string
|
| - numconds = dp->numconds; // number of conditions to match
|
| - opts = dp->opts; // cross product flag
|
| + numconds = dp->numconds; // length of the condition
|
| + opts = dp->opts; // cross product flag
|
| // then copy over all of the conditions
|
| - memcpy(&conds.base[0],&dp->conds.base[0],SETSIZE*sizeof(conds.base[0]));
|
| + if (opts & aeLONGCOND) {
|
| + memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1);
|
| + c.l.conds2 = dp->c.l.conds2;
|
| + } else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
|
| next = NULL;
|
| nextne = NULL;
|
| nexteq = NULL;
|
| -#ifdef HUNSPELL_EXPERIMENTAL
|
| morphcode = dp->morphcode;
|
| -#endif
|
| contclass = dp->contclass;
|
| contclasslen = dp->contclasslen;
|
| }
|
| @@ -58,15 +59,8 @@
|
| pmyMgr = NULL;
|
| appnd = NULL;
|
| strip = NULL;
|
| - if (opts & aeUTF8) {
|
| - for (int i = 0; i < numconds; i++) {
|
| - if (conds.utf8.wchars[i])
|
| - free(conds.utf8.wchars[i]);
|
| - }
|
| - }
|
| -#ifdef HUNSPELL_EXPERIMENTAL
|
| + if (opts & aeLONGCOND) free(c.l.conds2);
|
| if (morphcode && !(opts & aeALIASM)) free(morphcode);
|
| -#endif
|
| if (contclass && !(opts & aeALIASF)) free(contclass);
|
| }
|
|
|
| @@ -75,8 +69,9 @@
|
| {
|
| char tword[MAXWORDUTF8LEN + 4];
|
|
|
| - if ((len > stripl) && (len >= numconds) && test_condition(word) &&
|
| - (!stripl || (strncmp(word, strip, stripl) == 0)) &&
|
| + if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
|
| + (len >= numconds) && test_condition(word) &&
|
| + (!stripl || (strncmp(word, strip, stripl) == 0)) &&
|
| ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
|
| /* we have a match so add prefix */
|
| char * pp = tword;
|
| @@ -87,51 +82,87 @@
|
| strcpy(pp, (word + stripl));
|
| return mystrdup(tword);
|
| }
|
| - return NULL;
|
| + return NULL;
|
| }
|
|
|
| +inline char * PfxEntry::nextchar(char * p) {
|
| + if (p) {
|
| + p++;
|
| + if (opts & aeLONGCOND) {
|
| + // jump to the 2nd part of the condition
|
| + if (p == c.conds + MAXCONDLEN_1) return c.l.conds2;
|
| + // end of the MAXCONDLEN length condition
|
| + } else if (p == c.conds + MAXCONDLEN) return NULL;
|
| + return *p ? p : NULL;
|
| + }
|
| + return NULL;
|
| +}
|
|
|
| inline int PfxEntry::test_condition(const char * st)
|
| {
|
| - int cond;
|
| - unsigned char * cp = (unsigned char *)st;
|
| - if (!(opts & aeUTF8)) { // 256-character codepage
|
| - for (cond = 0; cond < numconds; cond++) {
|
| - if ((conds.base[*cp++] & (1 << cond)) == 0) return 0;
|
| - }
|
| - } else { // UTF-8 encoding
|
| - unsigned short wc;
|
| - for (cond = 0; cond < numconds; cond++) {
|
| - // a simple 7-bit ASCII character in UTF-8
|
| - if ((*cp >> 7) == 0) {
|
| - // also check limit (end of word)
|
| - if ((!*cp) || ((conds.utf8.ascii[*cp++] & (1 << cond)) == 0)) return 0;
|
| - // UTF-8 multibyte character
|
| - } else {
|
| - // not dot wildcard in rule
|
| - if (!conds.utf8.all[cond]) {
|
| - if (conds.utf8.neg[cond]) {
|
| - u8_u16((w_char *) &wc, 1, (char *) cp);
|
| - if (conds.utf8.wchars[cond] &&
|
| - flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
|
| - wc, (short) conds.utf8.wlen[cond])) return 0;
|
| - } else {
|
| - if (!conds.utf8.wchars[cond]) return 0;
|
| - u8_u16((w_char *) &wc, 1, (char *) cp);
|
| - if (!flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
|
| - wc, (short)conds.utf8.wlen[cond])) return 0;
|
| - }
|
| + const char * pos = NULL; // group with pos input position
|
| + bool neg = false; // complementer
|
| + bool ingroup = false; // character in the group
|
| + if (numconds == 0) return 1;
|
| + char * p = c.conds;
|
| + while (1) {
|
| + switch (*p) {
|
| + case '\0': return 1;
|
| + case '[': {
|
| + neg = false;
|
| + ingroup = false;
|
| + p = nextchar(p);
|
| + pos = st; break;
|
| }
|
| - // jump to next UTF-8 character
|
| - for(cp++; (*cp & 0xc0) == 0x80; cp++);
|
| - }
|
| + case '^': { p = nextchar(p); neg = true; break; }
|
| + case ']': {
|
| + if ((neg && ingroup) || (!neg && !ingroup)) return 0;
|
| + pos = NULL;
|
| + p = nextchar(p);
|
| + // skip the next character
|
| + if (!ingroup) for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
|
| + if (*st == '\0' && p) return 0; // word <= condition
|
| + break;
|
| + }
|
| + case '.': if (!pos) { // dots are not metacharacters in groups: [.]
|
| + p = nextchar(p);
|
| + // skip the next character
|
| + for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
|
| + if (*st == '\0' && p) return 0; // word <= condition
|
| + break;
|
| + }
|
| + default: {
|
| + if (*st == *p) {
|
| + st++;
|
| + p = nextchar(p);
|
| + if ((opts & aeUTF8) && (*(st - 1) & 0x80)) { // multibyte
|
| + while (p && (*p & 0xc0) == 0x80) { // character
|
| + if (*p != *st) {
|
| + if (!pos) return 0;
|
| + st = pos;
|
| + break;
|
| + }
|
| + p = nextchar(p);
|
| + st++;
|
| + }
|
| + if (pos && st != pos) {
|
| + ingroup = true;
|
| + while (p && *p != ']' && (p = nextchar(p)));
|
| + }
|
| + } else if (pos) {
|
| + ingroup = true;
|
| + while (p && *p != ']' && (p = nextchar(p)));
|
| + }
|
| + } else if (pos) { // group
|
| + p = nextchar(p);
|
| + } else return 0;
|
| + }
|
| }
|
| + if (!p) return 1;
|
| }
|
| - return 1;
|
| }
|
|
|
| -
|
| -// check if this prefix entry matches
|
| +// check if this prefix entry matches
|
| struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound, const FLAG needflag)
|
| {
|
| int tmpl; // length of tmpword
|
| @@ -145,7 +176,7 @@
|
|
|
| tmpl = len - appndl;
|
|
|
| - if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
|
| + if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) {
|
|
|
| // generate new root word by removing prefix and adding
|
| // back any characters that would have been stripped
|
| @@ -166,8 +197,8 @@
|
| if ((he = pmyMgr->lookup(tmpword)) != NULL) {
|
| do {
|
| if (TESTAFF(he->astr, aflag, he->alen) &&
|
| - // forbid single prefixes with pseudoroot flag
|
| - ! TESTAFF(contclass, pmyMgr->get_pseudoroot(), contclasslen) &&
|
| + // forbid single prefixes with needaffix flag
|
| + ! TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
|
| // needflag
|
| ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
|
| (contclass && TESTAFF(contclass, needflag, contclasslen))))
|
| @@ -175,14 +206,14 @@
|
| he = he->next_homonym; // check homonyms
|
| } while (he);
|
| }
|
| -
|
| - // prefix matched but no root word was found
|
| - // if aeXPRODUCT is allowed, try again but now
|
| +
|
| + // prefix matched but no root word was found
|
| + // if aeXPRODUCT is allowed, try again but now
|
| // ross checked combined with a suffix
|
|
|
| //if ((opts & aeXPRODUCT) && in_compound) {
|
| if ((opts & aeXPRODUCT)) {
|
| - he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, NULL,
|
| + he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, NULL,
|
| 0, NULL, FLAG_NULL, needflag, in_compound);
|
| if (he) return he;
|
| }
|
| @@ -191,7 +222,7 @@
|
| return NULL;
|
| }
|
|
|
| -// check if this prefix entry matches
|
| +// check if this prefix entry matches
|
| struct hentry * PfxEntry::check_twosfx(const char * word, int len,
|
| char in_compound, const FLAG needflag)
|
| {
|
| @@ -206,7 +237,8 @@
|
|
|
| tmpl = len - appndl;
|
|
|
| - if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
|
| + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
|
| + (tmpl + stripl >= numconds)) {
|
|
|
| // generate new root word by removing prefix and adding
|
| // back any characters that would have been stripped
|
| @@ -225,8 +257,8 @@
|
| if (test_condition(tmpword)) {
|
| tmpl += stripl;
|
|
|
| - // prefix matched but no root word was found
|
| - // if aeXPRODUCT is allowed, try again but now
|
| + // prefix matched but no root word was found
|
| + // if aeXPRODUCT is allowed, try again but now
|
| // cross checked combined with a suffix
|
|
|
| if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
|
| @@ -238,8 +270,7 @@
|
| return NULL;
|
| }
|
|
|
| -#ifdef HUNSPELL_EXPERIMENTAL
|
| -// check if this prefix entry matches
|
| +// check if this prefix entry matches
|
| char * PfxEntry::check_twosfx_morph(const char * word, int len,
|
| char in_compound, const FLAG needflag)
|
| {
|
| @@ -253,7 +284,8 @@
|
|
|
| tmpl = len - appndl;
|
|
|
| - if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
|
| + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
|
| + (tmpl + stripl >= numconds)) {
|
|
|
| // generate new root word by removing prefix and adding
|
| // back any characters that would have been stripped
|
| @@ -272,8 +304,8 @@
|
| if (test_condition(tmpword)) {
|
| tmpl += stripl;
|
|
|
| - // prefix matched but no root word was found
|
| - // if aeXPRODUCT is allowed, try again but now
|
| + // prefix matched but no root word was found
|
| + // if aeXPRODUCT is allowed, try again but now
|
| // ross checked combined with a suffix
|
|
|
| if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
|
| @@ -285,7 +317,7 @@
|
| return NULL;
|
| }
|
|
|
| -// check if this prefix entry matches
|
| +// check if this prefix entry matches
|
| char * PfxEntry::check_morph(const char * word, int len, char in_compound, const FLAG needflag)
|
| {
|
| int tmpl; // length of tmpword
|
| @@ -293,7 +325,7 @@
|
| char tmpword[MAXWORDUTF8LEN + 4];
|
| char result[MAXLNLEN];
|
| char * st;
|
| -
|
| +
|
| *result = '\0';
|
|
|
| // on entry prefix is 0 length or already matches the beginning of the word.
|
| @@ -303,7 +335,8 @@
|
|
|
| tmpl = len - appndl;
|
|
|
| - if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
|
| + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
|
| + (tmpl + stripl >= numconds)) {
|
|
|
| // generate new root word by removing prefix and adding
|
| // back any characters that would have been stripped
|
| @@ -324,41 +357,56 @@
|
| if ((he = pmyMgr->lookup(tmpword)) != NULL) {
|
| do {
|
| if (TESTAFF(he->astr, aflag, he->alen) &&
|
| - // forbid single prefixes with pseudoroot flag
|
| - ! TESTAFF(contclass, pmyMgr->get_pseudoroot(), contclasslen) &&
|
| + // forbid single prefixes with needaffix flag
|
| + ! TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
|
| // needflag
|
| ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
|
| (contclass && TESTAFF(contclass, needflag, contclasslen)))) {
|
| - if (morphcode) strcat(result, morphcode); else strcat(result,getKey());
|
| - if (he->description) {
|
| - if ((*(he->description)=='[')||(*(he->description)=='<')) strcat(result,he->word);
|
| - strcat(result,he->description);
|
| + if (morphcode) {
|
| + mystrcat(result, " ", MAXLNLEN);
|
| + mystrcat(result, morphcode, MAXLNLEN);
|
| + } else mystrcat(result,getKey(), MAXLNLEN);
|
| + if (!HENTRY_FIND(he, MORPH_STEM)) {
|
| + mystrcat(result, " ", MAXLNLEN);
|
| + mystrcat(result, MORPH_STEM, MAXLNLEN);
|
| + mystrcat(result, HENTRY_WORD(he), MAXLNLEN);
|
| }
|
| - strcat(result, "\n");
|
| + // store the pointer of the hash entry
|
| + if (HENTRY_DATA(he)) {
|
| + mystrcat(result, " ", MAXLNLEN);
|
| + mystrcat(result, HENTRY_DATA2(he), MAXLNLEN);
|
| + } else {
|
| + // return with debug information
|
| + char * flag = pmyMgr->encode_flag(getFlag());
|
| + mystrcat(result, " ", MAXLNLEN);
|
| + mystrcat(result, MORPH_FLAG, MAXLNLEN);
|
| + mystrcat(result, flag, MAXLNLEN);
|
| + free(flag);
|
| + }
|
| + mystrcat(result, "\n", MAXLNLEN);
|
| }
|
| he = he->next_homonym;
|
| } while (he);
|
| }
|
|
|
| - // prefix matched but no root word was found
|
| - // if aeXPRODUCT is allowed, try again but now
|
| + // prefix matched but no root word was found
|
| + // if aeXPRODUCT is allowed, try again but now
|
| // ross checked combined with a suffix
|
|
|
| if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
|
| - st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this,
|
| + st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this,
|
| FLAG_NULL, needflag);
|
| if (st) {
|
| - strcat(result, st);
|
| + mystrcat(result, st, MAXLNLEN);
|
| free(st);
|
| }
|
| }
|
| }
|
| }
|
| -
|
| +
|
| if (*result) return mystrdup(result);
|
| return NULL;
|
| }
|
| -#endif // END OF HUNSPELL_EXPERIMENTAL CODE
|
|
|
| SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
|
| {
|
| @@ -366,22 +414,22 @@
|
| pmyMgr = pmgr;
|
|
|
| // set up its intial values
|
| - aflag = dp->aflag; // char flag
|
| + aflag = dp->aflag; // char flag
|
| strip = dp->strip; // string to strip
|
| appnd = dp->appnd; // string to append
|
| stripl = dp->stripl; // length of strip string
|
| appndl = dp->appndl; // length of append string
|
| - numconds = dp->numconds; // number of conditions to match
|
| - opts = dp->opts; // cross product flag
|
| + numconds = dp->numconds; // length of the condition
|
| + opts = dp->opts; // cross product flag
|
|
|
| // then copy over all of the conditions
|
| - memcpy(&conds.base[0],&dp->conds.base[0],SETSIZE*sizeof(conds.base[0]));
|
| + if (opts & aeLONGCOND) {
|
| + memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1);
|
| + c.l.conds2 = dp->c.l.conds2;
|
| + } else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
|
|
|
| rappnd = myrevstrdup(appnd);
|
| -
|
| -#ifdef HUNSPELL_EXPERIMENTAL
|
| morphcode = dp->morphcode;
|
| -#endif
|
| contclass = dp->contclass;
|
| contclasslen = dp->contclasslen;
|
| }
|
| @@ -395,15 +443,9 @@
|
| if (strip) free(strip);
|
| pmyMgr = NULL;
|
| appnd = NULL;
|
| - strip = NULL;
|
| - if (opts & aeUTF8) {
|
| - for (int i = 0; i < numconds; i++) {
|
| - if (conds.utf8.wchars[i]) free(conds.utf8.wchars[i]);
|
| - }
|
| - }
|
| -#ifdef HUNSPELL_EXPERIMENTAL
|
| + strip = NULL;
|
| + if (opts & aeLONGCOND) free(c.l.conds2);
|
| if (morphcode && !(opts & aeALIASM)) free(morphcode);
|
| -#endif
|
| if (contclass && !(opts & aeALIASF)) free(contclass);
|
| }
|
|
|
| @@ -413,7 +455,8 @@
|
| char tword[MAXWORDUTF8LEN + 4];
|
|
|
| /* make sure all conditions match */
|
| - if ((len > stripl) && (len >= numconds) && test_condition(word + len, word) &&
|
| + if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
|
| + (len >= numconds) && test_condition(word + len, word) &&
|
| (!stripl || (strcmp(word + len - stripl, strip) == 0)) &&
|
| ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
|
| /* we have a match so add suffix */
|
| @@ -428,56 +471,114 @@
|
| return NULL;
|
| }
|
|
|
| +inline char * SfxEntry::nextchar(char * p) {
|
| + if (p) {
|
| + p++;
|
| + if (opts & aeLONGCOND) {
|
| + // jump to the 2nd part of the condition
|
| + if (p == c.l.conds1 + MAXCONDLEN_1) return c.l.conds2;
|
| + // end of the MAXCONDLEN length condition
|
| + } else if (p == c.conds + MAXCONDLEN) return NULL;
|
| + return *p ? p : NULL;
|
| + }
|
| + return NULL;
|
| +}
|
|
|
| inline int SfxEntry::test_condition(const char * st, const char * beg)
|
| {
|
| - int cond;
|
| - unsigned char * cp = (unsigned char *) st;
|
| - if (!(opts & aeUTF8)) { // 256-character codepage
|
| - // D\xf6m\xf6lki affix algorithm
|
| - for (cond = numconds; --cond >= 0; ) {
|
| - if ((conds.base[*--cp] & (1 << cond)) == 0) return 0;
|
| - }
|
| - } else { // UTF-8 encoding
|
| - unsigned short wc;
|
| - for (cond = numconds; --cond >= 0; ) {
|
| - // go to next character position and check limit
|
| - if ((char *) --cp < beg) return 0;
|
| - // a simple 7-bit ASCII character in UTF-8
|
| - if ((*cp >> 7) == 0) {
|
| - if ((conds.utf8.ascii[*cp] & (1 << cond)) == 0) return 0;
|
| - // UTF-8 multibyte character
|
| - } else {
|
| - // go to first character of UTF-8 multibyte character
|
| - for (; (*cp & 0xc0) == 0x80; cp--);
|
| - // not dot wildcard in rule
|
| - if (!conds.utf8.all[cond]) {
|
| - if (conds.utf8.neg[cond]) {
|
| - u8_u16((w_char *) &wc, 1, (char *) cp);
|
| - if (conds.utf8.wchars[cond] &&
|
| - flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
|
| - wc, (short) conds.utf8.wlen[cond])) return 0;
|
| - } else {
|
| - if (!conds.utf8.wchars[cond]) return 0;
|
| - u8_u16((w_char *) &wc, 1, (char *) cp);
|
| - if (!flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
|
| - wc, (short)conds.utf8.wlen[cond])) return 0;
|
| + const char * pos = NULL; // group with pos input position
|
| + bool neg = false; // complementer
|
| + bool ingroup = false; // character in the group
|
| + if (numconds == 0) return 1;
|
| + char * p = c.conds;
|
| + st--;
|
| + int i = 1;
|
| + while (1) {
|
| + switch (*p) {
|
| + case '\0': return 1;
|
| + case '[': { p = nextchar(p); pos = st; break; }
|
| + case '^': { p = nextchar(p); neg = true; break; }
|
| + case ']': { if (!neg && !ingroup) return 0;
|
| + i++;
|
| + // skip the next character
|
| + if (!ingroup) {
|
| + for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
|
| + st--;
|
| + }
|
| + pos = NULL;
|
| + neg = false;
|
| + ingroup = false;
|
| + p = nextchar(p);
|
| + if (st < beg && p) return 0; // word <= condition
|
| + break;
|
| + }
|
| + case '.': if (!pos) { // dots are not metacharacters in groups: [.]
|
| + p = nextchar(p);
|
| + // skip the next character
|
| + for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
|
| + if (st < beg) { // word <= condition
|
| + if (p) return 0; else return 1;
|
| + }
|
| + if ((opts & aeUTF8) && (*st & 0x80)) { // head of the UTF-8 character
|
| + st--;
|
| + if (st < beg) { // word <= condition
|
| + if (p) return 0; else return 1;
|
| + }
|
| }
|
| + break;
|
| }
|
| - }
|
| + default: {
|
| + if (*st == *p) {
|
| + p = nextchar(p);
|
| + if ((opts & aeUTF8) && (*st & 0x80)) {
|
| + st--;
|
| + while (p && (st >= beg)) {
|
| + if (*p != *st) {
|
| + if (!pos) return 0;
|
| + st = pos;
|
| + break;
|
| + }
|
| + // first byte of the UTF-8 multibyte character
|
| + if ((*p & 0xc0) != 0x80) break;
|
| + p = nextchar(p);
|
| + st--;
|
| + }
|
| + if (pos && st != pos) {
|
| + if (neg) return 0;
|
| + else if (i == numconds) return 1;
|
| + ingroup = true;
|
| + while (p && *p != ']' && (p = nextchar(p)));
|
| + st--;
|
| + }
|
| + if (p && *p != ']') p = nextchar(p);
|
| + } else if (pos) {
|
| + if (neg) return 0;
|
| + else if (i == numconds) return 1;
|
| + ingroup = true;
|
| + while (p && *p != ']' && (p = nextchar(p)));
|
| +// if (p && *p != ']') p = nextchar(p);
|
| + st--;
|
| + }
|
| + if (!pos) {
|
| + i++;
|
| + st--;
|
| + }
|
| + if (st < beg && p && *p != ']') return 0; // word <= condition
|
| + } else if (pos) { // group
|
| + p = nextchar(p);
|
| + } else return 0;
|
| + }
|
| }
|
| + if (!p) return 1;
|
| }
|
| - return 1;
|
| }
|
|
|
| -
|
| -
|
| -// see if this suffix is present in the word
|
| +// see if this suffix is present in the word
|
| struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
|
| AffEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag,
|
| const FLAG badflag)
|
| {
|
| - int tmpl; // length of tmpword
|
| + int tmpl; // length of tmpword
|
| struct hentry * he; // hash entry pointer
|
| unsigned char * cp;
|
| char tmpword[MAXWORDUTF8LEN + 4];
|
| @@ -497,9 +598,10 @@
|
| tmpl = len - appndl;
|
| // the second condition is not enough for UTF-8 strings
|
| // it checked in test_condition()
|
| -
|
| - if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
|
|
|
| + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
|
| + (tmpl + stripl >= numconds)) {
|
| +
|
| // generate new root word by removing suffix and adding
|
| // back any characters that would have been stripped or
|
| // or null terminating the shorter string
|
| @@ -514,7 +616,8 @@
|
|
|
| // now make sure all of the conditions on characters
|
| // are met. Please see the appendix at the end of
|
| - // this file for more info on exactly what is being // tested
|
| + // this file for more info on exactly what is being
|
| + // tested
|
|
|
| // if all conditions are met then check if resulting
|
| // root word in the dictionary
|
| @@ -528,21 +631,21 @@
|
| do {
|
| // check conditional suffix (enabled by prefix)
|
| if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() &&
|
| - TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
|
| - (((optflags & aeXPRODUCT) == 0) ||
|
| + TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
|
| + (((optflags & aeXPRODUCT) == 0) ||
|
| TESTAFF(he->astr, ep->getFlag(), he->alen) ||
|
| // enabled by prefix
|
| ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
|
| ) &&
|
| // handle cont. class
|
| - ((!cclass) ||
|
| + ((!cclass) ||
|
| ((contclass) && TESTAFF(contclass, cclass, contclasslen))
|
| ) &&
|
| // check only in compound homonyms (bad flags)
|
| (!badflag || !TESTAFF(he->astr, badflag, he->alen)
|
| - ) &&
|
| + ) &&
|
| // handle required flag
|
| - ((!needflag) ||
|
| + ((!needflag) ||
|
| (TESTAFF(he->astr, needflag, he->alen) ||
|
| ((contclass) && TESTAFF(contclass, needflag, contclasslen)))
|
| )
|
| @@ -550,12 +653,12 @@
|
| he = he->next_homonym; // check homonyms
|
| } while (he);
|
|
|
| - // obsolote stemming code (used only by the
|
| + // obsolote stemming code (used only by the
|
| // experimental SuffixMgr:suggest_pos_stems)
|
| // store resulting root in wlst
|
| } else if (wlst && (*ns < maxSug)) {
|
| int cwrd = 1;
|
| - for (int k=0; k < *ns; k++)
|
| + for (int k=0; k < *ns; k++)
|
| if (strcmp(tmpword, wlst[k]) == 0) cwrd = 0;
|
| if (cwrd) {
|
| wlst[*ns] = mystrdup(tmpword);
|
| @@ -572,11 +675,11 @@
|
| return NULL;
|
| }
|
|
|
| -// see if two-level suffix is present in the word
|
| +// see if two-level suffix is present in the word
|
| struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
|
| AffEntry* ppfx, const FLAG needflag)
|
| {
|
| - int tmpl; // length of tmpword
|
| + int tmpl; // length of tmpword
|
| struct hentry * he; // hash entry pointer
|
| unsigned char * cp;
|
| char tmpword[MAXWORDUTF8LEN + 4];
|
| @@ -596,7 +699,8 @@
|
|
|
| tmpl = len - appndl;
|
|
|
| - if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
|
| + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
|
| + (tmpl + stripl >= numconds)) {
|
|
|
| // generate new root word by removing suffix and adding
|
| // back any characters that would have been stripped or
|
| @@ -620,7 +724,7 @@
|
| if (test_condition((char *) cp, (char *) tmpword)) {
|
| if (ppfx) {
|
| // handle conditional suffix
|
| - if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
|
| + if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
|
| he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, (FLAG) aflag, needflag);
|
| else
|
| he = pmyMgr->suffix_check(tmpword, tmpl, optflags, ppfx, NULL, 0, NULL, (FLAG) aflag, needflag);
|
| @@ -633,19 +737,18 @@
|
| return NULL;
|
| }
|
|
|
| -#ifdef HUNSPELL_EXPERIMENTAL
|
| -// see if two-level suffix is present in the word
|
| +// see if two-level suffix is present in the word
|
| char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
|
| AffEntry* ppfx, const FLAG needflag)
|
| {
|
| - int tmpl; // length of tmpword
|
| + int tmpl; // length of tmpword
|
| unsigned char * cp;
|
| char tmpword[MAXWORDUTF8LEN + 4];
|
| PfxEntry* ep = (PfxEntry *) ppfx;
|
| char * st;
|
|
|
| char result[MAXLNLEN];
|
| -
|
| +
|
| *result = '\0';
|
|
|
| // if this suffix is being cross checked with a prefix
|
| @@ -661,7 +764,8 @@
|
|
|
| tmpl = len - appndl;
|
|
|
| - if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
|
| + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
|
| + (tmpl + stripl >= numconds)) {
|
|
|
| // generate new root word by removing suffix and adding
|
| // back any characters that would have been stripped or
|
| @@ -689,16 +793,17 @@
|
| st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
|
| if (st) {
|
| if (((PfxEntry *) ppfx)->getMorph()) {
|
| - strcat(result, ((PfxEntry *) ppfx)->getMorph());
|
| + mystrcat(result, ((PfxEntry *) ppfx)->getMorph(), MAXLNLEN);
|
| + mystrcat(result, " ", MAXLNLEN);
|
| }
|
| - strcat(result,st);
|
| + mystrcat(result,st, MAXLNLEN);
|
| free(st);
|
| mychomp(result);
|
| }
|
| } else {
|
| st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag, needflag);
|
| if (st) {
|
| - strcat(result, st);
|
| + mystrcat(result, st, MAXLNLEN);
|
| free(st);
|
| mychomp(result);
|
| }
|
| @@ -706,7 +811,7 @@
|
| } else {
|
| st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
|
| if (st) {
|
| - strcat(result, st);
|
| + mystrcat(result, st, MAXLNLEN);
|
| free(st);
|
| mychomp(result);
|
| }
|
| @@ -716,28 +821,28 @@
|
| }
|
| return NULL;
|
| }
|
| -#endif // END OF HUNSPELL_EXPERIMENTAL CODE
|
|
|
| // get next homonym with same affix
|
| -struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, AffEntry* ppfx,
|
| +struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, AffEntry* ppfx,
|
| const FLAG cclass, const FLAG needflag)
|
| {
|
| PfxEntry* ep = (PfxEntry *) ppfx;
|
| + FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL;
|
|
|
| while (he->next_homonym) {
|
| he = he->next_homonym;
|
| - if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
|
| - ((optflags & aeXPRODUCT) == 0 ||
|
| - TESTAFF(he->astr, ep->getFlag(), he->alen) ||
|
| + if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
|
| + ((optflags & aeXPRODUCT) == 0 ||
|
| + TESTAFF(he->astr, eFlag, he->alen) ||
|
| // handle conditional suffix
|
| - ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
|
| + ((contclass) && TESTAFF(contclass, eFlag, contclasslen))
|
| ) &&
|
| // handle cont. class
|
| - ((!cclass) ||
|
| + ((!cclass) ||
|
| ((contclass) && TESTAFF(contclass, cclass, contclasslen))
|
| ) &&
|
| // handle required flag
|
| - ((!needflag) ||
|
| + ((!needflag) ||
|
| (TESTAFF(he->astr, needflag, he->alen) ||
|
| ((contclass) && TESTAFF(contclass, needflag, contclasslen)))
|
| )
|
|
|
| Property changes on: chrome\third_party\hunspell\src\hunspell\affentry.cxx
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|