chrome/third_party/hunspell/src/hunspell/hunspell.cxx - Issue 155841: Update Hunspell to the latest stable version to use the latest dictionary for...

Unified Diff: chrome/third_party/hunspell/src/hunspell/hunspell.cxx

Issue 155841: Update Hunspell to the latest stable version to use the latest dictionary for... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: '' Created 11 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/third_party/hunspell/src/hunspell/hunspell.cxx

===================================================================

--- chrome/third_party/hunspell/src/hunspell/hunspell.cxx (revision 21721)

+++ chrome/third_party/hunspell/src/hunspell/hunspell.cxx (working copy)

@@ -6,16 +6,17 @@

#include <cstring>

#include <cstdio>

#else

-#include <stdlib.h>

+#include <stdlib.h>

#include <string.h>

-#include <stdio.h>

+#include <stdio.h>

#endif

#include "hunspell.hxx"

#include "hunspell.h"

+#include "csutil.hxx"

#ifndef MOZILLA_CLIENT

-#ifndef W32

+#ifndef WIN32

using namespace std;

#endif

@@ -23,27 +24,34 @@

#ifdef HUNSPELL_CHROME_CLIENT

Hunspell::Hunspell(const unsigned char* bdict_data, size_t bdict_length)

#else

-Hunspell::Hunspell(FILE* aff_handle, FILE* dic_handle)

+Hunspell::Hunspell(FILE* aff_handle, FILE* dic_handle, const char * key = NULL)

#endif

{

encoding = NULL;

csconv = NULL;

utf8 = 0;

complexprefixes = 0;

+#ifndef HUNSPELL_CHROME_CLIENT

+ affixpath = mystrdup(affpath);

+#endif

+ maxdic = 0;

#ifdef HUNSPELL_CHROME_CLIENT

bdict_reader = new hunspell::BDictReader;

bdict_reader->Init(bdict_data, bdict_length);

- pHMgr = new HashMgr(bdict_reader);

- pAMgr = new AffixMgr(bdict_reader, pHMgr);

+ pHMgr[0] = new HashMgr(bdict_reader);

+ if (pHMgr[0]) maxdic = 1;

+ pAMgr = new AffixMgr(bdict_reader, pHMgr, &maxdic);

#else

/* first set up the hash manager */

- pHMgr = new HashMgr(dic_handle, aff_handle);

+ pHMgr[0] = new HashMgr(dic_handle, aff_handle, key);

+ if (pHMgr[0]) maxdic = 1;

/* next set up the affix manager */

/* it needs access to the hash manager lookup methods */

- pAMgr = new AffixMgr(aff_handle, pHMgr);

+ pAMgr = new AffixMgr(aff_handle, pHMgr, &maxdic, key);

#endif

/* get the preferred try string and the dictionary */

@@ -65,10 +73,13 @@

{

if (pSMgr) delete pSMgr;

if (pAMgr) delete pAMgr;

- if (pHMgr) delete pHMgr;

+ for (int i = 0; i < maxdic; i++) delete pHMgr[i];

+ maxdic = 0;

pSMgr = NULL;

pAMgr = NULL;

- pHMgr = NULL;

+#ifdef MOZILLA_CLIENT

+ free(csconv);

+#endif

csconv= NULL;

if (encoding) free(encoding);

encoding = NULL;

@@ -76,27 +87,38 @@

#ifdef HUNSPELL_CHROME_CLIENT

if (bdict_reader) delete bdict_reader;

bdict_reader = NULL;

+#else

+ if (affixpath) free(affixpath);

+ affixpath = NULL;

#endif

}

+#ifndef HUNSPELL_CHROME_CLIENT

+// load extra dictionaries

+int Hunspell::add_dic(const char * dpath, const char * key) {

+ if (maxdic == MAXDIC || !affixpath) return 1;

+ pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);

+ if (pHMgr[maxdic]) maxdic++; else return 1;

+ return 0;

+#endif

// make a copy of src at destination while removing all leading

// blanks and removing any trailing periods after recording

// their presence with the abbreviation flag

-// also since already going through character by character,

+// also since already going through character by character,

// set the capitalization type

// return the length of the "cleaned" (and UTF-8 encoded) word

-int Hunspell::cleanword2(char * dest, const char * src,

+int Hunspell::cleanword2(char * dest, const char * src,

w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)

unsigned char * p = (unsigned char *) dest;

const unsigned char * q = (const unsigned char * ) src;

- int firstcap = 0;

// first skip over any leading blanks

while ((*q != '\0') && (*q == ' ')) q++;

// now strip off any trailing periods (recording their presence)

*pabbrev = 0;

int nl = strlen((const char *)q);

@@ -104,80 +126,43 @@

nl--;

(*pabbrev)++;

}

// if no characters are left it can't be capitalized

- if (nl <= 0) {

+ if (nl <= 0) {

*pcaptype = NOCAP;

*p = '\0';

return 0;

}

- // now determine the capitalization type of the first nl letters

- int ncap = 0;

- int nneutral = 0;

- *nc = 0;

- if (!utf8) {

- while (nl > 0) {

- (*nc)++;

- if (csconv[(*q)].ccase) ncap++;

- if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;

- *p++ = *q++;

- nl--;

- }

- // remember to terminate the destination string

- *p = '\0';

- if (ncap) {

- firstcap = csconv[(unsigned char)(*dest)].ccase;

- }

- } else {

- unsigned short idx;

- *nc = u8_u16(dest_utf, MAXWORDLEN, (const char *) q);

+ strncpy(dest, (char *) q, nl);

+ *(dest + nl) = '\0';

+ nl = strlen(dest);

+ if (utf8) {

+ *nc = u8_u16(dest_utf, MAXWORDLEN, dest);

// don't check too long words

if (*nc >= MAXWORDLEN) return 0;

if (*nc == -1) { // big Unicode character (non BMP area)

*pcaptype = NOCAP;

- strcpy((char *) p, (char *) q);

- return strlen(dest);

+ return nl;

}

- *nc -= *pabbrev;

- for (int i = 0; i < *nc; i++) {

- idx = (dest_utf[i].h << 8) + dest_utf[i].l;

- if (idx != unicodetolower(idx, langnum)) ncap++;

- if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) nneutral++;

- }

- u16_u8(dest, MAXWORDUTF8LEN, dest_utf, *nc);

- if (ncap) {

- idx = (dest_utf[0].h << 8) + dest_utf[0].l;

- firstcap = (idx != unicodetolower(idx, langnum));

- }

- // now finally set the captype

- if (ncap == 0) {

- *pcaptype = NOCAP;

- } else if ((ncap == 1) && firstcap) {

- *pcaptype = INITCAP;

- } else if ((ncap == *nc) || ((ncap + nneutral) == *nc)) {

- *pcaptype = ALLCAP;

- } else if ((ncap > 1) && firstcap) {

- *pcaptype = HUHINITCAP;

+ *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);

} else {

- *pcaptype = HUHCAP;

+ *pcaptype = get_captype(dest, nl, csconv);

+ *nc = nl;

}

- return strlen(dest);

+ return nl;

-int Hunspell::cleanword(char * dest, const char * src,

+int Hunspell::cleanword(char * dest, const char * src,

int * pcaptype, int * pabbrev)

unsigned char * p = (unsigned char *) dest;

const unsigned char * q = (const unsigned char * ) src;

int firstcap = 0;

// first skip over any leading blanks

while ((*q != '\0') && (*q == ' ')) q++;

// now strip off any trailing periods (recording their presence)

*pabbrev = 0;

int nl = strlen((const char *)q);

@@ -185,9 +170,9 @@

nl--;

(*pabbrev)++;

}

// if no characters are left it can't be capitalized

- if (nl <= 0) {

+ if (nl <= 0) {

*pcaptype = NOCAP;

*p = '\0';

return 0;

@@ -215,8 +200,9 @@

nc = u8_u16(t, MAXWORDLEN, src);

for (int i = 0; i < nc; i++) {

idx = (t[i].h << 8) + t[i].l;

- if (idx != unicodetolower(idx, langnum)) ncap++;

- if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) nneutral++;

+ unsigned short low = unicodetolower(idx, langnum);

+ if (idx != low) ncap++;

+ if (unicodetoupper(idx, langnum) == low) nneutral++;

}

u16_u8(dest, MAXWORDUTF8LEN, t, nc);

if (ncap) {

@@ -238,8 +224,7 @@

*pcaptype = HUHCAP;

}

return strlen(dest);

void Hunspell::mkallcap(char * p)

{

@@ -256,7 +241,7 @@

}

u16_u8(p, MAXWORDUTF8LEN, u, nc);

} else {

- while (*p != '\0') {

+ while (*p != '\0') {

*p = csconv[((unsigned char) *p)].cupper;

p++;

}

@@ -269,15 +254,16 @@

unsigned short idx;

for (int i = 0; i < nc; i++) {

idx = (u[i].h << 8) + u[i].l;

- if (idx != unicodetoupper(idx, langnum)) {

- u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);

- u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);

+ unsigned short up = unicodetoupper(idx, langnum);

+ if (idx != up) {

+ u[i].h = (unsigned char) (up >> 8);

+ u[i].l = (unsigned char) (up & 0x00FF);

}

u16_u8(p, MAXWORDUTF8LEN, u, nc);

- return strlen(p);

+ return strlen(p);

} else {

- while (*p != '\0') {

+ while (*p != '\0') {

*p = csconv[((unsigned char) *p)].cupper;

p++;

}

@@ -288,7 +274,7 @@

void Hunspell::mkallsmall(char * p)

{

- while (*p != '\0') {

+ while (*p != '\0') {

*p = csconv[((unsigned char) *p)].clower;

p++;

}

@@ -300,15 +286,16 @@

unsigned short idx;

for (int i = 0; i < nc; i++) {

idx = (u[i].h << 8) + u[i].l;

- if (idx != unicodetolower(idx, langnum)) {

- u[i].h = (unsigned char) (unicodetolower(idx, langnum) >> 8);

- u[i].l = (unsigned char) (unicodetolower(idx, langnum) & 0x00FF);

+ unsigned short low = unicodetolower(idx, langnum);

+ if (idx != low) {

+ u[i].h = (unsigned char) (low >> 8);

+ u[i].l = (unsigned char) (low & 0x00FF);

}

u16_u8(p, MAXWORDUTF8LEN, u, nc);

return strlen(p);

} else {

- while (*p != '\0') {

+ while (*p != '\0') {

*p = csconv[((unsigned char) *p)].clower;

p++;

}

@@ -322,18 +309,18 @@

*p = *source;

for (p++, source++; *(source - 1); p++, source++) {

*p = *source;

- if (*source == '\x9f') *--p = '\xdf';

+ if (*source == '\x9F') *--p = '\xDF';

}

return dest;

}

-// recursive search for right ss-\xdf permutations

+// recursive search for right ss - sharp s permutations

hentry * Hunspell::spellsharps(char * base, char * pos, int n,

int repnum, char * tmp, int * info, char **root) {

pos = strstr(pos, "ss");

if (pos && (n < MAXSHARPS)) {

- *pos = '\xc3';

- *(pos + 1) = '\x9f';

+ *pos = '\xC3';

+ *(pos + 1) = '\x9F';

hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);

if (h) return h;

*pos = 's';

@@ -352,31 +339,32 @@

TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);

}

-/* check and insert a word to beginning of the suggestion array */

-int Hunspell::insert_sug(char ***slst, char * word, int *ns) {

- if (spell(word)) {

- if (*ns == MAXSUGGESTION) {

- (*ns)--;

- free((*slst)[*ns]);

- }

- for (int k = *ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];

- (*slst)[0] = mystrdup(word);

- (*ns)++;

+/* insert a word to the beginning of the suggestion array and return ns */

+int Hunspell::insert_sug(char ***slst, char * word, int ns) {

+ char * dup = mystrdup(word);

+ if (!dup) return ns;

+ if (ns == MAXSUGGESTION) {

+ ns--;

+ free((*slst)[ns]);

}

- return 0;

+ for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];

+ (*slst)[0] = dup;

+ return ns + 1;

}

int Hunspell::spell(const char * word, int * info, char ** root)

{

#ifdef HUNSPELL_CHROME_CLIENT

- if (pHMgr) pHMgr->EmptyHentryCache();

+ if (pHMgr) pHMgr[0]->EmptyHentryCache();

#endif

struct hentry * rv=NULL;

// need larger vector. For example, Turkish capital letter I converted a

// 2-byte UTF-8 character (dotless i) by mkallsmall.

- char cw[MAXWORDUTF8LEN + 4];

- char wspace[MAXWORDUTF8LEN + 4];

- w_char unicw[MAXWORDLEN + 1];

+ char cw[MAXWORDUTF8LEN];

+ char wspace[MAXWORDUTF8LEN];

+ w_char unicw[MAXWORDLEN];

+ // Hunspell supports XML input of the simplified API (see manual)

+ if (strcmp(word, SPELL_XML) == 0) return 1;

int nc = strlen(word);

int wl2 = 0;

if (utf8) {

@@ -386,14 +374,18 @@

}

int captype = 0;

int abbv = 0;

- int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

+ int wl = 0;

- if (wl == 0) return 1;

+ // input conversion

+ RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

+ if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);

+ else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

- if (info) *info = 0;

+ int info2 = 0;

+ if (wl == 0 || maxdic == 0) return 1;

if (root) *root = NULL;

- // allow numbers with dots and commas (but forbid double separators: "..", ",," etc.)

+ // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)

enum { NBEGIN, NNUM, NSEP };

int nstate = NBEGIN;

int i;

@@ -407,173 +399,179 @@

} else break;

}

if ((i == wl) && (nstate == NNUM)) return 1;

+ if (!info) info = &info2; else *info = 0;

- // LANG_hu section: number(s) + (percent or degree) with suffixes

- if (langnum == LANG_hu) {

- if ((nstate == NNUM) && ((cw[i] == '%') || (cw[i] == '\xb0'))

- && checkword(cw + i, info, root)) return 1;

- }

- // END of LANG_hu section

switch(captype) {

- case HUHCAP:

- case HUHINITCAP:

- case NOCAP: {

- rv = checkword(cw, info, root);

- if ((abbv) && !(rv)) {

- memcpy(wspace,cw,wl);

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- rv = checkword(wspace, info, root);

- }

- break;

- }

+ case HUHCAP:

+ case HUHINITCAP:

+ case NOCAP: {

+ rv = checkword(cw, info, root);

+ if ((abbv) && !(rv)) {

+ memcpy(wspace,cw,wl);

+ *(wspace+wl) = '.';

+ *(wspace+wl+1) = '\0';

+ rv = checkword(wspace, info, root);

+ }

+ break;

+ }

case ALLCAP: {

- rv = checkword(cw, info, root);

- if (rv) break;

- if (abbv) {

- memcpy(wspace,cw,wl);

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- rv = checkword(wspace, info, root);

- if (rv) break;

- }

- if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {

- char tmpword[MAXWORDUTF8LEN];

- wl = mkallsmall2(cw, unicw, nc);

- memcpy(wspace,cw,(wl+1));

+ rv = checkword(cw, info, root);

+ if (rv) break;

+ if (abbv) {

+ memcpy(wspace,cw,wl);

+ *(wspace+wl) = '.';

+ *(wspace+wl+1) = '\0';

+ rv = checkword(wspace, info, root);

+ if (rv) break;

+ }

+ // Spec. prefix handling for Catalan, French, Italian:

+ // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).

+ if (pAMgr && strchr(cw, '\'')) {

+ wl = mkallsmall2(cw, unicw, nc);

+ char * apostrophe = strchr(cw, '\'');

+ if (utf8) {

+ w_char tmpword[MAXWORDLEN];

+ *apostrophe = '\0';

+ wl2 = u8_u16(tmpword, MAXWORDLEN, cw);

+ *apostrophe = '\'';

+ if (wl2 < nc) {

+ mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);

+ rv = checkword(cw, info, root);

+ if (rv) break;

+ }

+ } else {

+ mkinitcap2(apostrophe + 1, unicw, nc);

+ rv = checkword(cw, info, root);

+ if (rv) break;

+ }

+ mkinitcap2(cw, unicw, nc);

+ rv = checkword(cw, info, root);

+ if (rv) break;

+ }

+ if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {

+ char tmpword[MAXWORDUTF8LEN];

+ wl = mkallsmall2(cw, unicw, nc);

+ memcpy(wspace,cw,(wl+1));

+ rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);

+ if (!rv) {

+ wl2 = mkinitcap2(cw, unicw, nc);

+ rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);

+ }

+ if ((abbv) && !(rv)) {

+ *(wspace+wl) = '.';

+ *(wspace+wl+1) = '\0';

+ rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);

+ if (!rv) {

+ memcpy(wspace, cw, wl2);

+ *(wspace+wl2) = '.';

+ *(wspace+wl2+1) = '\0';

rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);

- if (!rv) {

- wl2 = mkinitcap2(cw, unicw, nc);

- rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);

- }

- if ((abbv) && !(rv)) {

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);

- if (!rv) {

- memcpy(wspace, cw, wl2);

- *(wspace+wl2) = '.';

- *(wspace+wl2+1) = '\0';

- rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);

- }

- if (rv) break;

}

- case INITCAP: {

- wl = mkallsmall2(cw, unicw, nc);

- memcpy(wspace,cw,(wl+1));

- rv = checkword(wspace, info, root);

- if (!rv || (is_keepcase(rv) && !((captype == INITCAP) &&

- // if CHECKSHARPS: KEEPCASE words with \xdf are allowed

- // in INITCAP form, too.

- pAMgr->get_checksharps() && ((utf8 && strstr(wspace, "\xc3\x9f")) ||

- (!utf8 && strchr(wspace, '\xdf')))))) {

- wl2 = mkinitcap2(cw, unicw, nc);

- rv = checkword(cw, info, root);

- if (rv && (captype == ALLCAP) && is_keepcase(rv)) rv = NULL;

- }

- if (abbv && !rv) {

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- rv = checkword(wspace, info, root);

- if (!rv || is_keepcase(rv)) {

- memcpy(wspace, cw, wl2);

- *(wspace+wl2) = '.';

- *(wspace+wl2+1) = '\0';

- rv = checkword(wspace, info, root);

- if (rv && ((captype == ALLCAP) && is_keepcase(rv))) rv = NULL;

- }

- break;

- }

+ if (rv) break;

+ }

+ case INITCAP: {

+ wl = mkallsmall2(cw, unicw, nc);

+ memcpy(wspace,cw,(wl+1));

+ wl2 = mkinitcap2(cw, unicw, nc);

+ if (captype == INITCAP) *info += SPELL_INITCAP;

+ rv = checkword(cw, info, root);

+ if (captype == INITCAP) *info -= SPELL_INITCAP;

+ // forbid bad capitalization

+ // (for example, ijs -> Ijs instead of IJs in Dutch)

+ // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)

+ if (*info & SPELL_FORBIDDEN) {

+ rv = NULL;

+ break;

+ }

+ if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;

+ if (rv) break;

+ rv = checkword(wspace, info, root);

+ if (abbv && !rv) {

+ *(wspace+wl) = '.';

+ *(wspace+wl+1) = '\0';

+ rv = checkword(wspace, info, root);

+ if (!rv) {

+ memcpy(wspace, cw, wl2);

+ *(wspace+wl2) = '.';

+ *(wspace+wl2+1) = '\0';

+ if (captype == INITCAP) *info += SPELL_INITCAP;

+ rv = checkword(wspace, info, root);

+ if (captype == INITCAP) *info -= SPELL_INITCAP;

+ if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;

+ break;

+ }

+ if (rv && is_keepcase(rv) &&

+ ((captype == ALLCAP) ||

+ // if CHECKSHARPS: KEEPCASE words with \xDF are allowed

+ // in INITCAP form, too.

+ !(pAMgr->get_checksharps() &&

+ ((utf8 && strstr(wspace, "\xC3\x9F")) ||

+ (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;

+ break;

+ }

}

if (rv) return 1;

- // recursive breaking at break points (not good for morphological analysis)

+ // recursive breaking at break points

if (wordbreak) {

char * s;

char r;

- for (int j = 0; j < pAMgr->get_numbreak(); j++) {

+ int corr = 0;

+ wl = strlen(cw);

+ int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;

+ // check boundary patterns (^begin and end$)

+ for (int j = 0; j < numbreak; j++) {

+ int plen = strlen(wordbreak[j]);

+ if (plen == 1 || plen > wl) continue;

+ if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0

+ && spell(cw + plen - 1)) return 1;

+ if (wordbreak[j][plen - 1] == '$' &&

+ strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {

+ r = cw[wl - plen + 1];

+ cw[wl - plen + 1] = '\0';

+ if (spell(cw)) return 1;

+ cw[wl - plen + 1] = r;

+ }

+ // other patterns

+ for (int j = 0; j < numbreak; j++) {

+ int result = 0;

+ int plen = strlen(wordbreak[j]);

s=(char *) strstr(cw, wordbreak[j]);

- if (s) {

+ if (s && (s > cw) && (s < cw + wl - plen)) {

+ if (!spell(s + plen)) continue;

r = *s;

*s = '\0';

// examine 2 sides of the break point

- if (spell(cw) && spell(s + strlen(wordbreak[j]))) {

- *s = r;

- return 1;

- }

+ if (spell(cw)) return 1;

*s = r;

+ // LANG_hu: spec. dash rule

+ if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {

+ r = s[1];

+ s[1] = '\0';

+ if (spell(cw)) return 1; // check the first part with dash

+ s[1] = r;

+ }

+ // end of LANG specific region

}

- // LANG_hu: compoundings with dashes and n-dashes XXX deprecated!

- if (langnum == LANG_hu) {

- int n;

- // compound word with dash (HU) I18n

- char * dash;

- int result = 0;

- // n-dash

- dash = (char *) strstr(cw,"\xe2\x80\x93");

- if (dash && !wordbreak) {

- *dash = '\0';

- // examine 2 sides of the dash

- if (spell(cw) && spell(dash + 3)) {

- *dash = '\xe2';

- return 1;

- }

- *dash = '\xe2';

- }

- dash = (char *) strchr(cw,'-');

- if (dash) {

- *dash='\0';

- // examine 2 sides of the dash

- if (dash[1] == '\0') { // base word ending with dash

- if (spell(cw)) return 1;

- } else {

- // first word ending with dash: word-

- char r2 = *(dash + 1);

- dash[0]='-';

- dash[1]='\0';

- result = spell(cw);

- dash[1] = r2;

- dash[0]='\0';

- if (result && spell(dash+1) && ((strlen(dash+1) > 1) || (dash[1] == 'e') ||

- ((dash[1] > '0') && (dash[1] < '9')))) return 1;

- }

- // affixed number in correct word

- if (result && (dash > cw) && (((*(dash-1)<='9') && (*(dash-1)>='0')) || (*(dash-1)>='.'))) {

- *dash='-';

- n = 1;

- if (*(dash - n) == '.') n++;

- // search first not a number character to left from dash

- while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {

- n++;

- }

- if ((dash - n) < cw) n--;

- // numbers: deprecated

- for(; n >= 1; n--) {

- if ((*(dash - n) >= '0') && (*(dash - n) <= '9') &&

- checkword(dash - n, info, root)) return 1;

- }

return 0;

}

-//int Hunspell::spell(const char * word) {

-// return spell(word, NULL, NULL);

-//}

struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)

{

struct hentry * he = NULL;

- int len;

+ int len, i;

char w2[MAXWORDUTF8LEN];

const char * word;

@@ -600,26 +598,29 @@

}

// look word in hash table

- if (pHMgr) he = pHMgr->lookup(word);

+ for (i = 0; (i < maxdic) && !he; i ++) {

+ he = (pHMgr[i])->lookup(word);

// check forbidden and onlyincompound words

if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {

- info += SPELL_FORBIDDEN;

+ if (info) *info += SPELL_FORBIDDEN;

// LANG_hu section: set dash information for suggestions

if (langnum == LANG_hu) {

if (pAMgr->get_compoundflag() &&

TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {

- info += SPELL_COMPOUND;

+ if (info) *info += SPELL_COMPOUND;

}

return NULL;

}

- // he = next not pseudoroot and not onlyincompound homonym or NULL

+ // he = next not needaffix, onlyincompound homonym or onlyupcase word

while (he && (he->astr) &&

- ((pAMgr->get_pseudoroot() && TESTAFF(he->astr, pAMgr->get_pseudoroot(), he->alen)) ||

- (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen))

+ ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||

+ (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||

+ (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))

)) he = he->next_homonym;

+ }

// check with affixes

if (!he && pAMgr) {

@@ -627,38 +628,42 @@

len = strlen(word);

he = pAMgr->affix_check(word, len, 0);

- // check compound restriction

- if (he && he->astr && pAMgr->get_onlyincompound() &&

- TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) he = NULL;

+ // check compound restriction and onlyupcase

+ if (he && he->astr && (

+ (pAMgr->get_onlyincompound() &&

+ TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||

+ (info && (*info & SPELL_INITCAP) &&

+ TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {

+ he = NULL;

+ }

if (he) {

if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {

- info += SPELL_FORBIDDEN;

+ if (info) *info += SPELL_FORBIDDEN;

return NULL;

}

if (root) {

- *root = mystrdup(he->word);

- if (complexprefixes) {

+ *root = mystrdup(&(he->word));

+ if (*root && complexprefixes) {

if (utf8) reverseword_utf(*root); else reverseword(*root);

}

// try check compound word

} else if (pAMgr->get_compound()) {

- he = pAMgr->compound_check(word, len,

- 0,0,100,0,NULL,0,NULL,NULL,0);

+ he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0);

// LANG_hu section: `moving rule' with last dash

- if ((!he) && (langnum == LANG_hu) && (word[len-1]=='-')) {

+ if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {

char * dup = mystrdup(word);

+ if (!dup) return NULL;

dup[len-1] = '\0';

- he = pAMgr->compound_check(dup, len-1,

- -5,0,100,0,NULL,1,NULL,NULL,0);

+ he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0);

free(dup);

}

- // end of LANG speficic region

+ // end of LANG specific region

if (he) {

if (root) {

- *root = mystrdup(he->word);

- if (complexprefixes) {

+ *root = mystrdup(&(he->word));

+ if (*root && complexprefixes) {

if (utf8) reverseword_utf(*root); else reverseword(*root);

}

@@ -674,12 +679,18 @@

int Hunspell::suggest(char*** slst, const char * word)

{

#ifdef HUNSPELL_CHROME_CLIENT

- if (pHMgr) pHMgr->EmptyHentryCache();

+ if (pHMgr) pHMgr[0]->EmptyHentryCache();

#endif

- char cw[MAXWORDUTF8LEN + 4];

- char wspace[MAXWORDUTF8LEN + 4];

- if (! pSMgr) return 0;

- w_char unicw[MAXWORDLEN + 1];

+ int onlycmpdsug = 0;

+ char cw[MAXWORDUTF8LEN];

+ char wspace[MAXWORDUTF8LEN];

+ if (!pSMgr || maxdic == 0) return 0;

+ w_char unicw[MAXWORDLEN];

+ *slst = NULL;

+ // process XML input of the simplified API (see manual)

+ if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {

+ return spellml(slst, word);

+ }

int nc = strlen(word);

if (utf8) {

if (nc >= MAXWORDUTF8LEN) return 0;

@@ -688,49 +699,73 @@

}

int captype = 0;

int abbv = 0;

- int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

+ int wl = 0;

+ // input conversion

+ RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

+ if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);

+ else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

if (wl == 0) return 0;

int ns = 0;

- *slst = NULL;

int capwords = 0;

- int ngramsugs = 0;

switch(captype) {

- case NOCAP: {

- ns = pSMgr->suggest(slst, cw, ns);

+ case NOCAP: {

+ ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);

break;

}

- case INITCAP: {

+ case INITCAP: {

capwords = 1;

- ns = pSMgr->suggest(slst, cw, ns);

+ ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);

if (ns == -1) break;

memcpy(wspace,cw,(wl+1));

mkallsmall2(wspace, unicw, nc);

- ns = pSMgr->suggest(slst, wspace, ns);

+ ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

break;

}

case HUHINITCAP:

capwords = 1;

- case HUHCAP: {

- ns = pSMgr->suggest(slst, cw, ns);

+ case HUHCAP: {

+ ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);

if (ns != -1) {

int prevns;

+ // something.The -> something. The

+ char * dot = strchr(cw, '.');

+ if (dot && (dot > cw)) {

+ int captype_;

+ if (utf8) {

+ w_char w_[MAXWORDLEN];

+ int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);

+ captype_ = get_captype_utf8(w_, wl_, langnum);

+ } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);

+ if (captype_ == INITCAP) {

+ char * st = mystrdup(cw);

+ if (st) st = (char *) realloc(st, wl + 2);

+ if (st) {

+ st[(dot - cw) + 1] = ' ';

+ strcpy(st + (dot - cw) + 2, dot + 1);

+ ns = insert_sug(slst, st, ns);

+ free(st);

+ }

if (captype == HUHINITCAP) {

// TheOpenOffice.org -> The OpenOffice.org

memcpy(wspace,cw,(wl+1));

mkinitsmall2(wspace, unicw, nc);

- ns = pSMgr->suggest(slst, wspace, ns);

+ ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

}

memcpy(wspace,cw,(wl+1));

mkallsmall2(wspace, unicw, nc);

- insert_sug(slst, wspace, &ns);

+ if (spell(wspace)) ns = insert_sug(slst, wspace, ns);

prevns = ns;

- ns = pSMgr->suggest(slst, wspace, ns);

+ ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

if (captype == HUHINITCAP) {

mkinitcap2(wspace, unicw, nc);

- insert_sug(slst, wspace, &ns);

- ns = pSMgr->suggest(slst, wspace, ns);

+ if (spell(wspace)) ns = insert_sug(slst, wspace, ns);

+ ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

}

// aNew -> "a New" (instead of "a new")

for (int j = prevns; j < ns; j++) {

@@ -739,7 +774,7 @@

int slen = strlen(space + 1);

// different case after space (need capitalisation)

if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {

- w_char w[MAXWORDLEN + 1];

+ w_char w[MAXWORDLEN];

int wc = 0;

char * r = (*slst)[j];

if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);

@@ -754,31 +789,32 @@

break;

}

- case ALLCAP: {

+ case ALLCAP: {

memcpy(wspace, cw, (wl+1));

mkallsmall2(wspace, unicw, nc);

- ns = pSMgr->suggest(slst, wspace, ns);

+ ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

if (ns == -1) break;

- if (pAMgr && pAMgr->get_keepcase()) insert_sug(slst, wspace, &ns);

+ if (pAMgr && pAMgr->get_keepcase() && spell(wspace))

+ ns = insert_sug(slst, wspace, ns);

mkinitcap2(wspace, unicw, nc);

- ns = pSMgr->suggest(slst, wspace, ns);

+ ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

for (int j=0; j < ns; j++) {

mkallcap((*slst)[j]);

if (pAMgr && pAMgr->get_checksharps()) {

char * pos;

if (utf8) {

- pos = strstr((*slst)[j], "\xc3\x9f");

+ pos = strstr((*slst)[j], "\xC3\x9F");

while (pos) {

*pos = 'S';

*(pos+1) = 'S';

- pos = strstr(pos+2, "\xc3\x9f");

+ pos = strstr(pos+2, "\xC3\x9F");

}

} else {

- pos = strchr((*slst)[j], '\xdf');

+ pos = strchr((*slst)[j], '\xDF');

while (pos) {

(*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);

- mystrrep((*slst)[j], "\xdf", "SS");

- pos = strchr((*slst)[j], '\xdf');

+ mystrrep((*slst)[j], "\xDF", "SS");

+ pos = strchr((*slst)[j], '\xDF');

}

@@ -807,37 +843,76 @@

// END OF LANG_hu section

// try ngram approach since found nothing

- if ((ns == 0) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) {

- ngramsugs = 1;

+ if ((ns == 0 || onlycmpdsug) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) {

switch(captype) {

case NOCAP: {

- ns = pSMgr->ngsuggest(*slst, cw, pHMgr);

+ ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);

break;

}

+ case HUHINITCAP:

+ capwords = 1;

case HUHCAP: {

memcpy(wspace,cw,(wl+1));

mkallsmall2(wspace, unicw, nc);

- ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);

- break;

+ ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);

+ break;

}

- case INITCAP: {

+ case INITCAP: {

capwords = 1;

memcpy(wspace,cw,(wl+1));

mkallsmall2(wspace, unicw, nc);

- ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);

+ ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);

break;

}

case ALLCAP: {

memcpy(wspace,cw,(wl+1));

mkallsmall2(wspace, unicw, nc);

- ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);

- for (int j=0; j < ns; j++)

+ int oldns = ns;

+ ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);

+ for (int j = oldns; j < ns; j++)

mkallcap((*slst)[j]);

break;

}

+ // try dash suggestion (Afo-American -> Afro-American)

+ if (strchr(cw, '-')) {

+ char * pos = strchr(cw, '-');

+ char * ppos = cw;

+ int nodashsug = 1;

+ char ** nlst = NULL;

+ int nn = 0;

+ int last = 0;

+ for (int j = 0; j < ns && nodashsug == 1; j++) {

+ if (strchr((*slst)[j], '-')) nodashsug = 0;

+ }

+ while (nodashsug && !last) {

+ if (*pos == '\0') last = 1; else *pos = '\0';

+ if (!spell(ppos)) {

+ nn = suggest(&nlst, ppos);

+ for (int j = nn - 1; j >= 0; j--) {

+ strncpy(wspace, cw, ppos - cw);

+ strcpy(wspace + (ppos - cw), nlst[j]);

+ if (!last) {

+ strcat(wspace, "-");

+ strcat(wspace, pos + 1);

+ }

+ ns = insert_sug(slst, wspace, ns);

+ free(nlst[j]);

+ }

+ if (nlst != NULL) free(nlst);

+ nodashsug = 0;

+ }

+ if (!last) {

+ *pos = '-';

+ ppos = pos + 1;

+ pos = strchr(ppos, '-');

+ }

+ if (!pos) pos = cw + strlen(cw);

+ }

// word reversing wrapper for complex prefixes

if (complexprefixes) {

for (int j = 0; j < ns; j++) {

@@ -858,14 +933,14 @@

}

- // suggest keepcase

- if (pAMgr->get_keepcase()) {

+ // remove bad capitalized and forbidden forms

+ if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {

switch (captype) {

case INITCAP:

case ALLCAP: {

int l = 0;

for (int j=0; j < ns; j++) {

- if (!spell((*slst)[j])) {

+ if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {

char s[MAXSWUTF8L];

w_char w[MAXSWL];

int len;

@@ -876,21 +951,21 @@

len = strlen(s);

}

mkallsmall2(s, w, len);

- free((*slst)[j]);

+ free((*slst)[j]);

if (spell(s)) {

(*slst)[l] = mystrdup(s);

- l++;

+ if ((*slst)[l]) l++;

} else {

mkinitcap2(s, w, len);

if (spell(s)) {

(*slst)[l] = mystrdup(s);

- l++;

+ if ((*slst)[l]) l++;

}

} else {

(*slst)[l] = (*slst)[j];

l++;

- }

+ }

}

ns = l;

}

@@ -909,9 +984,28 @@

}

l++;

}

+ // output conversion

+ rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;

+ for (int j = 0; rl && j < ns; j++) {

+ if (rl->conv((*slst)[j], wspace)) {

+ free((*slst)[j]);

+ (*slst)[j] = mystrdup(wspace);

+ }

+ // if suggestions removed by nosuggest, onlyincompound parameters

+ if (l == 0 && *slst) {

+ free(*slst);

+ *slst = NULL;

+ }

return l;

}

+void Hunspell::free_list(char *** slst, int n) {

+ freelist(slst, n);

char * Hunspell::get_dic_encoding()

{

return encoding;

@@ -921,9 +1015,9 @@

// XXX need UTF-8 support

int Hunspell::suggest_auto(char*** slst, const char * word)

{

- char cw[MAXWORDUTF8LEN + 4];

- char wspace[MAXWORDUTF8LEN + 4];

- if (! pSMgr) return 0;

+ char cw[MAXWORDUTF8LEN];

+ char wspace[MAXWORDUTF8LEN];

+ if (!pSMgr || maxdic == 0) return 0;

int wl = strlen(word);

if (utf8) {

if (wl >= MAXWORDUTF8LEN) return 0;

@@ -936,15 +1030,15 @@

if (wl == 0) return 0;

int ns = 0;

*slst = NULL; // HU, nsug in pSMgr->suggest

switch(captype) {

- case NOCAP: {

+ case NOCAP: {

ns = pSMgr->suggest_auto(slst, cw, ns);

if (ns>0) break;

break;

}

- case INITCAP: {

+ case INITCAP: {

memcpy(wspace,cw,(wl+1));

mkallsmall(wspace);

ns = pSMgr->suggest_auto(slst, wspace, ns);

@@ -952,10 +1046,11 @@

mkinitcap((*slst)[j]);

ns = pSMgr->suggest_auto(slst, cw, ns);

break;

}

- case HUHCAP: {

+ case HUHINITCAP:

+ case HUHCAP: {

ns = pSMgr->suggest_auto(slst, cw, ns);

if (ns == 0) {

memcpy(wspace,cw,(wl+1));

@@ -965,7 +1060,7 @@

break;

}

- case ALLCAP: {

+ case ALLCAP: {

memcpy(wspace,cw,(wl+1));

mkallsmall(wspace);

ns = pSMgr->suggest_auto(slst, wspace, ns);

@@ -1011,103 +1106,89 @@

}

- // END OF LANG_hu section

+ // END OF LANG_hu section

return ns;

}

+#endif

-// XXX need UTF-8 support

-int Hunspell::stem(char*** slst, const char * word)

+int Hunspell::stem(char*** slst, char ** desc, int n)

{

- char cw[MAXWORDUTF8LEN + 4];

- char wspace[MAXWORDUTF8LEN + 4];

- if (! pSMgr) return 0;

- int wl = strlen(word);

- if (utf8) {

- if (wl >= MAXWORDUTF8LEN) return 0;

- } else {

- if (wl >= MAXWORDLEN) return 0;

+ char result[MAXLNLEN];

+ char result2[MAXLNLEN];

+ *slst = NULL;

+ if (n == 0) return 0;

+ *result2 = '\0';

+ for (int i = 0; i < n; i++) {

+ *result = '\0';

+ // add compound word parts (except the last one)

+ char * s = (char *) desc[i];

+ char * part = strstr(s, MORPH_PART);

+ if (part) {

+ char * nextpart = strstr(part + 1, MORPH_PART);

+ while (nextpart) {

+ copy_field(result + strlen(result), part, MORPH_PART);

+ part = nextpart;

+ nextpart = strstr(part + 1, MORPH_PART);

+ }

+ s = part;

+ }

+ char **pl;

+ char tok[MAXLNLEN];

+ strcpy(tok, s);

+ char * alt = strstr(tok, " | ");

+ while (alt) {

+ alt[1] = MSEP_ALT;

+ alt = strstr(alt, " | ");

+ }

+ int pln = line_tok(tok, &pl, MSEP_ALT);

+ for (int k = 0; k < pln; k++) {

+ // add derivational suffixes

+ if (strstr(pl[k], MORPH_DERI_SFX)) {

+ // remove inflectional suffixes

+ char * is = strstr(pl[k], MORPH_INFL_SFX);

+ if (is) *is = '\0';

+ char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);

+ if (sg) {

+ char ** gen;

+ int genl = line_tok(sg, &gen, MSEP_REC);

+ free(sg);

+ for (int j = 0; j < genl; j++) {

+ sprintf(result2 + strlen(result2), "%c%s%s",

+ MSEP_REC, result, gen[j]);

+ }

+ freelist(&gen, genl);

+ }

+ } else {

+ sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);

+ if (strstr(pl[k], MORPH_SURF_PFX)) {

+ copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);

+ }

+ copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);

+ }

+ freelist(&pl, pln);

}

- int captype = 0;

- int abbv = 0;

- wl = cleanword(cw, word, &captype, &abbv);

- if (wl == 0) return 0;

- int ns = 0;

+ int sln = line_tok(result2, slst, MSEP_REC);

+ return uniqlist(*slst, sln);

- *slst = NULL; // HU, nsug in pSMgr->suggest

- switch(captype) {

- case HUHCAP:

- case NOCAP: {

- ns = pSMgr->suggest_stems(slst, cw, ns);

- if ((abbv) && (ns == 0)) {

- memcpy(wspace,cw,wl);

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- ns = pSMgr->suggest_stems(slst, wspace, ns);

- }

- break;

- }

- case INITCAP: {

- ns = pSMgr->suggest_stems(slst, cw, ns);

- if (ns == 0) {

- memcpy(wspace,cw,(wl+1));

- mkallsmall(wspace);

- ns = pSMgr->suggest_stems(slst, wspace, ns);

- }

- if ((abbv) && (ns == 0)) {

- memcpy(wspace,cw,wl);

- mkallsmall(wspace);

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- ns = pSMgr->suggest_stems(slst, wspace, ns);

- }

- break;

- }

- case ALLCAP: {

- ns = pSMgr->suggest_stems(slst, cw, ns);

- if (ns != 0) break;

- memcpy(wspace,cw,(wl+1));

- mkallsmall(wspace);

- ns = pSMgr->suggest_stems(slst, wspace, ns);

- if (ns == 0) {

- mkinitcap(wspace);

- ns = pSMgr->suggest_stems(slst, wspace, ns);

- }

- if ((abbv) && (ns == 0)) {

- memcpy(wspace,cw,wl);

- mkallsmall(wspace);

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- ns = pSMgr->suggest_stems(slst, wspace, ns);

- }

- break;

- }

- return ns;

+int Hunspell::stem(char*** slst, const char * word)

+ char ** pl;

+ int pln = analyze(&pl, word);

+ int pln2 = stem(slst, pl, pln);

+ freelist(&pl, pln);

+ return pln2;

}

+#ifdef HUNSPELL_EXPERIMENTAL

int Hunspell::suggest_pos_stems(char*** slst, const char * word)

{

- char cw[MAXWORDUTF8LEN + 4];

- char wspace[MAXWORDUTF8LEN + 4];

- if (! pSMgr) return 0;

+ char cw[MAXWORDUTF8LEN];

+ char wspace[MAXWORDUTF8LEN];

+ if (! pSMgr || maxdic == 0) return 0;

int wl = strlen(word);

if (utf8) {

if (wl >= MAXWORDUTF8LEN) return 0;

@@ -1118,14 +1199,14 @@

int abbv = 0;

wl = cleanword(cw, word, &captype, &abbv);

if (wl == 0) return 0;

int ns = 0; // ns=0 = normalized input

*slst = NULL; // HU, nsug in pSMgr->suggest

switch(captype) {

case HUHCAP:

- case NOCAP: {

+ case NOCAP: {

ns = pSMgr->suggest_pos_stems(slst, cw, ns);

if ((abbv) && (ns == 0)) {

@@ -1138,7 +1219,7 @@

break;

}

- case INITCAP: {

+ case INITCAP: {

ns = pSMgr->suggest_pos_stems(slst, cw, ns);

@@ -1147,15 +1228,15 @@

mkallsmall(wspace);

ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

}

break;

}

- case ALLCAP: {

+ case ALLCAP: {

ns = pSMgr->suggest_pos_stems(slst, cw, ns);

if (ns != 0) break;

memcpy(wspace,cw,(wl+1));

mkallsmall(wspace);

ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

@@ -1225,22 +1306,24 @@

return nc;

}

-int Hunspell::put_word(const char * word)

+int Hunspell::add(const char * word)

{

- if (pHMgr) {

- return pHMgr->put_word(word, strlen(word), NULL);

- }

+ if (pHMgr[0]) return (pHMgr[0])->add(word);

return 0;

}

-int Hunspell::put_word_pattern(const char * word, const char * pattern)

+int Hunspell::add_with_affix(const char * word, const char * example)

{

- if (pHMgr) {

- return pHMgr->put_word_pattern(word, strlen(word), pattern);

- }

+ if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);

return 0;

}

+int Hunspell::remove(const char * word)

+ if (pHMgr[0]) return (pHMgr[0])->remove(word);

+ return 0;

const char * Hunspell::get_version()

{

return pAMgr->get_version();

@@ -1251,22 +1334,38 @@

return csconv;

}

-#ifdef HUNSPELL_EXPERIMENTAL

-// XXX need UTF-8 support

-char * Hunspell::morph(const char * word)

+void Hunspell::cat_result(char * result, char * st)

{

- char cw[MAXWORDUTF8LEN + 4];

- char wspace[MAXWORDUTF8LEN + 4];

- if (! pSMgr) return 0;

- int wl = strlen(word);

+ if (st) {

+ if (*result) mystrcat(result, "\n", MAXLNLEN);

+ mystrcat(result, st, MAXLNLEN);

+ free(st);

+ }

+int Hunspell::analyze(char*** slst, const char * word)

+ char cw[MAXWORDUTF8LEN];

+ char wspace[MAXWORDUTF8LEN];

+ w_char unicw[MAXWORDLEN];

+ int wl2 = 0;

+ *slst = NULL;

+ if (! pSMgr || maxdic == 0) return 0;

+ int nc = strlen(word);

if (utf8) {

- if (wl >= MAXWORDUTF8LEN) return 0;

+ if (nc >= MAXWORDUTF8LEN) return 0;

} else {

- if (wl >= MAXWORDLEN) return 0;

+ if (nc >= MAXWORDLEN) return 0;

}

int captype = 0;

int abbv = 0;

- wl = cleanword(cw, word, &captype, &abbv);

+ int wl = 0;

+ // input conversion

+ RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

+ if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);

+ else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

if (wl == 0) {

if (abbv) {

for (wl = 0; wl < abbv; wl++) cw[wl] = '.';

@@ -1277,7 +1376,7 @@

char result[MAXLNLEN];

char * st = NULL;

*result = '\0';

int n = 0;

@@ -1287,177 +1386,103 @@

// test numbers

// LANG_hu section: set dash information for suggestions

if (langnum == LANG_hu) {

- while ((n < wl) &&

+ while ((n < wl) &&

(((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {

n++;

if ((cw[n] == '.') || (cw[n] == ',')) {

- if (((n2 == 0) && (n > 3)) ||

+ if (((n2 == 0) && (n > 3)) ||

((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;

n2++;

n3 = n;

}

- if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return NULL;

- if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xb0')) && checkword(cw+n, NULL, NULL))) {

- strcat(result, cw);

+ if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;

+ if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {

+ mystrcat(result, cw, MAXLNLEN);

result[n - 1] = '\0';

- if (n == wl) {

- st = pSMgr->suggest_morph(cw + n - 1);

- if (st) {

- strcat(result, st);

- free(st);

- }

- } else {

+ if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));

+ else {

char sign = cw[n];

cw[n] = '\0';

- st = pSMgr->suggest_morph(cw + n - 1);

- if (st) {

- strcat(result, st);

- free(st);

- }

- strcat(result, "+"); // XXX SPEC. MORPHCODE

+ cat_result(result, pSMgr->suggest_morph(cw + n - 1));

+ mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE

cw[n] = sign;

- st = pSMgr->suggest_morph(cw + n);

- if (st) {

- strcat(result, st);

- free(st);

- }

+ cat_result(result, pSMgr->suggest_morph(cw + n));

}

- return mystrdup(result);

+ return line_tok(result, slst, MSEP_REC);

}

// END OF LANG_hu section

switch(captype) {

- case NOCAP: {

- st = pSMgr->suggest_morph(cw);

- if (st) {

- strcat(result, st);

- free(st);

- }

- if (abbv) {

- memcpy(wspace,cw,wl);

+ case HUHCAP:

+ case HUHINITCAP:

+ case NOCAP: {

+ cat_result(result, pSMgr->suggest_morph(cw));

+ if (abbv) {

+ memcpy(wspace,cw,wl);

+ *(wspace+wl) = '.';

+ *(wspace+wl+1) = '\0';

+ cat_result(result, pSMgr->suggest_morph(wspace));

+ }

+ break;

+ }

+ case INITCAP: {

+ wl = mkallsmall2(cw, unicw, nc);

+ memcpy(wspace,cw,(wl+1));

+ wl2 = mkinitcap2(cw, unicw, nc);

+ cat_result(result, pSMgr->suggest_morph(wspace));

+ cat_result(result, pSMgr->suggest_morph(cw));

+ if (abbv) {

*(wspace+wl) = '.';

*(wspace+wl+1) = '\0';

- st = pSMgr->suggest_morph(wspace);

- if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

- free(st);

- }

+ cat_result(result, pSMgr->suggest_morph(wspace));

+ memcpy(wspace, cw, wl2);

+ *(wspace+wl2) = '.';

+ *(wspace+wl2+1) = '\0';

+ cat_result(result, pSMgr->suggest_morph(wspace));

}

- break;

+ break;

}

- case INITCAP: {

- memcpy(wspace,cw,(wl+1));

- mkallsmall(wspace);

- st = pSMgr->suggest_morph(wspace);

- if (st) {

- strcat(result, st);

- free(st);

- }

- st = pSMgr->suggest_morph(cw);

- if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

- free(st);

- }

- if (abbv) {

- memcpy(wspace,cw,wl);

+ case ALLCAP: {

+ cat_result(result, pSMgr->suggest_morph(cw));

+ if (abbv) {

+ memcpy(wspace,cw,wl);

*(wspace+wl) = '.';

*(wspace+wl+1) = '\0';

- mkallsmall(wspace);

- st = pSMgr->suggest_morph(wspace);

- if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

- free(st);

- }

- mkinitcap(wspace);

- st = pSMgr->suggest_morph(wspace);

- if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

- free(st);

- }

+ cat_result(result, pSMgr->suggest_morph(cw));

}

- break;

- }

- case HUHCAP: {

- st = pSMgr->suggest_morph(cw);

- if (st) {

- strcat(result, st);

- free(st);

- }

-#if 0

+ wl = mkallsmall2(cw, unicw, nc);

memcpy(wspace,cw,(wl+1));

- mkallsmall(wspace);

- st = pSMgr->suggest_morph(wspace);

- if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

- free(st);

+ wl2 = mkinitcap2(cw, unicw, nc);

+ cat_result(result, pSMgr->suggest_morph(wspace));

+ cat_result(result, pSMgr->suggest_morph(cw));

+ if (abbv) {

+ *(wspace+wl) = '.';

+ *(wspace+wl+1) = '\0';

+ cat_result(result, pSMgr->suggest_morph(wspace));

+ memcpy(wspace, cw, wl2);

+ *(wspace+wl2) = '.';

+ *(wspace+wl2+1) = '\0';

+ cat_result(result, pSMgr->suggest_morph(wspace));

}

-#endif

break;

- }

- case ALLCAP: {

- memcpy(wspace,cw,(wl+1));

- st = pSMgr->suggest_morph(wspace);

- if (st) {

- strcat(result, st);

- free(st);

- }

- mkallsmall(wspace);

- st = pSMgr->suggest_morph(wspace);

- if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

- free(st);

- }

- mkinitcap(wspace);

- st = pSMgr->suggest_morph(wspace);

- if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

- free(st);

- }

- if (abbv) {

- memcpy(wspace,cw,(wl+1));

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- if (*result) strcat(result, "\n");

- st = pSMgr->suggest_morph(wspace);

- if (st) {

- strcat(result, st);

- free(st);

- }

- mkallsmall(wspace);

- st = pSMgr->suggest_morph(wspace);

- if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

- free(st);

- }

- mkinitcap(wspace);

- st = pSMgr->suggest_morph(wspace);

- if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

- free(st);

- }

- break;

}

- if (result && (*result)) {

+ if (*result) {

// word reversing wrapper for complex prefixes

if (complexprefixes) {

if (utf8) reverseword_utf(result); else reverseword(result);

}

- return mystrdup(result);

+ return line_tok(result, slst, MSEP_REC);

}

// compound word with dash (HU) I18n

@@ -1466,24 +1491,24 @@

// LANG_hu section: set dash information for suggestions

if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');

if ((langnum == LANG_hu) && dash) {

- *dash='\0';

+ *dash='\0';

// examine 2 sides of the dash

if (dash[1] == '\0') { // base word ending with dash

- if (spell(cw)) return pSMgr->suggest_morph(cw);

+ if (spell(cw)) return line_tok(pSMgr->suggest_morph(cw), slst, MSEP_REC);

} else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.

if (spell(cw) && (spell("-e"))) {

st = pSMgr->suggest_morph(cw);

if (st) {

- strcat(result, st);

+ mystrcat(result, st, MAXLNLEN);

free(st);

}

- strcat(result,"+"); // XXX spec. separator in MORPHCODE

+ mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE

st = pSMgr->suggest_morph("-e");

if (st) {

- strcat(result, st);

+ mystrcat(result, st, MAXLNLEN);

free(st);

}

- return mystrdup(result);

+ return line_tok(result, slst, MSEP_REC);

}

} else {

// first word ending with dash: word- XXX ???

@@ -1495,22 +1520,22 @@

dash[0]='\0';

if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||

((dash[1] > '0') && (dash[1] < '9')))) {

- st = morph(cw);

+ st = pSMgr->suggest_morph(cw);

if (st) {

- strcat(result, st);

+ mystrcat(result, st, MAXLNLEN);

free(st);

- strcat(result,"+"); // XXX spec. separator in MORPHCODE

+ mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE

}

- st = morph(dash+1);

+ st = pSMgr->suggest_morph(dash+1);

if (st) {

- strcat(result, st);

+ mystrcat(result, st, MAXLNLEN);

free(st);

}

- return mystrdup(result);

+ return line_tok(result, slst, MSEP_REC);

}

// affixed number in correct word

- if (nresult && (dash > cw) && (((*(dash-1)<='9') &&

+ if (nresult && (dash > cw) && (((*(dash-1)<='9') &&

(*(dash-1)>='0')) || (*(dash-1)=='.'))) {

*dash='-';

n = 1;

@@ -1525,195 +1550,338 @@

// 56-hoz, 6-hoz

for(; n >= 1; n--) {

if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {

- strcat(result, cw);

+ mystrcat(result, cw, MAXLNLEN);

result[dash - cw - n] = '\0';

st = pSMgr->suggest_morph(dash - n);

if (st) {

- strcat(result, st);

+ mystrcat(result, st, MAXLNLEN);

free(st);

}

- return mystrdup(result);

+ return line_tok(result, slst, MSEP_REC);

}

- return NULL;

+ return 0;

}

+int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)

+ *slst = NULL;

+ if (!pSMgr || !pln) return 0;

+ char **pl2;

+ int pl2n = analyze(&pl2, word);

+ int captype = 0;

+ int abbv = 0;

+ char cw[MAXWORDUTF8LEN];

+ cleanword(cw, word, &captype, &abbv);

+ char result[MAXLNLEN];

+ *result = '\0';

+ for (int i = 0; i < pln; i++) {

+ cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));

+ }

+ freelist(&pl2, pl2n);

+ if (*result) {

+ // allcap

+ if (captype == ALLCAP) mkallcap(result);

+ // line split

+ int linenum = line_tok(result, slst, MSEP_REC);

+ // capitalize

+ if (captype == INITCAP || captype == HUHINITCAP) {

+ for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);

+ }

+ // temporary filtering of prefix related errors (eg.

+ // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")

+ int r = 0;

+ for (int j=0; j < linenum; j++) {

+ if (!spell((*slst)[j])) {

+ free((*slst)[j]);

+ (*slst)[j] = NULL;

+ } else {

+ if (r < j) (*slst)[r] = (*slst)[j];

+ r++;

+ }

+ if (r > 0) return r;

+ free(*slst);

+ *slst = NULL;

+ }

+ return 0;

+int Hunspell::generate(char*** slst, const char * word, const char * pattern)

+ char **pl;

+ int pln = analyze(&pl, pattern);

+ int n = generate(slst, word, pl, pln);

+ freelist(&pl, pln);

+ return uniqlist(*slst, n);

+// minimal XML parser functions

+int Hunspell::get_xml_par(char * dest, const char * par, int max)

+ char * d = dest;

+ if (!par) return 0;

+ char end = *par;

+ char * dmax = dest + max;

+ if (end == '>') end = '<';

+ else if (end != '\'' && end != '"') return 0; // bad XML

+ for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;

+ *d = '\0';

+ mystrrep(dest, "&lt;", "<");

+ mystrrep(dest, "&amp;", "&");

+ return d - dest;

+// return the beginning of the element (attr == NULL) or the attribute

+const char * Hunspell::get_xml_pos(const char * s, const char * attr)

+ const char * end = strchr(s, '>');

+ const char * p = s;

+ if (attr == NULL) return end;

+ do {

+ p = strstr(p, attr);

+ if (!p || p >= end) return 0;

+ } while (*(p-1) != ' ' && *(p-1) != '\n');

+ return p + strlen(attr);

+int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {

+ char cw[MAXWORDUTF8LEN];

+ if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&

+ strcmp(cw, value) == 0) return 1;

+ return 0;

+int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {

+ int n = 0;

+ char * p;

+ if (!list) return 0;

+ for (p = list; (p = strstr(p, tag)); p++) n++;

+ if (n == 0) return 0;

+ *slst = (char **) malloc(sizeof(char *) * n);

+ if (!*slst) return 0;

+ for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {

+ int l = strlen(p);

+ (*slst)[n] = (char *) malloc(l);

+ if (!(*slst)[n]) return (n > 0 ? n - 1 : 0);

+ get_xml_par((*slst)[n], p + strlen(tag) - 1, l);

+ }

+ return n;

+int Hunspell::spellml(char*** slst, const char * word)

+ char *q, *q2;

+ char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];

+ q = (char *) strstr(word, "<query");

+ if (!q) return 0; // bad XML input

+ q2 = strchr(q, '>');

+ if (!q2) return 0; // bad XML input

+ q2 = strstr(q2, "<word");

+ if (!q2) return 0; // bad XML input

+ if (check_xml_par(q, "type=", "analyze")) {

+ int n = 0, s = 0;

+ if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN)) n = analyze(slst, cw);

+ if (n == 0) return 0;

+ // convert the result to <code><a>ana1</a><a>ana2</a></code> format

+ for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);

+ char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;

+ if (!r) return 0;

+ strcpy(r, "<code>");

+ for (int i = 0; i < n; i++) {

+ int l = strlen(r);

+ strcpy(r + l, "<a>");

+ strcpy(r + l + 3, (*slst)[i]);

+ mystrrep(r + l + 3, "\t", " ");

+ mystrrep(r + l + 3, "<", "&lt;");

+ mystrrep(r + l + 3, "&", "&amp;");

+ strcat(r, "</a>");

+ free((*slst)[i]);

+ }

+ strcat(r, "</code>");

+ (*slst)[0] = r;

+ return 1;

+ } else if (check_xml_par(q, "type=", "stem")) {

+ if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN)) return stem(slst, cw);

+ } else if (check_xml_par(q, "type=", "generate")) {

+ int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN);

+ if (n == 0) return 0;

+ char * q3 = strstr(q2 + 1, "<word");

+ if (q3) {

+ if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN)) {

+ return generate(slst, cw, cw2);

+ }

+ } else {

+ char ** slst2;

+ if ((q2 = strstr(q2 + 1, "<code")) &&

+ (n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) {

+ int n2 = generate(slst, cw, slst2, n);

+ freelist(&slst2, n);

+ return uniqlist(*slst, n2);

+ }

+ return 0;

+#ifdef HUNSPELL_EXPERIMENTAL

// XXX need UTF-8 support

char * Hunspell::morph_with_correction(const char * word)

{

- char cw[MAXWORDUTF8LEN + 4];

- char wspace[MAXWORDUTF8LEN + 4];

- if (! pSMgr) return 0;

+ char cw[MAXWORDUTF8LEN];

+ char wspace[MAXWORDUTF8LEN];

+ if (! pSMgr || maxdic == 0) return NULL;

int wl = strlen(word);

if (utf8) {

- if (wl >= MAXWORDUTF8LEN) return 0;

+ if (wl >= MAXWORDUTF8LEN) return NULL;

} else {

- if (wl >= MAXWORDLEN) return 0;

+ if (wl >= MAXWORDLEN) return NULL;

}

int captype = 0;

int abbv = 0;

wl = cleanword(cw, word, &captype, &abbv);

- if (wl == 0) return 0;

+ if (wl == 0) return NULL;

char result[MAXLNLEN];

char * st = NULL;

*result = '\0';

switch(captype) {

- case NOCAP: {

+ case NOCAP: {

st = pSMgr->suggest_morph_for_spelling_error(cw);

if (st) {

- strcat(result, st);

+ mystrcat(result, st, MAXLNLEN);

free(st);

}

- if (abbv) {

- memcpy(wspace,cw,wl);

+ if (abbv) {

+ memcpy(wspace,cw,wl);

*(wspace+wl) = '.';

*(wspace+wl+1) = '\0';

st = pSMgr->suggest_morph_for_spelling_error(wspace);

if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

+ if (*result) mystrcat(result, "\n", MAXLNLEN);

+ mystrcat(result, st, MAXLNLEN);

free(st);

}

break;

}

- case INITCAP: {

+ case INITCAP: {

memcpy(wspace,cw,(wl+1));

mkallsmall(wspace);

st = pSMgr->suggest_morph_for_spelling_error(wspace);

if (st) {

- strcat(result, st);

+ mystrcat(result, st, MAXLNLEN);

free(st);

- }

- st = pSMgr->suggest_morph_for_spelling_error(cw);

+ }

+ st = pSMgr->suggest_morph_for_spelling_error(cw);

if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

+ if (*result) mystrcat(result, "\n", MAXLNLEN);

+ mystrcat(result, st, MAXLNLEN);

free(st);

}

- if (abbv) {

- memcpy(wspace,cw,wl);

+ if (abbv) {

+ memcpy(wspace,cw,wl);

*(wspace+wl) = '.';

*(wspace+wl+1) = '\0';

mkallsmall(wspace);

st = pSMgr->suggest_morph_for_spelling_error(wspace);

if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

+ if (*result) mystrcat(result, "\n", MAXLNLEN);

+ mystrcat(result, st, MAXLNLEN);

free(st);

- }

+ }

mkinitcap(wspace);

st = pSMgr->suggest_morph_for_spelling_error(wspace);

if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

+ if (*result) mystrcat(result, "\n", MAXLNLEN);

+ mystrcat(result, st, MAXLNLEN);

free(st);

- }

+ }

}

break;

}

- case HUHCAP: {

+ case HUHCAP: {

st = pSMgr->suggest_morph_for_spelling_error(cw);

if (st) {

- strcat(result, st);

+ mystrcat(result, st, MAXLNLEN);

free(st);

}

memcpy(wspace,cw,(wl+1));

mkallsmall(wspace);

st = pSMgr->suggest_morph_for_spelling_error(wspace);

if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

+ if (*result) mystrcat(result, "\n", MAXLNLEN);

+ mystrcat(result, st, MAXLNLEN);

free(st);

- }

+ }

break;

}

- case ALLCAP: {

+ case ALLCAP: {

memcpy(wspace,cw,(wl+1));

st = pSMgr->suggest_morph_for_spelling_error(wspace);

if (st) {

- strcat(result, st);

+ mystrcat(result, st, MAXLNLEN);

free(st);

- }

+ }

mkallsmall(wspace);

st = pSMgr->suggest_morph_for_spelling_error(wspace);

if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

+ if (*result) mystrcat(result, "\n", MAXLNLEN);

+ mystrcat(result, st, MAXLNLEN);

free(st);

}

- mkinitcap(wspace);

- st = pSMgr->suggest_morph_for_spelling_error(wspace);

+ mkinitcap(wspace);

+ st = pSMgr->suggest_morph_for_spelling_error(wspace);

if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

+ if (*result) mystrcat(result, "\n", MAXLNLEN);

+ mystrcat(result, st, MAXLNLEN);

free(st);

}

- if (abbv) {

+ if (abbv) {

memcpy(wspace,cw,(wl+1));

*(wspace+wl) = '.';

*(wspace+wl+1) = '\0';

- if (*result) strcat(result, "\n");

+ if (*result) mystrcat(result, "\n", MAXLNLEN);

st = pSMgr->suggest_morph_for_spelling_error(wspace);

if (st) {

- strcat(result, st);

- free(st);

- }

+ mystrcat(result, st, MAXLNLEN);

+ free(st);

+ }

mkallsmall(wspace);

st = pSMgr->suggest_morph_for_spelling_error(wspace);

if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

+ if (*result) mystrcat(result, "\n", MAXLNLEN);

+ mystrcat(result, st, MAXLNLEN);

free(st);

}

- mkinitcap(wspace);

- st = pSMgr->suggest_morph_for_spelling_error(wspace);

+ mkinitcap(wspace);

+ st = pSMgr->suggest_morph_for_spelling_error(wspace);

if (st) {

- if (*result) strcat(result, "\n");

- strcat(result, st);

+ if (*result) mystrcat(result, "\n", MAXLNLEN);

+ mystrcat(result, st, MAXLNLEN);

free(st);

}

- }

+ }

break;

}

- if (result) return mystrdup(result);

+ if (*result) return mystrdup(result);

return NULL;

}

-/* analyze word

- * return line count

- * XXX need a better data structure for morphological analysis */

-int Hunspell::analyze(char ***out, const char *word) {

- int n = 0;

- if (!word) return 0;

- char * m = morph(word);

- if(!m) return 0;

- if (!out) return line_tok(m, out);

- // without memory allocation

- /* BUG missing buffer size checking */

- int i, p;

- for(p = 0, i = 0; m[i]; i++) {

- if(m[i] == '\n' || !m[i+1]) {

- n++;

- strncpy((*out)[n++], m + p, i - p + 1);

- if (m[i] == '\n') (*out)[n++][i - p] = '\0';

- if(!m[i+1]) break;

- p = i + 1;

- }

- free(m);

- return n;

#endif // END OF HUNSPELL_EXPERIMENTAL CODE

Hunhandle *Hunspell_create(FILE* aff_handle, FILE* dic_handle)

@@ -1725,6 +1893,17 @@

#endif

}

+Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,

+ const char * key)

+#ifdef HUNSPELL_CHROME_CLIENT

+ return NULL;

+#else

+ return (Hunhandle*)(new Hunspell(affpath, dpath, key));

+#endif

void Hunspell_destroy(Hunhandle *pHunspell)

{

delete (Hunspell*)(pHunspell);

@@ -1745,3 +1924,57 @@

return ((Hunspell*)pHunspell)->suggest(slst, word);

}

+int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)

+ return ((Hunspell*)pHunspell)->analyze(slst, word);

+int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)

+ return ((Hunspell*)pHunspell)->stem(slst, word);

+int Hunspell_stem(Hunhandle *pHunspell, char*** slst, char** desc, int n)

+ return ((Hunspell*)pHunspell)->stem(slst, desc, n);

+int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,

+ const char * word2)

+ return ((Hunspell*)pHunspell)->generate(slst, word, word2);

+int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,

+ char** desc, int n)

+ return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);

+ /* functions for run-time modification of the dictionary */

+ /* add word to the run-time dictionary */

+int Hunspell_add(Hunhandle *pHunspell, const char * word) {

+ return ((Hunspell*)pHunspell)->add(word);

+ /* add word to the run-time dictionary with affix flags of

+ * the example (a dictionary word): Hunspell will recognize

+ * affixed forms of the new word, too.

+ */

+int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,

+ const char * example) {

+ return ((Hunspell*)pHunspell)->add_with_affix(word, example);

+ /* remove word from the run-time dictionary */

+int Hunspell_remove(Hunhandle *pHunspell, const char * word) {

+ return ((Hunspell*)pHunspell)->remove(word);

+void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n) {

+ freelist(slst, n);

Property changes on: chrome\third_party\hunspell\src\hunspell\hunspell.cxx

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « chrome/third_party/hunspell/src/hunspell/hunspell.hxx ('k') | chrome/third_party/hunspell/src/hunspell/hunzip.hxx » ('j') | no next file with comments »