Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(376)

Unified Diff: chrome/third_party/hunspell/src/hunspell/hunspell.cxx

Issue 155841: Update Hunspell to the latest stable version to use the latest dictionary for... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: '' Created 11 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/third_party/hunspell/src/hunspell/hunspell.cxx
===================================================================
--- chrome/third_party/hunspell/src/hunspell/hunspell.cxx (revision 21721)
+++ chrome/third_party/hunspell/src/hunspell/hunspell.cxx (working copy)
@@ -6,16 +6,17 @@
#include <cstring>
#include <cstdio>
#else
-#include <stdlib.h>
+#include <stdlib.h>
#include <string.h>
-#include <stdio.h>
+#include <stdio.h>
#endif
#include "hunspell.hxx"
#include "hunspell.h"
+#include "csutil.hxx"
#ifndef MOZILLA_CLIENT
-#ifndef W32
+#ifndef WIN32
using namespace std;
#endif
#endif
@@ -23,27 +24,34 @@
#ifdef HUNSPELL_CHROME_CLIENT
Hunspell::Hunspell(const unsigned char* bdict_data, size_t bdict_length)
#else
-Hunspell::Hunspell(FILE* aff_handle, FILE* dic_handle)
+Hunspell::Hunspell(FILE* aff_handle, FILE* dic_handle, const char * key = NULL)
#endif
{
encoding = NULL;
csconv = NULL;
utf8 = 0;
complexprefixes = 0;
+#ifndef HUNSPELL_CHROME_CLIENT
+ affixpath = mystrdup(affpath);
+#endif
+ maxdic = 0;
#ifdef HUNSPELL_CHROME_CLIENT
bdict_reader = new hunspell::BDictReader;
bdict_reader->Init(bdict_data, bdict_length);
- pHMgr = new HashMgr(bdict_reader);
- pAMgr = new AffixMgr(bdict_reader, pHMgr);
+ pHMgr[0] = new HashMgr(bdict_reader);
+ if (pHMgr[0]) maxdic = 1;
+
+ pAMgr = new AffixMgr(bdict_reader, pHMgr, &maxdic);
#else
/* first set up the hash manager */
- pHMgr = new HashMgr(dic_handle, aff_handle);
+ pHMgr[0] = new HashMgr(dic_handle, aff_handle, key);
+ if (pHMgr[0]) maxdic = 1;
/* next set up the affix manager */
/* it needs access to the hash manager lookup methods */
- pAMgr = new AffixMgr(aff_handle, pHMgr);
+ pAMgr = new AffixMgr(aff_handle, pHMgr, &maxdic, key);
#endif
/* get the preferred try string and the dictionary */
@@ -65,10 +73,13 @@
{
if (pSMgr) delete pSMgr;
if (pAMgr) delete pAMgr;
- if (pHMgr) delete pHMgr;
+ for (int i = 0; i < maxdic; i++) delete pHMgr[i];
+ maxdic = 0;
pSMgr = NULL;
pAMgr = NULL;
- pHMgr = NULL;
+#ifdef MOZILLA_CLIENT
+ free(csconv);
+#endif
csconv= NULL;
if (encoding) free(encoding);
encoding = NULL;
@@ -76,27 +87,38 @@
#ifdef HUNSPELL_CHROME_CLIENT
if (bdict_reader) delete bdict_reader;
bdict_reader = NULL;
+#else
+ if (affixpath) free(affixpath);
+ affixpath = NULL;
#endif
}
+#ifndef HUNSPELL_CHROME_CLIENT
+// load extra dictionaries
+int Hunspell::add_dic(const char * dpath, const char * key) {
+ if (maxdic == MAXDIC || !affixpath) return 1;
+ pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
+ if (pHMgr[maxdic]) maxdic++; else return 1;
+ return 0;
+}
+#endif
// make a copy of src at destination while removing all leading
// blanks and removing any trailing periods after recording
// their presence with the abbreviation flag
-// also since already going through character by character,
+// also since already going through character by character,
// set the capitalization type
// return the length of the "cleaned" (and UTF-8 encoded) word
-int Hunspell::cleanword2(char * dest, const char * src,
+int Hunspell::cleanword2(char * dest, const char * src,
w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
-{
+{
unsigned char * p = (unsigned char *) dest;
const unsigned char * q = (const unsigned char * ) src;
- int firstcap = 0;
// first skip over any leading blanks
while ((*q != '\0') && (*q == ' ')) q++;
-
+
// now strip off any trailing periods (recording their presence)
*pabbrev = 0;
int nl = strlen((const char *)q);
@@ -104,80 +126,43 @@
nl--;
(*pabbrev)++;
}
-
+
// if no characters are left it can't be capitalized
- if (nl <= 0) {
+ if (nl <= 0) {
*pcaptype = NOCAP;
*p = '\0';
return 0;
}
- // now determine the capitalization type of the first nl letters
- int ncap = 0;
- int nneutral = 0;
- *nc = 0;
-
- if (!utf8) {
- while (nl > 0) {
- (*nc)++;
- if (csconv[(*q)].ccase) ncap++;
- if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
- *p++ = *q++;
- nl--;
- }
- // remember to terminate the destination string
- *p = '\0';
- if (ncap) {
- firstcap = csconv[(unsigned char)(*dest)].ccase;
- }
- } else {
- unsigned short idx;
- *nc = u8_u16(dest_utf, MAXWORDLEN, (const char *) q);
+ strncpy(dest, (char *) q, nl);
+ *(dest + nl) = '\0';
+ nl = strlen(dest);
+ if (utf8) {
+ *nc = u8_u16(dest_utf, MAXWORDLEN, dest);
// don't check too long words
if (*nc >= MAXWORDLEN) return 0;
if (*nc == -1) { // big Unicode character (non BMP area)
*pcaptype = NOCAP;
- strcpy((char *) p, (char *) q);
- return strlen(dest);
+ return nl;
}
- *nc -= *pabbrev;
- for (int i = 0; i < *nc; i++) {
- idx = (dest_utf[i].h << 8) + dest_utf[i].l;
- if (idx != unicodetolower(idx, langnum)) ncap++;
- if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) nneutral++;
- }
- u16_u8(dest, MAXWORDUTF8LEN, dest_utf, *nc);
- if (ncap) {
- idx = (dest_utf[0].h << 8) + dest_utf[0].l;
- firstcap = (idx != unicodetolower(idx, langnum));
- }
- }
-
- // now finally set the captype
- if (ncap == 0) {
- *pcaptype = NOCAP;
- } else if ((ncap == 1) && firstcap) {
- *pcaptype = INITCAP;
- } else if ((ncap == *nc) || ((ncap + nneutral) == *nc)) {
- *pcaptype = ALLCAP;
- } else if ((ncap > 1) && firstcap) {
- *pcaptype = HUHINITCAP;
+ *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);
} else {
- *pcaptype = HUHCAP;
+ *pcaptype = get_captype(dest, nl, csconv);
+ *nc = nl;
}
- return strlen(dest);
-}
+ return nl;
+}
-int Hunspell::cleanword(char * dest, const char * src,
+int Hunspell::cleanword(char * dest, const char * src,
int * pcaptype, int * pabbrev)
-{
+{
unsigned char * p = (unsigned char *) dest;
const unsigned char * q = (const unsigned char * ) src;
int firstcap = 0;
// first skip over any leading blanks
while ((*q != '\0') && (*q == ' ')) q++;
-
+
// now strip off any trailing periods (recording their presence)
*pabbrev = 0;
int nl = strlen((const char *)q);
@@ -185,9 +170,9 @@
nl--;
(*pabbrev)++;
}
-
+
// if no characters are left it can't be capitalized
- if (nl <= 0) {
+ if (nl <= 0) {
*pcaptype = NOCAP;
*p = '\0';
return 0;
@@ -215,8 +200,9 @@
nc = u8_u16(t, MAXWORDLEN, src);
for (int i = 0; i < nc; i++) {
idx = (t[i].h << 8) + t[i].l;
- if (idx != unicodetolower(idx, langnum)) ncap++;
- if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) nneutral++;
+ unsigned short low = unicodetolower(idx, langnum);
+ if (idx != low) ncap++;
+ if (unicodetoupper(idx, langnum) == low) nneutral++;
}
u16_u8(dest, MAXWORDUTF8LEN, t, nc);
if (ncap) {
@@ -238,8 +224,7 @@
*pcaptype = HUHCAP;
}
return strlen(dest);
-}
-
+}
void Hunspell::mkallcap(char * p)
{
@@ -256,7 +241,7 @@
}
u16_u8(p, MAXWORDUTF8LEN, u, nc);
} else {
- while (*p != '\0') {
+ while (*p != '\0') {
*p = csconv[((unsigned char) *p)].cupper;
p++;
}
@@ -269,15 +254,16 @@
unsigned short idx;
for (int i = 0; i < nc; i++) {
idx = (u[i].h << 8) + u[i].l;
- if (idx != unicodetoupper(idx, langnum)) {
- u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
- u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
+ unsigned short up = unicodetoupper(idx, langnum);
+ if (idx != up) {
+ u[i].h = (unsigned char) (up >> 8);
+ u[i].l = (unsigned char) (up & 0x00FF);
}
}
u16_u8(p, MAXWORDUTF8LEN, u, nc);
- return strlen(p);
+ return strlen(p);
} else {
- while (*p != '\0') {
+ while (*p != '\0') {
*p = csconv[((unsigned char) *p)].cupper;
p++;
}
@@ -288,7 +274,7 @@
void Hunspell::mkallsmall(char * p)
{
- while (*p != '\0') {
+ while (*p != '\0') {
*p = csconv[((unsigned char) *p)].clower;
p++;
}
@@ -300,15 +286,16 @@
unsigned short idx;
for (int i = 0; i < nc; i++) {
idx = (u[i].h << 8) + u[i].l;
- if (idx != unicodetolower(idx, langnum)) {
- u[i].h = (unsigned char) (unicodetolower(idx, langnum) >> 8);
- u[i].l = (unsigned char) (unicodetolower(idx, langnum) & 0x00FF);
+ unsigned short low = unicodetolower(idx, langnum);
+ if (idx != low) {
+ u[i].h = (unsigned char) (low >> 8);
+ u[i].l = (unsigned char) (low & 0x00FF);
}
}
u16_u8(p, MAXWORDUTF8LEN, u, nc);
return strlen(p);
} else {
- while (*p != '\0') {
+ while (*p != '\0') {
*p = csconv[((unsigned char) *p)].clower;
p++;
}
@@ -322,18 +309,18 @@
*p = *source;
for (p++, source++; *(source - 1); p++, source++) {
*p = *source;
- if (*source == '\x9f') *--p = '\xdf';
+ if (*source == '\x9F') *--p = '\xDF';
}
return dest;
}
-// recursive search for right ss-\xdf permutations
+// recursive search for right ss - sharp s permutations
hentry * Hunspell::spellsharps(char * base, char * pos, int n,
int repnum, char * tmp, int * info, char **root) {
pos = strstr(pos, "ss");
if (pos && (n < MAXSHARPS)) {
- *pos = '\xc3';
- *(pos + 1) = '\x9f';
+ *pos = '\xC3';
+ *(pos + 1) = '\x9F';
hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
if (h) return h;
*pos = 's';
@@ -352,31 +339,32 @@
TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
}
-/* check and insert a word to beginning of the suggestion array */
-int Hunspell::insert_sug(char ***slst, char * word, int *ns) {
- if (spell(word)) {
- if (*ns == MAXSUGGESTION) {
- (*ns)--;
- free((*slst)[*ns]);
- }
- for (int k = *ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
- (*slst)[0] = mystrdup(word);
- (*ns)++;
+/* insert a word at the beginning of the suggestion array and return the updated ns */
+int Hunspell::insert_sug(char ***slst, char * word, int ns) {
+ char * dup = mystrdup(word);
+ if (!dup) return ns;
+ if (ns == MAXSUGGESTION) {
+ ns--;
+ free((*slst)[ns]);
}
- return 0;
+ for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
+ (*slst)[0] = dup;
+ return ns + 1;
}
int Hunspell::spell(const char * word, int * info, char ** root)
{
#ifdef HUNSPELL_CHROME_CLIENT
- if (pHMgr) pHMgr->EmptyHentryCache();
+ if (pHMgr) pHMgr[0]->EmptyHentryCache();
#endif
struct hentry * rv=NULL;
// need larger vector. For example, Turkish capital letter I converted a
// 2-byte UTF-8 character (dotless i) by mkallsmall.
- char cw[MAXWORDUTF8LEN + 4];
- char wspace[MAXWORDUTF8LEN + 4];
- w_char unicw[MAXWORDLEN + 1];
+ char cw[MAXWORDUTF8LEN];
+ char wspace[MAXWORDUTF8LEN];
+ w_char unicw[MAXWORDLEN];
+ // Hunspell supports XML input of the simplified API (see manual)
+ if (strcmp(word, SPELL_XML) == 0) return 1;
int nc = strlen(word);
int wl2 = 0;
if (utf8) {
@@ -386,14 +374,18 @@
}
int captype = 0;
int abbv = 0;
- int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
+ int wl = 0;
- if (wl == 0) return 1;
+ // input conversion
+ RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
+ if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
+ else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
- if (info) *info = 0;
+ int info2 = 0;
+ if (wl == 0 || maxdic == 0) return 1;
if (root) *root = NULL;
- // allow numbers with dots and commas (but forbid double separators: "..", ",," etc.)
+ // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
enum { NBEGIN, NNUM, NSEP };
int nstate = NBEGIN;
int i;
@@ -407,173 +399,179 @@
} else break;
}
if ((i == wl) && (nstate == NNUM)) return 1;
+ if (!info) info = &info2; else *info = 0;
- // LANG_hu section: number(s) + (percent or degree) with suffixes
- if (langnum == LANG_hu) {
- if ((nstate == NNUM) && ((cw[i] == '%') || (cw[i] == '\xb0'))
- && checkword(cw + i, info, root)) return 1;
- }
- // END of LANG_hu section
-
switch(captype) {
- case HUHCAP:
- case HUHINITCAP:
- case NOCAP: {
- rv = checkword(cw, info, root);
- if ((abbv) && !(rv)) {
- memcpy(wspace,cw,wl);
- *(wspace+wl) = '.';
- *(wspace+wl+1) = '\0';
- rv = checkword(wspace, info, root);
- }
- break;
- }
+ case HUHCAP:
+ case HUHINITCAP:
+ case NOCAP: {
+ rv = checkword(cw, info, root);
+ if ((abbv) && !(rv)) {
+ memcpy(wspace,cw,wl);
+ *(wspace+wl) = '.';
+ *(wspace+wl+1) = '\0';
+ rv = checkword(wspace, info, root);
+ }
+ break;
+ }
case ALLCAP: {
- rv = checkword(cw, info, root);
- if (rv) break;
- if (abbv) {
- memcpy(wspace,cw,wl);
- *(wspace+wl) = '.';
- *(wspace+wl+1) = '\0';
- rv = checkword(wspace, info, root);
- if (rv) break;
- }
- if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
- char tmpword[MAXWORDUTF8LEN];
- wl = mkallsmall2(cw, unicw, nc);
- memcpy(wspace,cw,(wl+1));
+ rv = checkword(cw, info, root);
+ if (rv) break;
+ if (abbv) {
+ memcpy(wspace,cw,wl);
+ *(wspace+wl) = '.';
+ *(wspace+wl+1) = '\0';
+ rv = checkword(wspace, info, root);
+ if (rv) break;
+ }
+ // Special prefix handling for Catalan, French, Italian:
+ // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
+ if (pAMgr && strchr(cw, '\'')) {
+ wl = mkallsmall2(cw, unicw, nc);
+ char * apostrophe = strchr(cw, '\'');
+ if (utf8) {
+ w_char tmpword[MAXWORDLEN];
+ *apostrophe = '\0';
+ wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
+ *apostrophe = '\'';
+ if (wl2 < nc) {
+ mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
+ rv = checkword(cw, info, root);
+ if (rv) break;
+ }
+ } else {
+ mkinitcap2(apostrophe + 1, unicw, nc);
+ rv = checkword(cw, info, root);
+ if (rv) break;
+ }
+ mkinitcap2(cw, unicw, nc);
+ rv = checkword(cw, info, root);
+ if (rv) break;
+ }
+ if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
+ char tmpword[MAXWORDUTF8LEN];
+ wl = mkallsmall2(cw, unicw, nc);
+ memcpy(wspace,cw,(wl+1));
+ rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
+ if (!rv) {
+ wl2 = mkinitcap2(cw, unicw, nc);
+ rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
+ }
+ if ((abbv) && !(rv)) {
+ *(wspace+wl) = '.';
+ *(wspace+wl+1) = '\0';
+ rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
+ if (!rv) {
+ memcpy(wspace, cw, wl2);
+ *(wspace+wl2) = '.';
+ *(wspace+wl2+1) = '\0';
rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
- if (!rv) {
- wl2 = mkinitcap2(cw, unicw, nc);
- rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
- }
- if ((abbv) && !(rv)) {
- *(wspace+wl) = '.';
- *(wspace+wl+1) = '\0';
- rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
- if (!rv) {
- memcpy(wspace, cw, wl2);
- *(wspace+wl2) = '.';
- *(wspace+wl2+1) = '\0';
- rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
- }
- }
- if (rv) break;
}
}
- case INITCAP: {
- wl = mkallsmall2(cw, unicw, nc);
- memcpy(wspace,cw,(wl+1));
- rv = checkword(wspace, info, root);
- if (!rv || (is_keepcase(rv) && !((captype == INITCAP) &&
- // if CHECKSHARPS: KEEPCASE words with \xdf are allowed
- // in INITCAP form, too.
- pAMgr->get_checksharps() && ((utf8 && strstr(wspace, "\xc3\x9f")) ||
- (!utf8 && strchr(wspace, '\xdf')))))) {
- wl2 = mkinitcap2(cw, unicw, nc);
- rv = checkword(cw, info, root);
- if (rv && (captype == ALLCAP) && is_keepcase(rv)) rv = NULL;
- }
- if (abbv && !rv) {
- *(wspace+wl) = '.';
- *(wspace+wl+1) = '\0';
- rv = checkword(wspace, info, root);
- if (!rv || is_keepcase(rv)) {
- memcpy(wspace, cw, wl2);
- *(wspace+wl2) = '.';
- *(wspace+wl2+1) = '\0';
- rv = checkword(wspace, info, root);
- if (rv && ((captype == ALLCAP) && is_keepcase(rv))) rv = NULL;
- }
- }
- break;
- }
+ if (rv) break;
+ }
+ }
+ case INITCAP: {
+ wl = mkallsmall2(cw, unicw, nc);
+ memcpy(wspace,cw,(wl+1));
+ wl2 = mkinitcap2(cw, unicw, nc);
+ if (captype == INITCAP) *info += SPELL_INITCAP;
+ rv = checkword(cw, info, root);
+ if (captype == INITCAP) *info -= SPELL_INITCAP;
+ // forbid bad capitalization
+ // (for example, ijs -> Ijs instead of IJs in Dutch)
+ // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
+ if (*info & SPELL_FORBIDDEN) {
+ rv = NULL;
+ break;
+ }
+ if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
+ if (rv) break;
+
+ rv = checkword(wspace, info, root);
+ if (abbv && !rv) {
+
+ *(wspace+wl) = '.';
+ *(wspace+wl+1) = '\0';
+ rv = checkword(wspace, info, root);
+ if (!rv) {
+ memcpy(wspace, cw, wl2);
+ *(wspace+wl2) = '.';
+ *(wspace+wl2+1) = '\0';
+ if (captype == INITCAP) *info += SPELL_INITCAP;
+ rv = checkword(wspace, info, root);
+ if (captype == INITCAP) *info -= SPELL_INITCAP;
+ if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
+ break;
+ }
+ }
+ if (rv && is_keepcase(rv) &&
+ ((captype == ALLCAP) ||
+ // if CHECKSHARPS: KEEPCASE words with \xDF are allowed
+ // in INITCAP form, too.
+ !(pAMgr->get_checksharps() &&
+ ((utf8 && strstr(wspace, "\xC3\x9F")) ||
+ (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;
+ break;
+ }
}
-
+
if (rv) return 1;
- // recursive breaking at break points (not good for morphological analysis)
+ // recursive breaking at break points
if (wordbreak) {
char * s;
char r;
- for (int j = 0; j < pAMgr->get_numbreak(); j++) {
+ int corr = 0;
+ wl = strlen(cw);
+ int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
+ // check boundary patterns (^begin and end$)
+ for (int j = 0; j < numbreak; j++) {
+ int plen = strlen(wordbreak[j]);
+ if (plen == 1 || plen > wl) continue;
+ if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0
+ && spell(cw + plen - 1)) return 1;
+ if (wordbreak[j][plen - 1] == '$' &&
+ strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
+ r = cw[wl - plen + 1];
+ cw[wl - plen + 1] = '\0';
+ if (spell(cw)) return 1;
+ cw[wl - plen + 1] = r;
+ }
+ }
+ // other patterns
+ for (int j = 0; j < numbreak; j++) {
+ int result = 0;
+ int plen = strlen(wordbreak[j]);
s=(char *) strstr(cw, wordbreak[j]);
- if (s) {
+ if (s && (s > cw) && (s < cw + wl - plen)) {
+ if (!spell(s + plen)) continue;
r = *s;
*s = '\0';
// examine 2 sides of the break point
- if (spell(cw) && spell(s + strlen(wordbreak[j]))) {
- *s = r;
- return 1;
- }
+ if (spell(cw)) return 1;
*s = r;
+
+ // LANG_hu: special dash rule
+ if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
+ r = s[1];
+ s[1] = '\0';
+ if (spell(cw)) return 1; // check the first part with dash
+ s[1] = r;
+ }
+ // end of LANG specific region
+
}
}
}
- // LANG_hu: compoundings with dashes and n-dashes XXX deprecated!
- if (langnum == LANG_hu) {
- int n;
- // compound word with dash (HU) I18n
- char * dash;
- int result = 0;
- // n-dash
- dash = (char *) strstr(cw,"\xe2\x80\x93");
- if (dash && !wordbreak) {
- *dash = '\0';
- // examine 2 sides of the dash
- if (spell(cw) && spell(dash + 3)) {
- *dash = '\xe2';
- return 1;
- }
- *dash = '\xe2';
- }
- dash = (char *) strchr(cw,'-');
- if (dash) {
- *dash='\0';
- // examine 2 sides of the dash
- if (dash[1] == '\0') { // base word ending with dash
- if (spell(cw)) return 1;
- } else {
- // first word ending with dash: word-
- char r2 = *(dash + 1);
- dash[0]='-';
- dash[1]='\0';
- result = spell(cw);
- dash[1] = r2;
- dash[0]='\0';
- if (result && spell(dash+1) && ((strlen(dash+1) > 1) || (dash[1] == 'e') ||
- ((dash[1] > '0') && (dash[1] < '9')))) return 1;
- }
- // affixed number in correct word
- if (result && (dash > cw) && (((*(dash-1)<='9') && (*(dash-1)>='0')) || (*(dash-1)>='.'))) {
- *dash='-';
- n = 1;
- if (*(dash - n) == '.') n++;
- // search first not a number character to left from dash
- while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
- n++;
- }
- if ((dash - n) < cw) n--;
- // numbers: deprecated
- for(; n >= 1; n--) {
- if ((*(dash - n) >= '0') && (*(dash - n) <= '9') &&
- checkword(dash - n, info, root)) return 1;
- }
- }
- }
- }
return 0;
}
-//int Hunspell::spell(const char * word) {
-// return spell(word, NULL, NULL);
-//}
-
struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
{
struct hentry * he = NULL;
- int len;
+ int len, i;
char w2[MAXWORDUTF8LEN];
const char * word;
@@ -600,26 +598,29 @@
}
// look word in hash table
- if (pHMgr) he = pHMgr->lookup(word);
+ for (i = 0; (i < maxdic) && !he; i ++) {
+ he = (pHMgr[i])->lookup(word);
// check forbidden and onlyincompound words
if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
- info += SPELL_FORBIDDEN;
+ if (info) *info += SPELL_FORBIDDEN;
// LANG_hu section: set dash information for suggestions
if (langnum == LANG_hu) {
if (pAMgr->get_compoundflag() &&
TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
- info += SPELL_COMPOUND;
+ if (info) *info += SPELL_COMPOUND;
}
}
return NULL;
}
- // he = next not pseudoroot and not onlyincompound homonym or NULL
+ // he = next homonym that is not a needaffix, onlyincompound or (for INITCAP input) onlyupcase word
while (he && (he->astr) &&
- ((pAMgr->get_pseudoroot() && TESTAFF(he->astr, pAMgr->get_pseudoroot(), he->alen)) ||
- (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen))
+ ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
+ (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
+ (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
)) he = he->next_homonym;
+ }
// check with affixes
if (!he && pAMgr) {
@@ -627,38 +628,42 @@
len = strlen(word);
he = pAMgr->affix_check(word, len, 0);
- // check compound restriction
- if (he && he->astr && pAMgr->get_onlyincompound() &&
- TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) he = NULL;
+ // check compound restriction and onlyupcase
+ if (he && he->astr && (
+ (pAMgr->get_onlyincompound() &&
+ TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
+ (info && (*info & SPELL_INITCAP) &&
+ TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
+ he = NULL;
+ }
if (he) {
if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
- info += SPELL_FORBIDDEN;
+ if (info) *info += SPELL_FORBIDDEN;
return NULL;
}
if (root) {
- *root = mystrdup(he->word);
- if (complexprefixes) {
+ *root = mystrdup(&(he->word));
+ if (*root && complexprefixes) {
if (utf8) reverseword_utf(*root); else reverseword(*root);
}
}
// try check compound word
} else if (pAMgr->get_compound()) {
- he = pAMgr->compound_check(word, len,
- 0,0,100,0,NULL,0,NULL,NULL,0);
+ he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0);
// LANG_hu section: `moving rule' with last dash
- if ((!he) && (langnum == LANG_hu) && (word[len-1]=='-')) {
+ if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {
char * dup = mystrdup(word);
+ if (!dup) return NULL;
dup[len-1] = '\0';
- he = pAMgr->compound_check(dup, len-1,
- -5,0,100,0,NULL,1,NULL,NULL,0);
+ he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0);
free(dup);
}
- // end of LANG speficic region
+ // end of LANG specific region
if (he) {
if (root) {
- *root = mystrdup(he->word);
- if (complexprefixes) {
+ *root = mystrdup(&(he->word));
+ if (*root && complexprefixes) {
if (utf8) reverseword_utf(*root); else reverseword(*root);
}
}
@@ -674,12 +679,18 @@
int Hunspell::suggest(char*** slst, const char * word)
{
#ifdef HUNSPELL_CHROME_CLIENT
- if (pHMgr) pHMgr->EmptyHentryCache();
+ if (pHMgr) pHMgr[0]->EmptyHentryCache();
#endif
- char cw[MAXWORDUTF8LEN + 4];
- char wspace[MAXWORDUTF8LEN + 4];
- if (! pSMgr) return 0;
- w_char unicw[MAXWORDLEN + 1];
+ int onlycmpdsug = 0;
+ char cw[MAXWORDUTF8LEN];
+ char wspace[MAXWORDUTF8LEN];
+ if (!pSMgr || maxdic == 0) return 0;
+ w_char unicw[MAXWORDLEN];
+ *slst = NULL;
+ // process XML input of the simplified API (see manual)
+ if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
+ return spellml(slst, word);
+ }
int nc = strlen(word);
if (utf8) {
if (nc >= MAXWORDUTF8LEN) return 0;
@@ -688,49 +699,73 @@
}
int captype = 0;
int abbv = 0;
- int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
+ int wl = 0;
+
+ // input conversion
+ RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
+ if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
+ else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
+
if (wl == 0) return 0;
int ns = 0;
- *slst = NULL;
int capwords = 0;
- int ngramsugs = 0;
switch(captype) {
- case NOCAP: {
- ns = pSMgr->suggest(slst, cw, ns);
+ case NOCAP: {
+ ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
break;
}
- case INITCAP: {
+ case INITCAP: {
capwords = 1;
- ns = pSMgr->suggest(slst, cw, ns);
+ ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
if (ns == -1) break;
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
- ns = pSMgr->suggest(slst, wspace, ns);
+ ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
break;
}
case HUHINITCAP:
capwords = 1;
- case HUHCAP: {
- ns = pSMgr->suggest(slst, cw, ns);
+ case HUHCAP: {
+ ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
if (ns != -1) {
int prevns;
+ // something.The -> something. The
+ char * dot = strchr(cw, '.');
+ if (dot && (dot > cw)) {
+ int captype_;
+ if (utf8) {
+ w_char w_[MAXWORDLEN];
+ int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
+ captype_ = get_captype_utf8(w_, wl_, langnum);
+ } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
+ if (captype_ == INITCAP) {
+ char * st = mystrdup(cw);
+ if (st) st = (char *) realloc(st, wl + 2);
+ if (st) {
+ st[(dot - cw) + 1] = ' ';
+ strcpy(st + (dot - cw) + 2, dot + 1);
+ ns = insert_sug(slst, st, ns);
+ free(st);
+ }
+ }
+ }
if (captype == HUHINITCAP) {
// TheOpenOffice.org -> The OpenOffice.org
memcpy(wspace,cw,(wl+1));
mkinitsmall2(wspace, unicw, nc);
- ns = pSMgr->suggest(slst, wspace, ns);
+ ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
}
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
- insert_sug(slst, wspace, &ns);
+ if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
prevns = ns;
- ns = pSMgr->suggest(slst, wspace, ns);
+ ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
if (captype == HUHINITCAP) {
mkinitcap2(wspace, unicw, nc);
- insert_sug(slst, wspace, &ns);
- ns = pSMgr->suggest(slst, wspace, ns);
+ if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
+ ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
}
// aNew -> "a New" (instead of "a new")
for (int j = prevns; j < ns; j++) {
@@ -739,7 +774,7 @@
int slen = strlen(space + 1);
// different case after space (need capitalisation)
if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
- w_char w[MAXWORDLEN + 1];
+ w_char w[MAXWORDLEN];
int wc = 0;
char * r = (*slst)[j];
if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
@@ -754,31 +789,32 @@
break;
}
- case ALLCAP: {
+ case ALLCAP: {
memcpy(wspace, cw, (wl+1));
mkallsmall2(wspace, unicw, nc);
- ns = pSMgr->suggest(slst, wspace, ns);
+ ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
if (ns == -1) break;
- if (pAMgr && pAMgr->get_keepcase()) insert_sug(slst, wspace, &ns);
+ if (pAMgr && pAMgr->get_keepcase() && spell(wspace))
+ ns = insert_sug(slst, wspace, ns);
mkinitcap2(wspace, unicw, nc);
- ns = pSMgr->suggest(slst, wspace, ns);
+ ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
for (int j=0; j < ns; j++) {
mkallcap((*slst)[j]);
if (pAMgr && pAMgr->get_checksharps()) {
char * pos;
if (utf8) {
- pos = strstr((*slst)[j], "\xc3\x9f");
+ pos = strstr((*slst)[j], "\xC3\x9F");
while (pos) {
*pos = 'S';
*(pos+1) = 'S';
- pos = strstr(pos+2, "\xc3\x9f");
+ pos = strstr(pos+2, "\xC3\x9F");
}
} else {
- pos = strchr((*slst)[j], '\xdf');
+ pos = strchr((*slst)[j], '\xDF');
while (pos) {
(*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
- mystrrep((*slst)[j], "\xdf", "SS");
- pos = strchr((*slst)[j], '\xdf');
+ mystrrep((*slst)[j], "\xDF", "SS");
+ pos = strchr((*slst)[j], '\xDF');
}
}
}
@@ -807,37 +843,76 @@
// END OF LANG_hu section
// try ngram approach since found nothing
- if ((ns == 0) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) {
- ngramsugs = 1;
+ if ((ns == 0 || onlycmpdsug) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) {
switch(captype) {
case NOCAP: {
- ns = pSMgr->ngsuggest(*slst, cw, pHMgr);
+ ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
break;
}
+ case HUHINITCAP:
+ capwords = 1;
case HUHCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
- ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
- break;
+ ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
+ break;
}
- case INITCAP: {
+ case INITCAP: {
capwords = 1;
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
- ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
+ ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
break;
}
case ALLCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
- ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
- for (int j=0; j < ns; j++)
+ int oldns = ns;
+ ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
+ for (int j = oldns; j < ns; j++)
mkallcap((*slst)[j]);
break;
}
}
}
+ // try dash suggestion (Afo-American -> Afro-American)
+ if (strchr(cw, '-')) {
+ char * pos = strchr(cw, '-');
+ char * ppos = cw;
+ int nodashsug = 1;
+ char ** nlst = NULL;
+ int nn = 0;
+ int last = 0;
+ for (int j = 0; j < ns && nodashsug == 1; j++) {
+ if (strchr((*slst)[j], '-')) nodashsug = 0;
+ }
+ while (nodashsug && !last) {
+ if (*pos == '\0') last = 1; else *pos = '\0';
+ if (!spell(ppos)) {
+ nn = suggest(&nlst, ppos);
+ for (int j = nn - 1; j >= 0; j--) {
+ strncpy(wspace, cw, ppos - cw);
+ strcpy(wspace + (ppos - cw), nlst[j]);
+ if (!last) {
+ strcat(wspace, "-");
+ strcat(wspace, pos + 1);
+ }
+ ns = insert_sug(slst, wspace, ns);
+ free(nlst[j]);
+ }
+ if (nlst != NULL) free(nlst);
+ nodashsug = 0;
+ }
+ if (!last) {
+ *pos = '-';
+ ppos = pos + 1;
+ pos = strchr(ppos, '-');
+ }
+ if (!pos) pos = cw + strlen(cw);
+ }
+ }
+
// word reversing wrapper for complex prefixes
if (complexprefixes) {
for (int j = 0; j < ns; j++) {
@@ -858,14 +933,14 @@
}
}
- // suggest keepcase
- if (pAMgr->get_keepcase()) {
+ // remove badly capitalized and forbidden forms
+ if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
switch (captype) {
case INITCAP:
case ALLCAP: {
int l = 0;
for (int j=0; j < ns; j++) {
- if (!spell((*slst)[j])) {
+ if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {
char s[MAXSWUTF8L];
w_char w[MAXSWL];
int len;
@@ -876,21 +951,21 @@
len = strlen(s);
}
mkallsmall2(s, w, len);
- free((*slst)[j]);
+ free((*slst)[j]);
if (spell(s)) {
(*slst)[l] = mystrdup(s);
- l++;
+ if ((*slst)[l]) l++;
} else {
mkinitcap2(s, w, len);
if (spell(s)) {
(*slst)[l] = mystrdup(s);
- l++;
+ if ((*slst)[l]) l++;
}
}
} else {
(*slst)[l] = (*slst)[j];
l++;
- }
+ }
}
ns = l;
}
@@ -909,9 +984,28 @@
}
l++;
}
+
+ // output conversion
+ rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
+ for (int j = 0; rl && j < ns; j++) {
+ if (rl->conv((*slst)[j], wspace)) {
+ free((*slst)[j]);
+ (*slst)[j] = mystrdup(wspace);
+ }
+ }
+
+ // free the list if no suggestions are left (e.g. all removed by the nosuggest or onlyincompound parameters)
+ if (l == 0 && *slst) {
+ free(*slst);
+ *slst = NULL;
+ }
return l;
}
+void Hunspell::free_list(char *** slst, int n) {
+ freelist(slst, n);
+}
+
char * Hunspell::get_dic_encoding()
{
return encoding;
@@ -921,9 +1015,9 @@
// XXX need UTF-8 support
int Hunspell::suggest_auto(char*** slst, const char * word)
{
- char cw[MAXWORDUTF8LEN + 4];
- char wspace[MAXWORDUTF8LEN + 4];
- if (! pSMgr) return 0;
+ char cw[MAXWORDUTF8LEN];
+ char wspace[MAXWORDUTF8LEN];
+ if (!pSMgr || maxdic == 0) return 0;
int wl = strlen(word);
if (utf8) {
if (wl >= MAXWORDUTF8LEN) return 0;
@@ -936,15 +1030,15 @@
if (wl == 0) return 0;
int ns = 0;
*slst = NULL; // HU, nsug in pSMgr->suggest
-
+
switch(captype) {
- case NOCAP: {
+ case NOCAP: {
ns = pSMgr->suggest_auto(slst, cw, ns);
if (ns>0) break;
break;
}
- case INITCAP: {
+ case INITCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall(wspace);
ns = pSMgr->suggest_auto(slst, wspace, ns);
@@ -952,10 +1046,11 @@
mkinitcap((*slst)[j]);
ns = pSMgr->suggest_auto(slst, cw, ns);
break;
-
+
}
- case HUHCAP: {
+ case HUHINITCAP:
+ case HUHCAP: {
ns = pSMgr->suggest_auto(slst, cw, ns);
if (ns == 0) {
memcpy(wspace,cw,(wl+1));
@@ -965,7 +1060,7 @@
break;
}
- case ALLCAP: {
+ case ALLCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall(wspace);
ns = pSMgr->suggest_auto(slst, wspace, ns);
@@ -1011,103 +1106,89 @@
}
}
}
- // END OF LANG_hu section
+ // END OF LANG_hu section
return ns;
}
+#endif
-// XXX need UTF-8 support
-int Hunspell::stem(char*** slst, const char * word)
+int Hunspell::stem(char*** slst, char ** desc, int n)
{
- char cw[MAXWORDUTF8LEN + 4];
- char wspace[MAXWORDUTF8LEN + 4];
- if (! pSMgr) return 0;
- int wl = strlen(word);
- if (utf8) {
- if (wl >= MAXWORDUTF8LEN) return 0;
- } else {
- if (wl >= MAXWORDLEN) return 0;
+ char result[MAXLNLEN];
+ char result2[MAXLNLEN];
+ *slst = NULL;
+ if (n == 0) return 0;
+ *result2 = '\0';
+ for (int i = 0; i < n; i++) {
+ *result = '\0';
+ // add compound word parts (except the last one)
+ char * s = (char *) desc[i];
+ char * part = strstr(s, MORPH_PART);
+ if (part) {
+ char * nextpart = strstr(part + 1, MORPH_PART);
+ while (nextpart) {
+ copy_field(result + strlen(result), part, MORPH_PART);
+ part = nextpart;
+ nextpart = strstr(part + 1, MORPH_PART);
+ }
+ s = part;
+ }
+
+ char **pl;
+ char tok[MAXLNLEN];
+ strcpy(tok, s);
+ char * alt = strstr(tok, " | ");
+ while (alt) {
+ alt[1] = MSEP_ALT;
+ alt = strstr(alt, " | ");
+ }
+ int pln = line_tok(tok, &pl, MSEP_ALT);
+ for (int k = 0; k < pln; k++) {
+ // add derivational suffixes
+ if (strstr(pl[k], MORPH_DERI_SFX)) {
+ // remove inflectional suffixes
+ char * is = strstr(pl[k], MORPH_INFL_SFX);
+ if (is) *is = '\0';
+ char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
+ if (sg) {
+ char ** gen;
+ int genl = line_tok(sg, &gen, MSEP_REC);
+ free(sg);
+ for (int j = 0; j < genl; j++) {
+ sprintf(result2 + strlen(result2), "%c%s%s",
+ MSEP_REC, result, gen[j]);
+ }
+ freelist(&gen, genl);
+ }
+ } else {
+ sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
+ if (strstr(pl[k], MORPH_SURF_PFX)) {
+ copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
+ }
+ copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
+ }
+ }
+ freelist(&pl, pln);
}
- int captype = 0;
- int abbv = 0;
- wl = cleanword(cw, word, &captype, &abbv);
- if (wl == 0) return 0;
-
- int ns = 0;
+ int sln = line_tok(result2, slst, MSEP_REC);
+ return uniqlist(*slst, sln);
- *slst = NULL; // HU, nsug in pSMgr->suggest
-
- switch(captype) {
- case HUHCAP:
- case NOCAP: {
- ns = pSMgr->suggest_stems(slst, cw, ns);
+}
- if ((abbv) && (ns == 0)) {
- memcpy(wspace,cw,wl);
- *(wspace+wl) = '.';
- *(wspace+wl+1) = '\0';
- ns = pSMgr->suggest_stems(slst, wspace, ns);
- }
-
- break;
- }
-
- case INITCAP: {
-
- ns = pSMgr->suggest_stems(slst, cw, ns);
-
- if (ns == 0) {
- memcpy(wspace,cw,(wl+1));
- mkallsmall(wspace);
- ns = pSMgr->suggest_stems(slst, wspace, ns);
-
- }
-
- if ((abbv) && (ns == 0)) {
- memcpy(wspace,cw,wl);
- mkallsmall(wspace);
- *(wspace+wl) = '.';
- *(wspace+wl+1) = '\0';
- ns = pSMgr->suggest_stems(slst, wspace, ns);
- }
-
- break;
-
- }
-
- case ALLCAP: {
- ns = pSMgr->suggest_stems(slst, cw, ns);
- if (ns != 0) break;
-
- memcpy(wspace,cw,(wl+1));
- mkallsmall(wspace);
- ns = pSMgr->suggest_stems(slst, wspace, ns);
-
- if (ns == 0) {
- mkinitcap(wspace);
- ns = pSMgr->suggest_stems(slst, wspace, ns);
- }
-
- if ((abbv) && (ns == 0)) {
- memcpy(wspace,cw,wl);
- mkallsmall(wspace);
- *(wspace+wl) = '.';
- *(wspace+wl+1) = '\0';
- ns = pSMgr->suggest_stems(slst, wspace, ns);
- }
-
-
- break;
- }
- }
-
- return ns;
+int Hunspell::stem(char*** slst, const char * word)
+{
+ char ** pl;
+ int pln = analyze(&pl, word);
+ int pln2 = stem(slst, pl, pln);
+ freelist(&pl, pln);
+ return pln2;
}
+#ifdef HUNSPELL_EXPERIMENTAL
int Hunspell::suggest_pos_stems(char*** slst, const char * word)
{
- char cw[MAXWORDUTF8LEN + 4];
- char wspace[MAXWORDUTF8LEN + 4];
- if (! pSMgr) return 0;
+ char cw[MAXWORDUTF8LEN];
+ char wspace[MAXWORDUTF8LEN];
+ if (! pSMgr || maxdic == 0) return 0;
int wl = strlen(word);
if (utf8) {
if (wl >= MAXWORDUTF8LEN) return 0;
@@ -1118,14 +1199,14 @@
int abbv = 0;
wl = cleanword(cw, word, &captype, &abbv);
if (wl == 0) return 0;
-
+
int ns = 0; // ns=0 = normalized input
*slst = NULL; // HU, nsug in pSMgr->suggest
-
+
switch(captype) {
case HUHCAP:
- case NOCAP: {
+ case NOCAP: {
ns = pSMgr->suggest_pos_stems(slst, cw, ns);
if ((abbv) && (ns == 0)) {
@@ -1138,7 +1219,7 @@
break;
}
- case INITCAP: {
+ case INITCAP: {
ns = pSMgr->suggest_pos_stems(slst, cw, ns);
@@ -1147,15 +1228,15 @@
mkallsmall(wspace);
ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
}
-
+
break;
-
+
}
- case ALLCAP: {
+ case ALLCAP: {
ns = pSMgr->suggest_pos_stems(slst, cw, ns);
if (ns != 0) break;
-
+
memcpy(wspace,cw,(wl+1));
mkallsmall(wspace);
ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
@@ -1225,22 +1306,24 @@
return nc;
}
-int Hunspell::put_word(const char * word)
+int Hunspell::add(const char * word)
{
- if (pHMgr) {
- return pHMgr->put_word(word, strlen(word), NULL);
- }
+ if (pHMgr[0]) return (pHMgr[0])->add(word);
return 0;
}
-int Hunspell::put_word_pattern(const char * word, const char * pattern)
+int Hunspell::add_with_affix(const char * word, const char * example)
{
- if (pHMgr) {
- return pHMgr->put_word_pattern(word, strlen(word), pattern);
- }
+ if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);
return 0;
}
+// Remove word from the first dictionary (pHMgr[0]).
+// Returns whatever that dictionary's remove() returns, or 0 when no
+// first dictionary is present.
+int Hunspell::remove(const char * word)
+{
+  return (pHMgr[0]) ? (pHMgr[0])->remove(word) : 0;
+}
+
const char * Hunspell::get_version()
{
return pAMgr->get_version();
@@ -1251,22 +1334,38 @@
return csconv;
}
-#ifdef HUNSPELL_EXPERIMENTAL
-// XXX need UTF-8 support
-char * Hunspell::morph(const char * word)
+void Hunspell::cat_result(char * result, char * st)
{
- char cw[MAXWORDUTF8LEN + 4];
- char wspace[MAXWORDUTF8LEN + 4];
- if (! pSMgr) return 0;
- int wl = strlen(word);
+ if (st) {
+ if (*result) mystrcat(result, "\n", MAXLNLEN);
+ mystrcat(result, st, MAXLNLEN);
+ free(st);
+ }
+}
+
+int Hunspell::analyze(char*** slst, const char * word)
+{
+ char cw[MAXWORDUTF8LEN];
+ char wspace[MAXWORDUTF8LEN];
+ w_char unicw[MAXWORDLEN];
+ int wl2 = 0;
+ *slst = NULL;
+ if (! pSMgr || maxdic == 0) return 0;
+ int nc = strlen(word);
if (utf8) {
- if (wl >= MAXWORDUTF8LEN) return 0;
+ if (nc >= MAXWORDUTF8LEN) return 0;
} else {
- if (wl >= MAXWORDLEN) return 0;
+ if (nc >= MAXWORDLEN) return 0;
}
int captype = 0;
int abbv = 0;
- wl = cleanword(cw, word, &captype, &abbv);
+ int wl = 0;
+
+ // input conversion
+ RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
+ if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
+ else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
+
if (wl == 0) {
if (abbv) {
for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
@@ -1277,7 +1376,7 @@
char result[MAXLNLEN];
char * st = NULL;
-
+
*result = '\0';
int n = 0;
@@ -1287,177 +1386,103 @@
// test numbers
// LANG_hu section: set dash information for suggestions
if (langnum == LANG_hu) {
- while ((n < wl) &&
+ while ((n < wl) &&
(((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
n++;
if ((cw[n] == '.') || (cw[n] == ',')) {
- if (((n2 == 0) && (n > 3)) ||
+ if (((n2 == 0) && (n > 3)) ||
((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
n2++;
n3 = n;
}
}
- if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return NULL;
- if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xb0')) && checkword(cw+n, NULL, NULL))) {
- strcat(result, cw);
+ if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;
+ if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {
+ mystrcat(result, cw, MAXLNLEN);
result[n - 1] = '\0';
- if (n == wl) {
- st = pSMgr->suggest_morph(cw + n - 1);
- if (st) {
- strcat(result, st);
- free(st);
- }
- } else {
+ if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));
+ else {
char sign = cw[n];
cw[n] = '\0';
- st = pSMgr->suggest_morph(cw + n - 1);
- if (st) {
- strcat(result, st);
- free(st);
- }
- strcat(result, "+"); // XXX SPEC. MORPHCODE
+ cat_result(result, pSMgr->suggest_morph(cw + n - 1));
+ mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE
cw[n] = sign;
- st = pSMgr->suggest_morph(cw + n);
- if (st) {
- strcat(result, st);
- free(st);
- }
+ cat_result(result, pSMgr->suggest_morph(cw + n));
}
- return mystrdup(result);
+ return line_tok(result, slst, MSEP_REC);
}
}
// END OF LANG_hu section
-
+
switch(captype) {
- case NOCAP: {
- st = pSMgr->suggest_morph(cw);
- if (st) {
- strcat(result, st);
- free(st);
- }
- if (abbv) {
- memcpy(wspace,cw,wl);
+ case HUHCAP:
+ case HUHINITCAP:
+ case NOCAP: {
+ cat_result(result, pSMgr->suggest_morph(cw));
+ if (abbv) {
+ memcpy(wspace,cw,wl);
+ *(wspace+wl) = '.';
+ *(wspace+wl+1) = '\0';
+ cat_result(result, pSMgr->suggest_morph(wspace));
+ }
+ break;
+ }
+ case INITCAP: {
+ wl = mkallsmall2(cw, unicw, nc);
+ memcpy(wspace,cw,(wl+1));
+ wl2 = mkinitcap2(cw, unicw, nc);
+ cat_result(result, pSMgr->suggest_morph(wspace));
+ cat_result(result, pSMgr->suggest_morph(cw));
+ if (abbv) {
*(wspace+wl) = '.';
*(wspace+wl+1) = '\0';
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
+ cat_result(result, pSMgr->suggest_morph(wspace));
+
+ memcpy(wspace, cw, wl2);
+ *(wspace+wl2) = '.';
+ *(wspace+wl2+1) = '\0';
+
+ cat_result(result, pSMgr->suggest_morph(wspace));
}
- break;
+ break;
}
- case INITCAP: {
- memcpy(wspace,cw,(wl+1));
- mkallsmall(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- strcat(result, st);
- free(st);
- }
- st = pSMgr->suggest_morph(cw);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
- if (abbv) {
- memcpy(wspace,cw,wl);
+ case ALLCAP: {
+ cat_result(result, pSMgr->suggest_morph(cw));
+ if (abbv) {
+ memcpy(wspace,cw,wl);
*(wspace+wl) = '.';
*(wspace+wl+1) = '\0';
- mkallsmall(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
- mkinitcap(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
+ cat_result(result, pSMgr->suggest_morph(cw));
}
- break;
- }
- case HUHCAP: {
- st = pSMgr->suggest_morph(cw);
- if (st) {
- strcat(result, st);
- free(st);
- }
-#if 0
+ wl = mkallsmall2(cw, unicw, nc);
memcpy(wspace,cw,(wl+1));
- mkallsmall(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
+ wl2 = mkinitcap2(cw, unicw, nc);
+
+ cat_result(result, pSMgr->suggest_morph(wspace));
+ cat_result(result, pSMgr->suggest_morph(cw));
+ if (abbv) {
+ *(wspace+wl) = '.';
+ *(wspace+wl+1) = '\0';
+ cat_result(result, pSMgr->suggest_morph(wspace));
+
+ memcpy(wspace, cw, wl2);
+ *(wspace+wl2) = '.';
+ *(wspace+wl2+1) = '\0';
+
+ cat_result(result, pSMgr->suggest_morph(wspace));
}
-#endif
break;
- }
- case ALLCAP: {
- memcpy(wspace,cw,(wl+1));
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- strcat(result, st);
- free(st);
- }
- mkallsmall(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
- mkinitcap(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
- if (abbv) {
- memcpy(wspace,cw,(wl+1));
- *(wspace+wl) = '.';
- *(wspace+wl+1) = '\0';
- if (*result) strcat(result, "\n");
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- strcat(result, st);
- free(st);
- }
- mkallsmall(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
- mkinitcap(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
- }
- break;
}
}
- if (result && (*result)) {
+ if (*result) {
// word reversing wrapper for complex prefixes
if (complexprefixes) {
if (utf8) reverseword_utf(result); else reverseword(result);
}
- return mystrdup(result);
+ return line_tok(result, slst, MSEP_REC);
+
}
// compound word with dash (HU) I18n
@@ -1466,24 +1491,24 @@
// LANG_hu section: set dash information for suggestions
if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
if ((langnum == LANG_hu) && dash) {
- *dash='\0';
+ *dash='\0';
// examine 2 sides of the dash
if (dash[1] == '\0') { // base word ending with dash
- if (spell(cw)) return pSMgr->suggest_morph(cw);
+ if (spell(cw)) return line_tok(pSMgr->suggest_morph(cw), slst, MSEP_REC);
} else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
if (spell(cw) && (spell("-e"))) {
st = pSMgr->suggest_morph(cw);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
- strcat(result,"+"); // XXX spec. separator in MORPHCODE
+ mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
st = pSMgr->suggest_morph("-e");
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
- return mystrdup(result);
+ return line_tok(result, slst, MSEP_REC);
}
} else {
// first word ending with dash: word- XXX ???
@@ -1495,22 +1520,22 @@
dash[0]='\0';
if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
((dash[1] > '0') && (dash[1] < '9')))) {
- st = morph(cw);
+ st = pSMgr->suggest_morph(cw);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
- strcat(result,"+"); // XXX spec. separator in MORPHCODE
+ mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
}
- st = morph(dash+1);
+ st = pSMgr->suggest_morph(dash+1);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
- return mystrdup(result);
+ return line_tok(result, slst, MSEP_REC);
}
}
// affixed number in correct word
- if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
+ if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
(*(dash-1)>='0')) || (*(dash-1)=='.'))) {
*dash='-';
n = 1;
@@ -1525,195 +1550,338 @@
// 56-hoz, 6-hoz
for(; n >= 1; n--) {
if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
- strcat(result, cw);
+ mystrcat(result, cw, MAXLNLEN);
result[dash - cw - n] = '\0';
st = pSMgr->suggest_morph(dash - n);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
- return mystrdup(result);
+ return line_tok(result, slst, MSEP_REC);
}
}
}
}
- return NULL;
+ return 0;
}
+// Generate forms of word according to the morphological descriptions in pl.
+//   slst - receives the list of generated forms (set to NULL when the
+//          result is empty)
+//   word - the word to inflect
+//   pl   - array of pln description strings handed to suggest_gen()
+// Returns the number of entries kept in *slst. Generated forms that do not
+// pass spell() are dropped.
+int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
+{
+  *slst = NULL;
+  if (!pSMgr || !pln || !word) return 0;
+  // Same length guard the other entry points apply before cleanword():
+  // cw is a fixed-size buffer (cleanword() is assumed not to bound its
+  // copy -- its definition is outside this file view).
+  int wl = strlen(word);
+  if (utf8) {
+    if (wl >= MAXWORDUTF8LEN) return 0;
+  } else {
+    if (wl >= MAXWORDLEN) return 0;
+  }
+  char **pl2;
+  int pl2n = analyze(&pl2, word);
+  int captype = 0;
+  int abbv = 0;
+  char cw[MAXWORDUTF8LEN];
+  cleanword(cw, word, &captype, &abbv);
+  char result[MAXLNLEN];
+  *result = '\0';
+
+  for (int i = 0; i < pln; i++) {
+    cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
+  }
+  freelist(&pl2, pl2n);
+
+  if (*result) {
+    // allcap
+    if (captype == ALLCAP) mkallcap(result);
+
+    // line split
+    int linenum = line_tok(result, slst, MSEP_REC);
+
+    // capitalize
+    if (captype == INITCAP || captype == HUHINITCAP) {
+      for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);
+    }
+
+    // temporary filtering of prefix related errors (eg.
+    // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
+
+    int r = 0;
+    for (int j=0; j < linenum; j++) {
+      if (!spell((*slst)[j])) {
+        free((*slst)[j]);
+        (*slst)[j] = NULL;
+      } else {
+        // compact the accepted entries to the front of the list
+        if (r < j) (*slst)[r] = (*slst)[j];
+        r++;
+      }
+    }
+    if (r > 0) return r;
+    // nothing survived the filter: release the (now empty) array
+    free(*slst);
+    *slst = NULL;
+  }
+  return 0;
+}
+
+// Generate forms of word using the analyses of pattern as the model:
+// pattern is run through analyze(), the resulting list is passed to the
+// list-based generate(), and the output is passed through uniqlist().
+int Hunspell::generate(char*** slst, const char * word, const char * pattern)
+{
+  char ** analyses;
+  int count = analyze(&analyses, pattern);
+  int generated = generate(slst, word, analyses, count);
+  freelist(&analyses, count);
+  return uniqlist(*slst, generated);
+}
+
+// minimal XML parser functions
+// Copy an XML attribute value or element text into dest.
+//   par - points at the character that opens the value: a single or
+//         double quote (value ends at the matching quote) or '>'
+//         (value ends at the next '<')
+//   max - maximum number of characters to copy; a terminating NUL is
+//         always stored after them, so dest must hold max + 1 bytes
+// After copying, mystrrep() is applied to replace "&lt;" with "<" and
+// "&amp;" with "&".
+// Returns the number of characters copied (counted before those
+// replacements). Returns 0 and leaves dest as an empty string when par
+// is NULL or does not start with one of the accepted delimiters.
+int Hunspell::get_xml_par(char * dest, const char * par, int max)
+{
+  if (!dest) return 0;
+  *dest = '\0'; // keep dest a valid string on every early return
+  if (!par) return 0;
+  char end = *par;
+  if (end == '>') end = '<';
+  else if (end != '\'' && end != '"') return 0; // bad XML
+  char * d = dest;
+  char * dmax = dest + max;
+  for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;
+  *d = '\0';
+  mystrrep(dest, "&lt;", "<");
+  mystrrep(dest, "&amp;", "&");
+  return d - dest;
+}
+
+// Locate a position inside the first tag of s.
+//   attr == NULL: returns the position of the first '>' in s.
+//   otherwise:    returns the position just after the first occurrence
+//                 of attr that lies before that '>' and is preceded by
+//                 a space or a newline.
+// Returns NULL when s is NULL, when s contains no '>', when attr is
+// empty, or when no such occurrence exists.
+const char * Hunspell::get_xml_pos(const char * s, const char * attr)
+{
+  if (!s) return 0;
+  const char * end = strchr(s, '>');
+  if (attr == NULL) return end;
+  if (!end || *attr == '\0') return 0;
+  // Resume each search one character past the previous hit; restarting
+  // at the hit itself would find the same occurrence forever.
+  for (const char * p = strstr(s, attr); p && p < end; p = strstr(p + 1, attr)) {
+    // a hit at the very start of s has no preceding character to test
+    if (p > s && (*(p-1) == ' ' || *(p-1) == '\n')) return p + strlen(attr);
+  }
+  return 0;
+}
+
+// Return 1 when the value that get_xml_par() extracts for attr in the tag
+// at q is non-empty and equal to value, otherwise 0.
+int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {
+  char buf[MAXWORDUTF8LEN];
+  // MAXWORDUTF8LEN - 1 characters at most, leaving one byte for the NUL
+  if (!get_xml_par(buf, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1)) return 0;
+  return (strcmp(buf, value) == 0) ? 1 : 0;
+}
+
+// Build a string list from every occurrence of tag in list.
+//   slst - receives a malloc'ed array with one malloc'ed string per
+//          occurrence of tag (NULL when nothing is returned)
+//   list - text to scan; may be NULL
+//   tag  - opening tag to look for; its last character is handed to
+//          get_xml_par() as the value delimiter
+// Returns the number of valid entries stored in *slst. If an allocation
+// fails part-way, the entries filled so far are kept and counted so the
+// caller can release them together with the array.
+int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
+  if (!slst) return 0;
+  *slst = NULL;
+  if (!list || !tag || !*tag) return 0;
+  int taglen = strlen(tag);
+  int n = 0;
+  char * p;
+  for (p = list; (p = strstr(p, tag)); p++) n++;
+  if (n == 0) return 0;
+  *slst = (char **) malloc(sizeof(char *) * n);
+  if (!*slst) return 0;
+  for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {
+    int l = strlen(p);
+    // l + 1 bytes: get_xml_par() may store up to l characters plus a NUL
+    (*slst)[n] = (char *) malloc(l + 1);
+    if (!(*slst)[n]) {
+      if (n == 0) {
+        // nothing usable was stored: do not hand back an empty array
+        free(*slst);
+        *slst = NULL;
+      }
+      return n;
+    }
+    // get_xml_par() may return without writing; keep the entry a valid string
+    (*slst)[n][0] = '\0';
+    get_xml_par((*slst)[n], p + taglen - 1, l);
+  }
+  return n;
+}
+
+// Minimal XML front end for analyze(), stem() and generate().
+// word must contain a "<query" element with a type= attribute and at
+// least one "<word" element after it:
+//   type "analyze":  analyzes the first word; on success *slst holds one
+//                    string, "<code><a>..</a>..</code>", and 1 is returned
+//   type "stem":     returns stem() of the first word
+//   type "generate": uses a second "<word" element as the pattern, or
+//                    otherwise the "<a>" entries of a following "<code"
+//                    element as the description list
+// Returns 0 for missing or malformed input and for empty results.
+int Hunspell::spellml(char*** slst, const char * word)
+{
+  char *q, *q2;
+  char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
+  if (!word) return 0;
+  q = (char *) strstr(word, "<query");
+  if (!q) return 0; // bad XML input
+  q2 = strchr(q, '>');
+  if (!q2) return 0; // bad XML input
+  q2 = strstr(q2, "<word");
+  if (!q2) return 0; // bad XML input
+  // get_xml_par() stores up to max characters plus a terminating NUL,
+  // so the limit passed for cw/cw2 is MAXWORDUTF8LEN - 1 throughout.
+  if (check_xml_par(q, "type=", "analyze")) {
+    int n = 0, s = 0;
+    if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) n = analyze(slst, cw);
+    if (n == 0) return 0;
+    // convert the result to <code><a>ana1</a><a>ana2</a></code> format
+    for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
+    char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;
+    if (!r) {
+      // do not leak the analysis list when the output buffer is unavailable
+      freelist(slst, n);
+      return 0;
+    }
+    strcpy(r, "<code>");
+    for (int i = 0; i < n; i++) {
+      int l = strlen(r);
+      strcpy(r + l, "<a>");
+      strcpy(r + l + 3, (*slst)[i]);
+      mystrrep(r + l + 3, "\t", " ");
+      // escape '&' before '<': the other order would re-escape the '&'
+      // of each "&lt;" and outgrow the 5 * s bytes reserved above
+      mystrrep(r + l + 3, "&", "&amp;");
+      mystrrep(r + l + 3, "<", "&lt;");
+      strcat(r, "</a>");
+      free((*slst)[i]);
+    }
+    strcat(r, "</code>");
+    (*slst)[0] = r;
+    return 1;
+  } else if (check_xml_par(q, "type=", "stem")) {
+    if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
+  } else if (check_xml_par(q, "type=", "generate")) {
+    int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
+    if (n == 0) return 0;
+    char * q3 = strstr(q2 + 1, "<word");
+    if (q3) {
+      if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
+        return generate(slst, cw, cw2);
+      }
+    } else {
+      char ** slst2;
+      if ((q2 = strstr(q2 + 1, "<code")) &&
+          (n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) {
+        int n2 = generate(slst, cw, slst2, n);
+        freelist(&slst2, n);
+        return uniqlist(*slst, n2);
+      }
+    }
+  }
+  return 0;
+}
+
+
+#ifdef HUNSPELL_EXPERIMENTAL
// XXX need UTF-8 support
char * Hunspell::morph_with_correction(const char * word)
{
- char cw[MAXWORDUTF8LEN + 4];
- char wspace[MAXWORDUTF8LEN + 4];
- if (! pSMgr) return 0;
+ char cw[MAXWORDUTF8LEN];
+ char wspace[MAXWORDUTF8LEN];
+ if (! pSMgr || maxdic == 0) return NULL;
int wl = strlen(word);
if (utf8) {
- if (wl >= MAXWORDUTF8LEN) return 0;
+ if (wl >= MAXWORDUTF8LEN) return NULL;
} else {
- if (wl >= MAXWORDLEN) return 0;
+ if (wl >= MAXWORDLEN) return NULL;
}
int captype = 0;
int abbv = 0;
wl = cleanword(cw, word, &captype, &abbv);
- if (wl == 0) return 0;
+ if (wl == 0) return NULL;
char result[MAXLNLEN];
char * st = NULL;
-
+
*result = '\0';
-
-
+
+
switch(captype) {
- case NOCAP: {
+ case NOCAP: {
st = pSMgr->suggest_morph_for_spelling_error(cw);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
- if (abbv) {
- memcpy(wspace,cw,wl);
+ if (abbv) {
+ memcpy(wspace,cw,wl);
*(wspace+wl) = '.';
*(wspace+wl+1) = '\0';
st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
+ if (*result) mystrcat(result, "\n", MAXLNLEN);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
}
break;
}
- case INITCAP: {
+ case INITCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall(wspace);
st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
- }
- st = pSMgr->suggest_morph_for_spelling_error(cw);
+ }
+ st = pSMgr->suggest_morph_for_spelling_error(cw);
if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
+ if (*result) mystrcat(result, "\n", MAXLNLEN);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
- if (abbv) {
- memcpy(wspace,cw,wl);
+ if (abbv) {
+ memcpy(wspace,cw,wl);
*(wspace+wl) = '.';
*(wspace+wl+1) = '\0';
mkallsmall(wspace);
st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
+ if (*result) mystrcat(result, "\n", MAXLNLEN);
+ mystrcat(result, st, MAXLNLEN);
free(st);
- }
+ }
mkinitcap(wspace);
st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
+ if (*result) mystrcat(result, "\n", MAXLNLEN);
+ mystrcat(result, st, MAXLNLEN);
free(st);
- }
+ }
}
break;
}
- case HUHCAP: {
+ case HUHCAP: {
st = pSMgr->suggest_morph_for_spelling_error(cw);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
memcpy(wspace,cw,(wl+1));
mkallsmall(wspace);
st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
+ if (*result) mystrcat(result, "\n", MAXLNLEN);
+ mystrcat(result, st, MAXLNLEN);
free(st);
- }
+ }
break;
}
- case ALLCAP: {
+ case ALLCAP: {
memcpy(wspace,cw,(wl+1));
st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
- }
+ }
mkallsmall(wspace);
st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
+ if (*result) mystrcat(result, "\n", MAXLNLEN);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
- mkinitcap(wspace);
- st = pSMgr->suggest_morph_for_spelling_error(wspace);
+ mkinitcap(wspace);
+ st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
+ if (*result) mystrcat(result, "\n", MAXLNLEN);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
- if (abbv) {
+ if (abbv) {
memcpy(wspace,cw,(wl+1));
*(wspace+wl) = '.';
*(wspace+wl+1) = '\0';
- if (*result) strcat(result, "\n");
+ if (*result) mystrcat(result, "\n", MAXLNLEN);
st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
- strcat(result, st);
- free(st);
- }
+ mystrcat(result, st, MAXLNLEN);
+ free(st);
+ }
mkallsmall(wspace);
st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
+ if (*result) mystrcat(result, "\n", MAXLNLEN);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
- mkinitcap(wspace);
- st = pSMgr->suggest_morph_for_spelling_error(wspace);
+ mkinitcap(wspace);
+ st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
+ if (*result) mystrcat(result, "\n", MAXLNLEN);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
- }
+ }
break;
}
}
- if (result) return mystrdup(result);
+ if (*result) return mystrdup(result);
return NULL;
}
-/* analyze word
- * return line count
- * XXX need a better data structure for morphological analysis */
-int Hunspell::analyze(char ***out, const char *word) {
- int n = 0;
- if (!word) return 0;
- char * m = morph(word);
- if(!m) return 0;
- if (!out) return line_tok(m, out);
-
- // without memory allocation
- /* BUG missing buffer size checking */
- int i, p;
- for(p = 0, i = 0; m[i]; i++) {
- if(m[i] == '\n' || !m[i+1]) {
- n++;
- strncpy((*out)[n++], m + p, i - p + 1);
- if (m[i] == '\n') (*out)[n++][i - p] = '\0';
- if(!m[i+1]) break;
- p = i + 1;
- }
- }
- free(m);
- return n;
-}
-
#endif // END OF HUNSPELL_EXPERIMENTAL CODE
Hunhandle *Hunspell_create(FILE* aff_handle, FILE* dic_handle)
@@ -1725,6 +1893,17 @@
#endif
}
+
+Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
+ const char * key)
+{
+#ifdef HUNSPELL_CHROME_CLIENT
+ return NULL;
+#else
+ return (Hunhandle*)(new Hunspell(affpath, dpath, key));
+#endif
+}
+
void Hunspell_destroy(Hunhandle *pHunspell)
{
delete (Hunspell*)(pHunspell);
@@ -1745,3 +1924,57 @@
return ((Hunspell*)pHunspell)->suggest(slst, word);
}
+int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
+{
+ return ((Hunspell*)pHunspell)->analyze(slst, word);
+}
+
+int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
+{
+ return ((Hunspell*)pHunspell)->stem(slst, word);
+}
+
+int Hunspell_stem(Hunhandle *pHunspell, char*** slst, char** desc, int n)
+{
+ return ((Hunspell*)pHunspell)->stem(slst, desc, n);
+}
+
+int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
+ const char * word2)
+{
+ return ((Hunspell*)pHunspell)->generate(slst, word, word2);
+}
+
+int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
+ char** desc, int n)
+{
+ return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
+}
+
+ /* functions for run-time modification of the dictionary */
+
+ /* add word to the run-time dictionary */
+
+int Hunspell_add(Hunhandle *pHunspell, const char * word) {
+ return ((Hunspell*)pHunspell)->add(word);
+}
+
+ /* add word to the run-time dictionary with affix flags of
+ * the example (a dictionary word): Hunspell will recognize
+ * affixed forms of the new word, too.
+ */
+
+int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
+ const char * example) {
+ return ((Hunspell*)pHunspell)->add_with_affix(word, example);
+}
+
+ /* remove word from the run-time dictionary */
+
+int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
+ return ((Hunspell*)pHunspell)->remove(word);
+}
+
+void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n) {
+ freelist(slst, n);
+}
Property changes on: chrome\third_party\hunspell\src\hunspell\hunspell.cxx
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « chrome/third_party/hunspell/src/hunspell/hunspell.hxx ('k') | chrome/third_party/hunspell/src/hunspell/hunzip.hxx » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698