Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(490)

Side by Side Diff: third_party/hunspell_new/src/hunspell/hunspell.cxx

Issue 1135173004: Rename third_party/hunspell_new back to third_party/hunspell. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #include "license.hunspell"
2 #include "license.myspell"
3
4 #include <stdlib.h>
5 #include <string.h>
6 #include <stdio.h>
7
8 #include "hunspell.hxx"
9 #include "hunspell.h"
10 #ifndef HUNSPELL_CHROME_CLIENT
11 #ifndef MOZILLA_CLIENT
12 # include "config.h"
13 #endif
14 #endif
15 #include "csutil.hxx"
16
17 #ifdef HUNSPELL_CHROME_CLIENT
18 Hunspell::Hunspell(const unsigned char* bdict_data, size_t bdict_length)
19 #else
20 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
21 #endif
22 {
23 encoding = NULL;
24 csconv = NULL;
25 utf8 = 0;
26 complexprefixes = 0;
27 #ifndef HUNSPELL_CHROME_CLIENT
28 affixpath = mystrdup(affpath);
29 #endif
30 maxdic = 0;
31
32 #ifdef HUNSPELL_CHROME_CLIENT
33 bdict_reader = new hunspell::BDictReader;
34 bdict_reader->Init(bdict_data, bdict_length);
35
36 pHMgr[0] = new HashMgr(bdict_reader);
37 if (pHMgr[0]) maxdic = 1;
38
39 pAMgr = new AffixMgr(bdict_reader, pHMgr, &maxdic);
40 #else
41 /* first set up the hash manager */
42 pHMgr[0] = new HashMgr(dpath, affpath, key);
43 if (pHMgr[0]) maxdic = 1;
44
45 /* next set up the affix manager */
46 /* it needs access to the hash manager lookup methods */
47 pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
48 #endif
49
50 /* get the preferred try string and the dictionary */
51 /* encoding from the Affix Manager for that dictionary */
52 char * try_string = pAMgr->get_try_string();
53 encoding = pAMgr->get_encoding();
54 langnum = pAMgr->get_langnum();
55 utf8 = pAMgr->get_utf8();
56 if (!utf8)
57 csconv = get_current_cs(encoding);
58 complexprefixes = pAMgr->get_complexprefixes();
59 wordbreak = pAMgr->get_breaktable();
60
61 /* and finally set up the suggestion manager */
62 #ifdef HUNSPELL_CHROME_CLIENT
63 pSMgr = new SuggestMgr(bdict_reader, try_string, MAXSUGGESTION, pAMgr);
64 #else
65 pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
66 #endif
67 if (try_string) free(try_string);
68 }
69
70 Hunspell::~Hunspell()
71 {
72 if (pSMgr) delete pSMgr;
73 if (pAMgr) delete pAMgr;
74 for (int i = 0; i < maxdic; i++) delete pHMgr[i];
75 maxdic = 0;
76 pSMgr = NULL;
77 pAMgr = NULL;
78 #ifdef MOZILLA_CLIENT
79 delete [] csconv;
80 #endif
81 csconv= NULL;
82 if (encoding) free(encoding);
83 encoding = NULL;
84 #ifdef HUNSPELL_CHROME_CLIENT
85 if (bdict_reader) delete bdict_reader;
86 bdict_reader = NULL;
87 #else
88 if (affixpath) free(affixpath);
89 affixpath = NULL;
90 #endif
91 }
92
93 #ifndef HUNSPELL_CHROME_CLIENT
94 // load extra dictionaries
95 int Hunspell::add_dic(const char * dpath, const char * key) {
96 if (maxdic == MAXDIC || !affixpath) return 1;
97 pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
98 if (pHMgr[maxdic]) maxdic++; else return 1;
99 return 0;
100 }
101 #endif
102
103 // make a copy of src at destination while removing all leading
104 // blanks and removing any trailing periods after recording
105 // their presence with the abbreviation flag
106 // also since already going through character by character,
107 // set the capitalization type
108 // return the length of the "cleaned" (and UTF-8 encoded) word
109
110 int Hunspell::cleanword2(char * dest, const char * src,
111 w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
112 {
113 unsigned char * p = (unsigned char *) dest;
114 const unsigned char * q = (const unsigned char * ) src;
115
116 // first skip over any leading blanks
117 while ((*q != '\0') && (*q == ' ')) q++;
118
119 // now strip off any trailing periods (recording their presence)
120 *pabbrev = 0;
121 int nl = strlen((const char *)q);
122 while ((nl > 0) && (*(q+nl-1)=='.')) {
123 nl--;
124 (*pabbrev)++;
125 }
126
127 // if no characters are left it can't be capitalized
128 if (nl <= 0) {
129 *pcaptype = NOCAP;
130 *p = '\0';
131 return 0;
132 }
133
134 strncpy(dest, (char *) q, nl);
135 *(dest + nl) = '\0';
136 nl = strlen(dest);
137 if (utf8) {
138 *nc = u8_u16(dest_utf, MAXWORDLEN, dest);
139 // don't check too long words
140 if (*nc >= MAXWORDLEN) return 0;
141 if (*nc == -1) { // big Unicode character (non BMP area)
142 *pcaptype = NOCAP;
143 return nl;
144 }
145 *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);
146 } else {
147 *pcaptype = get_captype(dest, nl, csconv);
148 *nc = nl;
149 }
150 return nl;
151 }
152
153 int Hunspell::cleanword(char * dest, const char * src,
154 int * pcaptype, int * pabbrev)
155 {
156 unsigned char * p = (unsigned char *) dest;
157 const unsigned char * q = (const unsigned char * ) src;
158 int firstcap = 0;
159
160 // first skip over any leading blanks
161 while ((*q != '\0') && (*q == ' ')) q++;
162
163 // now strip off any trailing periods (recording their presence)
164 *pabbrev = 0;
165 int nl = strlen((const char *)q);
166 while ((nl > 0) && (*(q+nl-1)=='.')) {
167 nl--;
168 (*pabbrev)++;
169 }
170
171 // if no characters are left it can't be capitalized
172 if (nl <= 0) {
173 *pcaptype = NOCAP;
174 *p = '\0';
175 return 0;
176 }
177
178 // now determine the capitalization type of the first nl letters
179 int ncap = 0;
180 int nneutral = 0;
181 int nc = 0;
182
183 if (!utf8) {
184 while (nl > 0) {
185 nc++;
186 if (csconv[(*q)].ccase) ncap++;
187 if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
188 *p++ = *q++;
189 nl--;
190 }
191 // remember to terminate the destination string
192 *p = '\0';
193 firstcap = csconv[(unsigned char)(*dest)].ccase;
194 } else {
195 unsigned short idx;
196 w_char t[MAXWORDLEN];
197 nc = u8_u16(t, MAXWORDLEN, src);
198 for (int i = 0; i < nc; i++) {
199 idx = (t[i].h << 8) + t[i].l;
200 unsigned short low = unicodetolower(idx, langnum);
201 if (idx != low) ncap++;
202 if (unicodetoupper(idx, langnum) == low) nneutral++;
203 }
204 u16_u8(dest, MAXWORDUTF8LEN, t, nc);
205 if (ncap) {
206 idx = (t[0].h << 8) + t[0].l;
207 firstcap = (idx != unicodetolower(idx, langnum));
208 }
209 }
210
211 // now finally set the captype
212 if (ncap == 0) {
213 *pcaptype = NOCAP;
214 } else if ((ncap == 1) && firstcap) {
215 *pcaptype = INITCAP;
216 } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
217 *pcaptype = ALLCAP;
218 } else if ((ncap > 1) && firstcap) {
219 *pcaptype = HUHINITCAP;
220 } else {
221 *pcaptype = HUHCAP;
222 }
223 return strlen(dest);
224 }
225
226 void Hunspell::mkallcap(char * p)
227 {
228 if (utf8) {
229 w_char u[MAXWORDLEN];
230 int nc = u8_u16(u, MAXWORDLEN, p);
231 unsigned short idx;
232 for (int i = 0; i < nc; i++) {
233 idx = (u[i].h << 8) + u[i].l;
234 if (idx != unicodetoupper(idx, langnum)) {
235 u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
236 u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
237 }
238 }
239 u16_u8(p, MAXWORDUTF8LEN, u, nc);
240 } else {
241 while (*p != '\0') {
242 *p = csconv[((unsigned char) *p)].cupper;
243 p++;
244 }
245 }
246 }
247
248 int Hunspell::mkallcap2(char * p, w_char * u, int nc)
249 {
250 if (utf8) {
251 unsigned short idx;
252 for (int i = 0; i < nc; i++) {
253 idx = (u[i].h << 8) + u[i].l;
254 unsigned short up = unicodetoupper(idx, langnum);
255 if (idx != up) {
256 u[i].h = (unsigned char) (up >> 8);
257 u[i].l = (unsigned char) (up & 0x00FF);
258 }
259 }
260 u16_u8(p, MAXWORDUTF8LEN, u, nc);
261 return strlen(p);
262 } else {
263 while (*p != '\0') {
264 *p = csconv[((unsigned char) *p)].cupper;
265 p++;
266 }
267 }
268 return nc;
269 }
270
271
272 void Hunspell::mkallsmall(char * p)
273 {
274 while (*p != '\0') {
275 *p = csconv[((unsigned char) *p)].clower;
276 p++;
277 }
278 }
279
280 int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
281 {
282 if (utf8) {
283 unsigned short idx;
284 for (int i = 0; i < nc; i++) {
285 idx = (u[i].h << 8) + u[i].l;
286 unsigned short low = unicodetolower(idx, langnum);
287 if (idx != low) {
288 u[i].h = (unsigned char) (low >> 8);
289 u[i].l = (unsigned char) (low & 0x00FF);
290 }
291 }
292 u16_u8(p, MAXWORDUTF8LEN, u, nc);
293 return strlen(p);
294 } else {
295 while (*p != '\0') {
296 *p = csconv[((unsigned char) *p)].clower;
297 p++;
298 }
299 }
300 return nc;
301 }
302
303 // convert UTF-8 sharp S codes to latin 1
304 char * Hunspell::sharps_u8_l1(char * dest, char * source) {
305 char * p = dest;
306 *p = *source;
307 for (p++, source++; *(source - 1); p++, source++) {
308 *p = *source;
309 if (*source == '\x9F') *--p = '\xDF';
310 }
311 return dest;
312 }
313
314 // recursive search for right ss - sharp s permutations
315 hentry * Hunspell::spellsharps(char * base, char * pos, int n,
316 int repnum, char * tmp, int * info, char **root) {
317 pos = strstr(pos, "ss");
318 if (pos && (n < MAXSHARPS)) {
319 *pos = '\xC3';
320 *(pos + 1) = '\x9F';
321 hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, ro ot);
322 if (h) return h;
323 *pos = 's';
324 *(pos + 1) = 's';
325 h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
326 if (h) return h;
327 } else if (repnum > 0) {
328 if (utf8) return checkword(base, info, root);
329 return checkword(sharps_u8_l1(tmp, base), info, root);
330 }
331 return NULL;
332 }
333
334 int Hunspell::is_keepcase(const hentry * rv) {
335 return pAMgr && rv->astr && pAMgr->get_keepcase() &&
336 TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
337 }
338
339 /* insert a word to the beginning of the suggestion array and return ns */
340 int Hunspell::insert_sug(char ***slst, char * word, int ns) {
341 char * dup = mystrdup(word);
342 if (!dup) return ns;
343 if (ns == MAXSUGGESTION) {
344 ns--;
345 free((*slst)[ns]);
346 }
347 for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
348 (*slst)[0] = dup;
349 return ns + 1;
350 }
351
352 int Hunspell::spell(const char * word, int * info, char ** root)
353 {
354 #ifdef HUNSPELL_CHROME_CLIENT
355 if (pHMgr[0]) pHMgr[0]->EmptyHentryCache();
356 #endif
357 struct hentry * rv=NULL;
358 // need larger vector. For example, Turkish capital letter I converted a
359 // 2-byte UTF-8 character (dotless i) by mkallsmall.
360 char cw[MAXWORDUTF8LEN];
361 char wspace[MAXWORDUTF8LEN];
362 w_char unicw[MAXWORDLEN];
363 // Hunspell supports XML input of the simplified API (see manual)
364 if (strcmp(word, SPELL_XML) == 0) return 1;
365 int nc = strlen(word);
366 int wl2 = 0;
367 if (utf8) {
368 if (nc >= MAXWORDUTF8LEN) return 0;
369 } else {
370 if (nc >= MAXWORDLEN) return 0;
371 }
372 int captype = 0;
373 int abbv = 0;
374 int wl = 0;
375
376 // input conversion
377 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
378 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &cap type, &abbv);
379 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
380
381 int info2 = 0;
382 if (wl == 0 || maxdic == 0) return 1;
383 if (root) *root = NULL;
384
385 // allow numbers with dots, dashes and commas (but forbid double separators: " ..", "--" etc.)
386 enum { NBEGIN, NNUM, NSEP };
387 int nstate = NBEGIN;
388 int i;
389
390 for (i = 0; (i < wl); i++) {
391 if ((cw[i] <= '9') && (cw[i] >= '0')) {
392 nstate = NNUM;
393 } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
394 if ((nstate == NSEP) || (i == 0)) break;
395 nstate = NSEP;
396 } else break;
397 }
398 if ((i == wl) && (nstate == NNUM)) return 1;
399 if (!info) info = &info2; else *info = 0;
400
401 switch(captype) {
402 case HUHCAP:
403 case HUHINITCAP:
404 *info += SPELL_ORIGCAP;
405 case NOCAP: {
406 rv = checkword(cw, info, root);
407 if ((abbv) && !(rv)) {
408 memcpy(wspace,cw,wl);
409 *(wspace+wl) = '.';
410 *(wspace+wl+1) = '\0';
411 rv = checkword(wspace, info, root);
412 }
413 break;
414 }
415 case ALLCAP: {
416 *info += SPELL_ORIGCAP;
417 rv = checkword(cw, info, root);
418 if (rv) break;
419 if (abbv) {
420 memcpy(wspace,cw,wl);
421 *(wspace+wl) = '.';
422 *(wspace+wl+1) = '\0';
423 rv = checkword(wspace, info, root);
424 if (rv) break;
425 }
426 // Spec. prefix handling for Catalan, French, Italian:
427 // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
428 if (pAMgr && strchr(cw, '\'')) {
429 wl = mkallsmall2(cw, unicw, nc);
430 //There are no really sane circumstances where this could fail,
431 //but anyway...
432 if (char * apostrophe = strchr(cw, '\'')) {
433 if (utf8) {
434 w_char tmpword[MAXWORDLEN];
435 *apostrophe = '\0';
436 wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
437 *apostrophe = '\'';
438 if (wl2 < nc) {
439 mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
440 rv = checkword(cw, info, root);
441 if (rv) break;
442 }
443 } else {
444 mkinitcap2(apostrophe + 1, unicw, nc);
445 rv = checkword(cw, info, root);
446 if (rv) break;
447 }
448 }
449 mkinitcap2(cw, unicw, nc);
450 rv = checkword(cw, info, root);
451 if (rv) break;
452 }
453 if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
454 char tmpword[MAXWORDUTF8LEN];
455 wl = mkallsmall2(cw, unicw, nc);
456 memcpy(wspace,cw,(wl+1));
457 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
458 if (!rv) {
459 wl2 = mkinitcap2(cw, unicw, nc);
460 rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
461 }
462 if ((abbv) && !(rv)) {
463 *(wspace+wl) = '.';
464 *(wspace+wl+1) = '\0';
465 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
466 if (!rv) {
467 memcpy(wspace, cw, wl2);
468 *(wspace+wl2) = '.';
469 *(wspace+wl2+1) = '\0';
470 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, ro ot);
471 }
472 }
473 if (rv) break;
474 }
475 }
476 case INITCAP: {
477 *info += SPELL_ORIGCAP;
478 wl = mkallsmall2(cw, unicw, nc);
479 memcpy(wspace,cw,(wl+1));
480 wl2 = mkinitcap2(cw, unicw, nc);
481 if (captype == INITCAP) *info += SPELL_INITCAP;
482 rv = checkword(cw, info, root);
483 if (captype == INITCAP) *info -= SPELL_INITCAP;
484 // forbid bad capitalization
485 // (for example, ijs -> Ijs instead of IJs in Dutch)
486 // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
487 if (*info & SPELL_FORBIDDEN) {
488 rv = NULL;
489 break;
490 }
491 if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
492 if (rv) break;
493
494 rv = checkword(wspace, info, root);
495 if (abbv && !rv) {
496
497 *(wspace+wl) = '.';
498 *(wspace+wl+1) = '\0';
499 rv = checkword(wspace, info, root);
500 if (!rv) {
501 memcpy(wspace, cw, wl2);
502 *(wspace+wl2) = '.';
503 *(wspace+wl2+1) = '\0';
504 if (captype == INITCAP) *info += SPELL_INITCAP;
505 rv = checkword(wspace, info, root);
506 if (captype == INITCAP) *info -= SPELL_INITCAP;
507 if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
508 break;
509 }
510 }
511 if (rv && is_keepcase(rv) &&
512 ((captype == ALLCAP) ||
513 // if CHECKSHARPS: KEEPCASE words with \xDF are allowed
514 // in INITCAP form, too.
515 !(pAMgr->get_checksharps() &&
516 ((utf8 && strstr(wspace, "\xC3\x9F")) ||
517 (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;
518 break;
519 }
520 }
521
522 if (rv) {
523 if (pAMgr && pAMgr->get_warn() && rv->astr &&
524 TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
525 *info += SPELL_WARN;
526 if (pAMgr->get_forbidwarn()) return 0;
527 return HUNSPELL_OK_WARN;
528 }
529 return HUNSPELL_OK;
530 }
531
532 // recursive breaking at break points
533 if (wordbreak) {
534 char * s;
535 char r;
536 int nbr = 0;
537 wl = strlen(cw);
538 int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
539
540 // calculate break points for recursion limit
541 for (int j = 0; j < numbreak; j++) {
542 s = cw;
543 do {
544 s = (char *) strstr(s, wordbreak[j]);
545 if (s) {
546 nbr++;
547 s++;
548 }
549 } while (s);
550 }
551 if (nbr >= 10) return 0;
552
553 // check boundary patterns (^begin and end$)
554 for (int j = 0; j < numbreak; j++) {
555 int plen = strlen(wordbreak[j]);
556 if (plen == 1 || plen > wl) continue;
557 if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0
558 && spell(cw + plen - 1)) return 1;
559 if (wordbreak[j][plen - 1] == '$' &&
560 strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
561 r = cw[wl - plen + 1];
562 cw[wl - plen + 1] = '\0';
563 if (spell(cw)) return 1;
564 cw[wl - plen + 1] = r;
565 }
566 }
567
568 // other patterns
569 for (int j = 0; j < numbreak; j++) {
570 int plen = strlen(wordbreak[j]);
571 s=(char *) strstr(cw, wordbreak[j]);
572 if (s && (s > cw) && (s < cw + wl - plen)) {
573 if (!spell(s + plen)) continue;
574 r = *s;
575 *s = '\0';
576 // examine 2 sides of the break point
577 if (spell(cw)) return 1;
578 *s = r;
579
580 // LANG_hu: spec. dash rule
581 if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
582 r = s[1];
583 s[1] = '\0';
584 if (spell(cw)) return 1; // check the first part with dash
585 s[1] = r;
586 }
587 // end of LANG speficic region
588
589 }
590 }
591 }
592
593 return 0;
594 }
595
596 struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
597 {
598 struct hentry * he = NULL;
599 int len, i;
600 char w2[MAXWORDUTF8LEN];
601 const char * word;
602
603 char * ignoredchars = pAMgr->get_ignore();
604 if (ignoredchars != NULL) {
605 strcpy(w2, w);
606 if (utf8) {
607 int ignoredchars_utf16_len;
608 unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredch ars_utf16_len);
609 remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len) ;
610 } else {
611 remove_ignored_chars(w2,ignoredchars);
612 }
613 word = w2;
614 } else word = w;
615
616 len = strlen(word);
617
618 if (!len)
619 return NULL;
620
621 #ifdef HUNSPELL_CHROME_CLIENT
622 // We need to check if the word length is valid to make coverity (Event
623 // fixed_size_dest: Possible overrun of N byte fixed size buffer) happy.
624 if ((utf8 && strlen(word) >= MAXWORDUTF8LEN) || (!utf8 && strlen(word) >= MAXW ORDLEN))
625 return NULL;
626 #endif
627
628 // word reversing wrapper for complex prefixes
629 if (complexprefixes) {
630 if (word != w2) {
631 strcpy(w2, word);
632 word = w2;
633 }
634 if (utf8) reverseword_utf(w2); else reverseword(w2);
635 }
636
637 // look word in hash table
638 for (i = 0; (i < maxdic) && !he; i ++) {
639 he = (pHMgr[i])->lookup(word);
640
641 // check forbidden and onlyincompound words
642 if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenwor d(), he->alen)) {
643 if (info) *info += SPELL_FORBIDDEN;
644 // LANG_hu section: set dash information for suggestions
645 if (langnum == LANG_hu) {
646 if (pAMgr->get_compoundflag() &&
647 TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
648 if (info) *info += SPELL_COMPOUND;
649 }
650 }
651 return NULL;
652 }
653
654 // he = next not needaffix, onlyincompound homonym or onlyupcase word
655 while (he && (he->astr) &&
656 ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->al en)) ||
657 (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompou nd(), he->alen)) ||
658 (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he- >alen))
659 )) he = he->next_homonym;
660 }
661
662 // check with affixes
663 if (!he && pAMgr) {
664 // try stripping off affixes */
665 he = pAMgr->affix_check(word, len, 0);
666
667 // check compound restriction and onlyupcase
668 if (he && he->astr && (
669 (pAMgr->get_onlyincompound() &&
670 TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
671 (info && (*info & SPELL_INITCAP) &&
672 TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
673 he = NULL;
674 }
675
676 if (he) {
677 if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword( ), he->alen)) {
678 if (info) *info += SPELL_FORBIDDEN;
679 return NULL;
680 }
681 if (root) {
682 *root = mystrdup(he->word);
683 if (*root && complexprefixes) {
684 if (utf8) reverseword_utf(*root); else reverseword(*root);
685 }
686 }
687 // try check compound word
688 } else if (pAMgr->get_compound()) {
689 he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);
690 // LANG_hu section: `moving rule' with last dash
691 if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {
692 char * dup = mystrdup(word);
693 if (!dup) return NULL;
694 dup[len-1] = '\0';
695 he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, i nfo);
696 free(dup);
697 }
698 // end of LANG speficic region
699 if (he) {
700 if (root) {
701 *root = mystrdup(he->word);
702 if (*root && complexprefixes) {
703 if (utf8) reverseword_utf(*root); else reverseword(*root );
704 }
705 }
706 if (info) *info += SPELL_COMPOUND;
707 }
708 }
709
710 }
711
712 return he;
713 }
714
715 int Hunspell::suggest(char*** slst, const char * word)
716 {
717 #ifdef HUNSPELL_CHROME_CLIENT
718 if (pHMgr[0]) pHMgr[0]->EmptyHentryCache();
719 #endif
720 int onlycmpdsug = 0;
721 char cw[MAXWORDUTF8LEN];
722 char wspace[MAXWORDUTF8LEN];
723 if (!pSMgr || maxdic == 0) return 0;
724 w_char unicw[MAXWORDLEN];
725 *slst = NULL;
726 // process XML input of the simplified API (see manual)
727 if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
728 return spellml(slst, word);
729 }
730 int nc = strlen(word);
731 if (utf8) {
732 if (nc >= MAXWORDUTF8LEN) return 0;
733 } else {
734 if (nc >= MAXWORDLEN) return 0;
735 }
736 int captype = 0;
737 int abbv = 0;
738 int wl = 0;
739
740 // input conversion
741 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
742 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &cap type, &abbv);
743 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
744
745 if (wl == 0) return 0;
746 int ns = 0;
747 int capwords = 0;
748
749 // check capitalized form for FORCEUCASE
750 if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
751 int info = SPELL_ORIGCAP;
752 char ** wlst;
753 if (checkword(cw, &info, NULL)) {
754 if (*slst) {
755 wlst = *slst;
756 } else {
757 wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *));
758 if (wlst == NULL) return -1;
759 *slst = wlst;
760 for (int i = 0; i < MAXSUGGESTION; i++) {
761 wlst[i] = NULL;
762 }
763 }
764 wlst[0] = mystrdup(cw);
765 mkinitcap(wlst[0]);
766 return 1;
767 }
768 }
769
770 switch(captype) {
771 case NOCAP: {
772 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
773 break;
774 }
775
776 case INITCAP: {
777 capwords = 1;
778 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
779 if (ns == -1) break;
780 memcpy(wspace,cw,(wl+1));
781 mkallsmall2(wspace, unicw, nc);
782 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
783 break;
784 }
785 case HUHINITCAP:
786 capwords = 1;
787 case HUHCAP: {
788 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
789 if (ns != -1) {
790 int prevns;
791 // something.The -> something. The
792 char * dot = strchr(cw, '.');
793 if (dot && (dot > cw)) {
794 int captype_;
795 if (utf8) {
796 w_char w_[MAXWORDLEN];
797 int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
798 captype_ = get_captype_utf8(w_, wl_, langnum);
799 } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
800 if (captype_ == INITCAP) {
801 char * st = mystrdup(cw);
802 if (st) st = (char *) realloc(st, wl + 2);
803 if (st) {
804 st[(dot - cw) + 1] = ' ';
805 strcpy(st + (dot - cw) + 2, dot + 1);
806 ns = insert_sug(slst, st, ns);
807 free(st);
808 }
809 }
810 }
811 if (captype == HUHINITCAP) {
812 // TheOpenOffice.org -> The OpenOffice.org
813 memcpy(wspace,cw,(wl+1));
814 mkinitsmall2(wspace, unicw, nc);
815 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
816 }
817 memcpy(wspace,cw,(wl+1));
818 mkallsmall2(wspace, unicw, nc);
819 if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
820 prevns = ns;
821 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
822 if (captype == HUHINITCAP) {
823 mkinitcap2(wspace, unicw, nc);
824 if (spell(wspace)) ns = insert_sug(slst, wspace, ns) ;
825 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
826 }
827 // aNew -> "a New" (instead of "a new")
828 for (int j = prevns; j < ns; j++) {
829 char * space = strchr((*slst)[j],' ');
830 if (space) {
831 int slen = strlen(space + 1);
832 // different case after space (need capitalisati on)
833 if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
834 w_char w[MAXWORDLEN];
835 int wc = 0;
836 char * r = (*slst)[j];
837 if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
838 mkinitcap2(space + 1, w, wc);
839 // set as first suggestion
840 for (int k = j; k > 0; k--) (*slst)[k] = (*s lst)[k - 1];
841 (*slst)[0] = r;
842 }
843 }
844 }
845 }
846 break;
847 }
848
849 case ALLCAP: {
850 memcpy(wspace, cw, (wl+1));
851 mkallsmall2(wspace, unicw, nc);
852 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
853 if (ns == -1) break;
854 if (pAMgr && pAMgr->get_keepcase() && spell(wspace))
855 ns = insert_sug(slst, wspace, ns);
856 mkinitcap2(wspace, unicw, nc);
857 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
858 for (int j=0; j < ns; j++) {
859 mkallcap((*slst)[j]);
860 if (pAMgr && pAMgr->get_checksharps()) {
861 char * pos;
862 if (utf8) {
863 pos = strstr((*slst)[j], "\xC3\x9F");
864 while (pos) {
865 *pos = 'S';
866 *(pos+1) = 'S';
867 pos = strstr(pos+2, "\xC3\x9F");
868 }
869 } else {
870 pos = strchr((*slst)[j], '\xDF');
871 while (pos) {
872 (*slst)[j] = (char *) realloc((*slst)[j], st rlen((*slst)[j]) + 2);
873 mystrrep((*slst)[j], "\xDF", "SS");
874 pos = strchr((*slst)[j], '\xDF');
875 }
876 }
877 }
878 }
879 break;
880 }
881 }
882
883 // LANG_hu section: replace '-' with ' ' in Hungarian
884 if (langnum == LANG_hu) {
885 for (int j=0; j < ns; j++) {
886 char * pos = strchr((*slst)[j],'-');
887 if (pos) {
888 int info;
889 char w[MAXWORDUTF8LEN];
890 *pos = '\0';
891 strcpy(w, (*slst)[j]);
892 strcat(w, pos + 1);
893 spell(w, &info, NULL);
894 if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
895 *pos = ' ';
896 } else *pos = '-';
897 }
898 }
899 }
900 // END OF LANG_hu section
901
902 // try ngram approach since found nothing or only compound words
903 if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && ( *slst)) {
904 switch(captype) {
905 case NOCAP: {
906 ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
907 break;
908 }
909 case HUHINITCAP:
910 capwords = 1;
911 case HUHCAP: {
912 memcpy(wspace,cw,(wl+1));
913 mkallsmall2(wspace, unicw, nc);
914 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
915 break;
916 }
917 case INITCAP: {
918 capwords = 1;
919 memcpy(wspace,cw,(wl+1));
920 mkallsmall2(wspace, unicw, nc);
921 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
922 break;
923 }
924 case ALLCAP: {
925 memcpy(wspace,cw,(wl+1));
926 mkallsmall2(wspace, unicw, nc);
927 int oldns = ns;
928 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
929 for (int j = oldns; j < ns; j++)
930 mkallcap((*slst)[j]);
931 break;
932 }
933 }
934 }
935
936 // try dash suggestion (Afo-American -> Afro-American)
937 if (char * pos = strchr(cw, '-')) {
938 char * ppos = cw;
939 int nodashsug = 1;
940 char ** nlst = NULL;
941 int nn = 0;
942 int last = 0;
943 if (*slst) {
944 for (int j = 0; j < ns && nodashsug == 1; j++) {
945 if (strchr((*slst)[j], '-')) nodashsug = 0;
946 }
947 }
948 while (nodashsug && !last) {
949 if (*pos == '\0') last = 1; else *pos = '\0';
950 if (!spell(ppos)) {
951 nn = suggest(&nlst, ppos);
952 for (int j = nn - 1; j >= 0; j--) {
953 strncpy(wspace, cw, ppos - cw);
954 strcpy(wspace + (ppos - cw), nlst[j]);
955 if (!last) {
956 strcat(wspace, "-");
957 strcat(wspace, pos + 1);
958 }
959 ns = insert_sug(slst, wspace, ns);
960 free(nlst[j]);
961 }
962 if (nlst != NULL) free(nlst);
963 nodashsug = 0;
964 }
965 if (!last) {
966 *pos = '-';
967 ppos = pos + 1;
968 pos = strchr(ppos, '-');
969 }
970 if (!pos) pos = cw + strlen(cw);
971 }
972 }
973
974 // word reversing wrapper for complex prefixes
975 if (complexprefixes) {
976 for (int j = 0; j < ns; j++) {
977 if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
978 }
979 }
980
981 // capitalize
982 if (capwords) for (int j=0; j < ns; j++) {
983 mkinitcap((*slst)[j]);
984 }
985
986 // expand suggestions with dot(s)
987 if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
988 for (int j = 0; j < ns; j++) {
989 (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
990 strcat((*slst)[j], word + strlen(word) - abbv);
991 }
992 }
993
994 // remove bad capitalized and forbidden forms
995 if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
996 switch (captype) {
997 case INITCAP:
998 case ALLCAP: {
999 int l = 0;
1000 for (int j=0; j < ns; j++) {
1001 if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {
1002 char s[MAXSWUTF8L];
1003 w_char w[MAXSWL];
1004 int len;
1005 if (utf8) {
1006 len = u8_u16(w, MAXSWL, (*slst)[j]);
1007 } else {
1008 strcpy(s, (*slst)[j]);
1009 len = strlen(s);
1010 }
1011 mkallsmall2(s, w, len);
1012 free((*slst)[j]);
1013 if (spell(s)) {
1014 (*slst)[l] = mystrdup(s);
1015 if ((*slst)[l]) l++;
1016 } else {
1017 mkinitcap2(s, w, len);
1018 if (spell(s)) {
1019 (*slst)[l] = mystrdup(s);
1020 if ((*slst)[l]) l++;
1021 }
1022 }
1023 } else {
1024 (*slst)[l] = (*slst)[j];
1025 l++;
1026 }
1027 }
1028 ns = l;
1029 }
1030 }
1031 }
1032
1033 // remove duplications
1034 int l = 0;
1035 for (int j = 0; j < ns; j++) {
1036 (*slst)[l] = (*slst)[j];
1037 for (int k = 0; k < l; k++) {
1038 if (strcmp((*slst)[k], (*slst)[j]) == 0) {
1039 free((*slst)[j]);
1040 l--;
1041 break;
1042 }
1043 }
1044 l++;
1045 }
1046 ns = l;
1047
1048 // output conversion
1049 rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1050 for (int j = 0; rl && j < ns; j++) {
1051 if (rl->conv((*slst)[j], wspace)) {
1052 free((*slst)[j]);
1053 (*slst)[j] = mystrdup(wspace);
1054 }
1055 }
1056
1057 // if suggestions removed by nosuggest, onlyincompound parameters
1058 if (l == 0 && *slst) {
1059 free(*slst);
1060 *slst = NULL;
1061 }
1062 return l;
1063 }
1064
1065 void Hunspell::free_list(char *** slst, int n) {
1066 freelist(slst, n);
1067 }
1068
1069 char * Hunspell::get_dic_encoding()
1070 {
1071 return encoding;
1072 }
1073
#ifdef HUNSPELL_EXPERIMENTAL
// XXX need UTF-8 support
// Experimental variant of suggest() built on SuggestMgr::suggest_auto().
//   slst - receives the suggestion array, or NULL when nothing is produced
//   word - NUL-terminated input word
// Returns the number of suggestions.
int Hunspell::suggest_auto(char*** slst, const char * word)
{
    char cw[MAXWORDUTF8LEN];
    char wspace[MAXWORDUTF8LEN];
    // Clear the output before any early return so that the caller never
    // sees an unset list pointer (HU, nsug in pSMgr->suggest).
    *slst = NULL;
    if (!pSMgr || maxdic == 0) return 0;
    int wl = strlen(word);
    if (utf8) {
        if (wl >= MAXWORDUTF8LEN) return 0;
    } else {
        if (wl >= MAXWORDLEN) return 0;
    }
    int captype = 0;
    int abbv = 0;
    wl = cleanword(cw, word, &captype, &abbv);
    if (wl == 0) return 0;
    int ns = 0;

    switch(captype) {
        case NOCAP: {
            ns = pSMgr->suggest_auto(slst, cw, ns);
            break;
        }

        case INITCAP: {
            memcpy(wspace,cw,(wl+1));
            mkallsmall(wspace);
            ns = pSMgr->suggest_auto(slst, wspace, ns);
            for (int j=0; j < ns; j++)
                mkinitcap((*slst)[j]);
            ns = pSMgr->suggest_auto(slst, cw, ns);
            break;
        }

        case HUHINITCAP:
        case HUHCAP: {
            ns = pSMgr->suggest_auto(slst, cw, ns);
            if (ns == 0) {
                memcpy(wspace,cw,(wl+1));
                mkallsmall(wspace);
                ns = pSMgr->suggest_auto(slst, wspace, ns);
            }
            break;
        }

        case ALLCAP: {
            memcpy(wspace,cw,(wl+1));
            mkallsmall(wspace);
            ns = pSMgr->suggest_auto(slst, wspace, ns);

            mkinitcap(wspace);
            ns = pSMgr->suggest_auto(slst, wspace, ns);

            for (int j=0; j < ns; j++)
                mkallcap((*slst)[j]);
            break;
        }
    }

    // word reversing wrapper for complex prefixes
    if (complexprefixes) {
        for (int j = 0; j < ns; j++) {
            if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
        }
    }

    // expand suggestions with dot(s)
    if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
        for (int j = 0; j < ns; j++) {
            char * grown = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
            // keep the undotted suggestion when it cannot be enlarged
            if (!grown) continue;
            (*slst)[j] = grown;
            strcat((*slst)[j], word + strlen(word) - abbv);
        }
    }

    // LANG_hu section: replace '-' with ' ' in Hungarian
    if (langnum == LANG_hu) {
        for (int j=0; j < ns; j++) {
            char * pos = strchr((*slst)[j],'-');
            if (pos) {
                // spell() may return before it writes to info, so start
                // from a defined value
                int info = 0;
                char w[MAXWORDUTF8LEN];
                *pos = '\0';
                strcpy(w, (*slst)[j]);
                strcat(w, pos + 1);
                spell(w, &info, NULL);
                if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
                    *pos = ' ';
                } else *pos = '-';
            }
        }
    }
    // END OF LANG_hu section
    return ns;
}
#endif
1173
1174 int Hunspell::stem(char*** slst, char ** desc, int n)
1175 {
1176 char result[MAXLNLEN];
1177 char result2[MAXLNLEN];
1178 *slst = NULL;
1179 if (n == 0) return 0;
1180 *result2 = '\0';
1181 for (int i = 0; i < n; i++) {
1182 *result = '\0';
1183 // add compound word parts (except the last one)
1184 char * s = (char *) desc[i];
1185 char * part = strstr(s, MORPH_PART);
1186 if (part) {
1187 char * nextpart = strstr(part + 1, MORPH_PART);
1188 while (nextpart) {
1189 copy_field(result + strlen(result), part, MORPH_PART);
1190 part = nextpart;
1191 nextpart = strstr(part + 1, MORPH_PART);
1192 }
1193 s = part;
1194 }
1195
1196 char **pl;
1197 char tok[MAXLNLEN];
1198 strcpy(tok, s);
1199 char * alt = strstr(tok, " | ");
1200 while (alt) {
1201 alt[1] = MSEP_ALT;
1202 alt = strstr(alt, " | ");
1203 }
1204 int pln = line_tok(tok, &pl, MSEP_ALT);
1205 for (int k = 0; k < pln; k++) {
1206 // add derivational suffixes
1207 if (strstr(pl[k], MORPH_DERI_SFX)) {
1208 // remove inflectional suffixes
1209 char * is = strstr(pl[k], MORPH_INFL_SFX);
1210 if (is) *is = '\0';
1211 char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
1212 if (sg) {
1213 char ** gen;
1214 int genl = line_tok(sg, &gen, MSEP_REC);
1215 free(sg);
1216 for (int j = 0; j < genl; j++) {
1217 sprintf(result2 + strlen(result2), "%c%s%s",
1218 MSEP_REC, result, gen[j]);
1219 }
1220 freelist(&gen, genl);
1221 }
1222 } else {
1223 sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
1224 if (strstr(pl[k], MORPH_SURF_PFX)) {
1225 copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
1226 }
1227 copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
1228 }
1229 }
1230 freelist(&pl, pln);
1231 }
1232 int sln = line_tok(result2, slst, MSEP_REC);
1233 return uniqlist(*slst, sln);
1234
1235 }
1236
1237 int Hunspell::stem(char*** slst, const char * word)
1238 {
1239 char ** pl;
1240 int pln = analyze(&pl, word);
1241 int pln2 = stem(slst, pl, pln);
1242 freelist(&pl, pln);
1243 return pln2;
1244 }
1245
1246 #ifdef HUNSPELL_EXPERIMENTAL
1247 int Hunspell::suggest_pos_stems(char*** slst, const char * word)
1248 {
1249 char cw[MAXWORDUTF8LEN];
1250 char wspace[MAXWORDUTF8LEN];
1251 if (! pSMgr || maxdic == 0) return 0;
1252 int wl = strlen(word);
1253 if (utf8) {
1254 if (wl >= MAXWORDUTF8LEN) return 0;
1255 } else {
1256 if (wl >= MAXWORDLEN) return 0;
1257 }
1258 int captype = 0;
1259 int abbv = 0;
1260 wl = cleanword(cw, word, &captype, &abbv);
1261 if (wl == 0) return 0;
1262
1263 int ns = 0; // ns=0 = normalized input
1264
1265 *slst = NULL; // HU, nsug in pSMgr->suggest
1266
1267 switch(captype) {
1268 case HUHCAP:
1269 case NOCAP: {
1270 ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1271
1272 if ((abbv) && (ns == 0)) {
1273 memcpy(wspace,cw,wl);
1274 *(wspace+wl) = '.';
1275 *(wspace+wl+1) = '\0';
1276 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1277 }
1278
1279 break;
1280 }
1281
1282 case INITCAP: {
1283
1284 ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1285
1286 if (ns == 0 || ((*slst)[0][0] == '#')) {
1287 memcpy(wspace,cw,(wl+1));
1288 mkallsmall(wspace);
1289 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1290 }
1291
1292 break;
1293
1294 }
1295
1296 case ALLCAP: {
1297 ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1298 if (ns != 0) break;
1299
1300 memcpy(wspace,cw,(wl+1));
1301 mkallsmall(wspace);
1302 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1303
1304 if (ns == 0) {
1305 mkinitcap(wspace);
1306 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1307 }
1308 break;
1309 }
1310 }
1311
1312 return ns;
1313 }
1314 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1315
1316 const char * Hunspell::get_wordchars()
1317 {
1318 return pAMgr->get_wordchars();
1319 }
1320
1321 unsigned short * Hunspell::get_wordchars_utf16(int * len)
1322 {
1323 return pAMgr->get_wordchars_utf16(len);
1324 }
1325
1326 void Hunspell::mkinitcap(char * p)
1327 {
1328 if (!utf8) {
1329 if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1330 } else {
1331 int len;
1332 w_char u[MAXWORDLEN];
1333 len = u8_u16(u, MAXWORDLEN, p);
1334 unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1335 u[0].h = (unsigned char) (i >> 8);
1336 u[0].l = (unsigned char) (i & 0x00FF);
1337 u16_u8(p, MAXWORDUTF8LEN, u, len);
1338 }
1339 }
1340
1341 int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
1342 {
1343 if (!utf8) {
1344 if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1345 } else if (nc > 0) {
1346 unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1347 u[0].h = (unsigned char) (i >> 8);
1348 u[0].l = (unsigned char) (i & 0x00FF);
1349 u16_u8(p, MAXWORDUTF8LEN, u, nc);
1350 return strlen(p);
1351 }
1352 return nc;
1353 }
1354
1355 int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
1356 {
1357 if (!utf8) {
1358 if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;
1359 } else if (nc > 0) {
1360 unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
1361 u[0].h = (unsigned char) (i >> 8);
1362 u[0].l = (unsigned char) (i & 0x00FF);
1363 u16_u8(p, MAXWORDUTF8LEN, u, nc);
1364 return strlen(p);
1365 }
1366 return nc;
1367 }
1368
1369 int Hunspell::add(const char * word)
1370 {
1371 if (pHMgr[0]) return (pHMgr[0])->add(word);
1372 return 0;
1373 }
1374
1375 int Hunspell::add_with_affix(const char * word, const char * example)
1376 {
1377 if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);
1378 return 0;
1379 }
1380
1381 int Hunspell::remove(const char * word)
1382 {
1383 if (pHMgr[0]) return (pHMgr[0])->remove(word);
1384 return 0;
1385 }
1386
1387 const char * Hunspell::get_version()
1388 {
1389 return pAMgr->get_version();
1390 }
1391
1392 struct cs_info * Hunspell::get_csconv()
1393 {
1394 return csconv;
1395 }
1396
1397 void Hunspell::cat_result(char * result, char * st)
1398 {
1399 if (st) {
1400 if (*result) mystrcat(result, "\n", MAXLNLEN);
1401 mystrcat(result, st, MAXLNLEN);
1402 free(st);
1403 }
1404 }
1405
1406 int Hunspell::analyze(char*** slst, const char * word)
1407 {
1408 char cw[MAXWORDUTF8LEN];
1409 char wspace[MAXWORDUTF8LEN];
1410 w_char unicw[MAXWORDLEN];
1411 int wl2 = 0;
1412 *slst = NULL;
1413 if (! pSMgr || maxdic == 0) return 0;
1414 int nc = strlen(word);
1415 if (utf8) {
1416 if (nc >= MAXWORDUTF8LEN) return 0;
1417 } else {
1418 if (nc >= MAXWORDLEN) return 0;
1419 }
1420 int captype = 0;
1421 int abbv = 0;
1422 int wl = 0;
1423
1424 // input conversion
1425 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1426 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &cap type, &abbv);
1427 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
1428
1429 if (wl == 0) {
1430 if (abbv) {
1431 for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
1432 cw[wl] = '\0';
1433 abbv = 0;
1434 } else return 0;
1435 }
1436
1437 char result[MAXLNLEN];
1438 char * st = NULL;
1439
1440 *result = '\0';
1441
1442 int n = 0;
1443 int n2 = 0;
1444 int n3 = 0;
1445
1446 // test numbers
1447 // LANG_hu section: set dash information for suggestions
1448 if (langnum == LANG_hu) {
1449 while ((n < wl) &&
1450 (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',' )) && (n > 0)))) {
1451 n++;
1452 if ((cw[n] == '.') || (cw[n] == ',')) {
1453 if (((n2 == 0) && (n > 3)) ||
1454 ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) br eak;
1455 n2++;
1456 n3 = n;
1457 }
1458 }
1459
1460 if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;
1461 if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n , NULL, NULL))) {
1462 mystrcat(result, cw, MAXLNLEN);
1463 result[n - 1] = '\0';
1464 if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));
1465 else {
1466 char sign = cw[n];
1467 cw[n] = '\0';
1468 cat_result(result, pSMgr->suggest_morph(cw + n - 1));
1469 mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE
1470 cw[n] = sign;
1471 cat_result(result, pSMgr->suggest_morph(cw + n));
1472 }
1473 return line_tok(result, slst, MSEP_REC);
1474 }
1475 }
1476 // END OF LANG_hu section
1477
1478 switch(captype) {
1479 case HUHCAP:
1480 case HUHINITCAP:
1481 case NOCAP: {
1482 cat_result(result, pSMgr->suggest_morph(cw));
1483 if (abbv) {
1484 memcpy(wspace,cw,wl);
1485 *(wspace+wl) = '.';
1486 *(wspace+wl+1) = '\0';
1487 cat_result(result, pSMgr->suggest_morph(wspace));
1488 }
1489 break;
1490 }
1491 case INITCAP: {
1492 wl = mkallsmall2(cw, unicw, nc);
1493 memcpy(wspace,cw,(wl+1));
1494 wl2 = mkinitcap2(cw, unicw, nc);
1495 cat_result(result, pSMgr->suggest_morph(wspace));
1496 cat_result(result, pSMgr->suggest_morph(cw));
1497 if (abbv) {
1498 *(wspace+wl) = '.';
1499 *(wspace+wl+1) = '\0';
1500 cat_result(result, pSMgr->suggest_morph(wspace));
1501
1502 memcpy(wspace, cw, wl2);
1503 *(wspace+wl2) = '.';
1504 *(wspace+wl2+1) = '\0';
1505
1506 cat_result(result, pSMgr->suggest_morph(wspace));
1507 }
1508 break;
1509 }
1510 case ALLCAP: {
1511 cat_result(result, pSMgr->suggest_morph(cw));
1512 if (abbv) {
1513 memcpy(wspace,cw,wl);
1514 *(wspace+wl) = '.';
1515 *(wspace+wl+1) = '\0';
1516 cat_result(result, pSMgr->suggest_morph(cw));
1517 }
1518 wl = mkallsmall2(cw, unicw, nc);
1519 memcpy(wspace,cw,(wl+1));
1520 wl2 = mkinitcap2(cw, unicw, nc);
1521
1522 cat_result(result, pSMgr->suggest_morph(wspace));
1523 cat_result(result, pSMgr->suggest_morph(cw));
1524 if (abbv) {
1525 *(wspace+wl) = '.';
1526 *(wspace+wl+1) = '\0';
1527 cat_result(result, pSMgr->suggest_morph(wspace));
1528
1529 memcpy(wspace, cw, wl2);
1530 *(wspace+wl2) = '.';
1531 *(wspace+wl2+1) = '\0';
1532
1533 cat_result(result, pSMgr->suggest_morph(wspace));
1534 }
1535 break;
1536 }
1537 }
1538
1539 if (*result) {
1540 // word reversing wrapper for complex prefixes
1541 if (complexprefixes) {
1542 if (utf8) reverseword_utf(result); else reverseword(result);
1543 }
1544 return line_tok(result, slst, MSEP_REC);
1545 }
1546
1547 // compound word with dash (HU) I18n
1548 char * dash = NULL;
1549 int nresult = 0;
1550 // LANG_hu section: set dash information for suggestions
1551 if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
1552 if ((langnum == LANG_hu) && dash) {
1553 *dash='\0';
1554 // examine 2 sides of the dash
1555 if (dash[1] == '\0') { // base word ending with dash
1556 if (spell(cw)) {
1557 char * p = pSMgr->suggest_morph(cw);
1558 if (p) {
1559 int ret = line_tok(p, slst, MSEP_REC);
1560 free(p);
1561 return ret;
1562 }
1563
1564 }
1565 } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
1566 if (spell(cw) && (spell("-e"))) {
1567 st = pSMgr->suggest_morph(cw);
1568 if (st) {
1569 mystrcat(result, st, MAXLNLEN);
1570 free(st);
1571 }
1572 mystrcat(result,"+", MAXLNLEN); // XXX spec. separator i n MORPHCODE
1573 st = pSMgr->suggest_morph("-e");
1574 if (st) {
1575 mystrcat(result, st, MAXLNLEN);
1576 free(st);
1577 }
1578 return line_tok(result, slst, MSEP_REC);
1579 }
1580 } else {
1581 // first word ending with dash: word- XXX ???
1582 char r2 = *(dash + 1);
1583 dash[0]='-';
1584 dash[1]='\0';
1585 nresult = spell(cw);
1586 dash[1] = r2;
1587 dash[0]='\0';
1588 if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
1589 ((dash[1] > '0') && (dash[1] < '9')))) {
1590 st = pSMgr->suggest_morph(cw);
1591 if (st) {
1592 mystrcat(result, st, MAXLNLEN);
1593 free(st);
1594 mystrcat(result,"+", MAXLNLEN); // XXX spec. sep arator in MORPHCODE
1595 }
1596 st = pSMgr->suggest_morph(dash+1);
1597 if (st) {
1598 mystrcat(result, st, MAXLNLEN);
1599 free(st);
1600 }
1601 return line_tok(result, slst, MSEP_REC);
1602 }
1603 }
1604 // affixed number in correct word
1605 if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
1606 (*(dash-1)>='0')) || (*(dash-1)=='.'))) {
1607 *dash='-';
1608 n = 1;
1609 if (*(dash - n) == '.') n++;
1610 // search first not a number character to left from dash
1611 while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
1612 n++;
1613 }
1614 if ((dash - n) < cw) n--;
1615 // numbers: valami1000000-hoz
1616 // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1617 // 56-hoz, 6-hoz
1618 for(; n >= 1; n--) {
1619 if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
1620 mystrcat(result, cw, MAXLNLEN);
1621 result[dash - cw - n] = '\0';
1622 st = pSMgr->suggest_morph(dash - n);
1623 if (st) {
1624 mystrcat(result, st, MAXLNLEN);
1625 free(st);
1626 }
1627 return line_tok(result, slst, MSEP_REC);
1628 }
1629 }
1630 }
1631 }
1632 return 0;
1633 }
1634
1635 int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
1636 {
1637 *slst = NULL;
1638 if (!pSMgr || !pln) return 0;
1639 char **pl2;
1640 int pl2n = analyze(&pl2, word);
1641 int captype = 0;
1642 int abbv = 0;
1643 char cw[MAXWORDUTF8LEN];
1644 cleanword(cw, word, &captype, &abbv);
1645 char result[MAXLNLEN];
1646 *result = '\0';
1647
1648 for (int i = 0; i < pln; i++) {
1649 cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
1650 }
1651 freelist(&pl2, pl2n);
1652
1653 if (*result) {
1654 // allcap
1655 if (captype == ALLCAP) mkallcap(result);
1656
1657 // line split
1658 int linenum = line_tok(result, slst, MSEP_REC);
1659
1660 // capitalize
1661 if (captype == INITCAP || captype == HUHINITCAP) {
1662 for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);
1663 }
1664
1665 // temporary filtering of prefix related errors (eg.
1666 // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1667
1668 int r = 0;
1669 for (int j=0; j < linenum; j++) {
1670 if (!spell((*slst)[j])) {
1671 free((*slst)[j]);
1672 (*slst)[j] = NULL;
1673 } else {
1674 if (r < j) (*slst)[r] = (*slst)[j];
1675 r++;
1676 }
1677 }
1678 if (r > 0) return r;
1679 free(*slst);
1680 *slst = NULL;
1681 }
1682 return 0;
1683 }
1684
1685 int Hunspell::generate(char*** slst, const char * word, const char * pattern)
1686 {
1687 char **pl;
1688 int pln = analyze(&pl, pattern);
1689 int n = generate(slst, word, pl, pln);
1690 freelist(&pl, pln);
1691 return uniqlist(*slst, n);
1692 }
1693
1694 // minimal XML parser functions
1695 int Hunspell::get_xml_par(char * dest, const char * par, int max)
1696 {
1697 char * d = dest;
1698 if (!par) return 0;
1699 char end = *par;
1700 char * dmax = dest + max;
1701 if (end == '>') end = '<';
1702 else if (end != '\'' && end != '"') return 0; // bad XML
1703 for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;
1704 *d = '\0';
1705 mystrrep(dest, "&lt;", "<");
1706 mystrrep(dest, "&amp;", "&");
1707 return (int)(d - dest);
1708 }
1709
1710 int Hunspell::get_langnum() const
1711 {
1712 return langnum;
1713 }
1714
1715 // return the beginning of the element (attr == NULL) or the attribute
1716 const char * Hunspell::get_xml_pos(const char * s, const char * attr)
1717 {
1718 const char * end = strchr(s, '>');
1719 const char * p = s;
1720 if (attr == NULL) return end;
1721 do {
1722 p = strstr(p, attr);
1723 if (!p || p >= end) return 0;
1724 } while (*(p-1) != ' ' && *(p-1) != '\n');
1725 return p + strlen(attr);
1726 }
1727
1728 int Hunspell::check_xml_par(const char * q, const char * attr, const char * valu e) {
1729 char cw[MAXWORDUTF8LEN];
1730 if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&
1731 strcmp(cw, value) == 0) return 1;
1732 return 0;
1733 }
1734
1735 int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
1736 int n = 0;
1737 char * p;
1738 if (!list) return 0;
1739 for (p = list; ((p = strstr(p, tag)) != NULL); p++) n++;
1740 if (n == 0) return 0;
1741 *slst = (char **) malloc(sizeof(char *) * n);
1742 if (!*slst) return 0;
1743 for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) {
1744 int l = strlen(p);
1745 (*slst)[n] = (char *) malloc(l + 1);
1746 if (!(*slst)[n]) return n;
1747 if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {
1748 free((*slst)[n]);
1749 break;
1750 }
1751 }
1752 return n;
1753 }
1754
1755 int Hunspell::spellml(char*** slst, const char * word)
1756 {
1757 char *q, *q2;
1758 char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
1759 q = (char *) strstr(word, "<query");
1760 if (!q) return 0; // bad XML input
1761 q2 = strchr(q, '>');
1762 if (!q2) return 0; // bad XML input
1763 q2 = strstr(q2, "<word");
1764 if (!q2) return 0; // bad XML input
1765 if (check_xml_par(q, "type=", "analyze")) {
1766 int n = 0, s = 0;
1767 if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(sls t, cw);
1768 if (n == 0) return 0;
1769 // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1770 for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
1771 char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp ;
1772 if (!r) return 0;
1773 strcpy(r, "<code>");
1774 for (int i = 0; i < n; i++) {
1775 int l = strlen(r);
1776 strcpy(r + l, "<a>");
1777 strcpy(r + l + 3, (*slst)[i]);
1778 mystrrep(r + l + 3, "\t", " ");
1779 mystrrep(r + l + 3, "<", "&lt;");
1780 mystrrep(r + l + 3, "&", "&amp;");
1781 strcat(r, "</a>");
1782 free((*slst)[i]);
1783 }
1784 strcat(r, "</code>");
1785 (*slst)[0] = r;
1786 return 1;
1787 } else if (check_xml_par(q, "type=", "stem")) {
1788 if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst , cw);
1789 } else if (check_xml_par(q, "type=", "generate")) {
1790 int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
1791 if (n == 0) return 0;
1792 char * q3 = strstr(q2 + 1, "<word");
1793 if (q3) {
1794 if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
1795 return generate(slst, cw, cw2);
1796 }
1797 } else {
1798 if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
1799 char ** slst2;
1800 if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>")) != 0) {
1801 int n2 = generate(slst, cw, slst2, n);
1802 freelist(&slst2, n);
1803 return uniqlist(*slst, n2);
1804 }
1805 freelist(&slst2, n);
1806 }
1807 }
1808 }
1809 return 0;
1810 }
1811
1812
1813 #ifdef HUNSPELL_EXPERIMENTAL
1814 // XXX need UTF-8 support
1815 char * Hunspell::morph_with_correction(const char * word)
1816 {
1817 char cw[MAXWORDUTF8LEN];
1818 char wspace[MAXWORDUTF8LEN];
1819 if (! pSMgr || maxdic == 0) return NULL;
1820 int wl = strlen(word);
1821 if (utf8) {
1822 if (wl >= MAXWORDUTF8LEN) return NULL;
1823 } else {
1824 if (wl >= MAXWORDLEN) return NULL;
1825 }
1826 int captype = 0;
1827 int abbv = 0;
1828 wl = cleanword(cw, word, &captype, &abbv);
1829 if (wl == 0) return NULL;
1830
1831 char result[MAXLNLEN];
1832 char * st = NULL;
1833
1834 *result = '\0';
1835
1836
1837 switch(captype) {
1838 case NOCAP: {
1839 st = pSMgr->suggest_morph_for_spelling_error(cw);
1840 if (st) {
1841 mystrcat(result, st, MAXLNLEN);
1842 free(st);
1843 }
1844 if (abbv) {
1845 memcpy(wspace,cw,wl);
1846 *(wspace+wl) = '.';
1847 *(wspace+wl+1) = '\0';
1848 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1849 if (st) {
1850 if (*result) mystrcat(result, "\n", MAXLNLEN);
1851 mystrcat(result, st, MAXLNLEN);
1852 free(st);
1853 }
1854 }
1855 break;
1856 }
1857 case INITCAP: {
1858 memcpy(wspace,cw,(wl+1));
1859 mkallsmall(wspace);
1860 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1861 if (st) {
1862 mystrcat(result, st, MAXLNLEN);
1863 free(st);
1864 }
1865 st = pSMgr->suggest_morph_for_spelling_error(cw);
1866 if (st) {
1867 if (*result) mystrcat(result, "\n", MAXLNLEN);
1868 mystrcat(result, st, MAXLNLEN);
1869 free(st);
1870 }
1871 if (abbv) {
1872 memcpy(wspace,cw,wl);
1873 *(wspace+wl) = '.';
1874 *(wspace+wl+1) = '\0';
1875 mkallsmall(wspace);
1876 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1877 if (st) {
1878 if (*result) mystrcat(result, "\n", MAXLNLEN);
1879 mystrcat(result, st, MAXLNLEN);
1880 free(st);
1881 }
1882 mkinitcap(wspace);
1883 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1884 if (st) {
1885 if (*result) mystrcat(result, "\n", MAXLNLEN);
1886 mystrcat(result, st, MAXLNLEN);
1887 free(st);
1888 }
1889 }
1890 break;
1891 }
1892 case HUHCAP: {
1893 st = pSMgr->suggest_morph_for_spelling_error(cw);
1894 if (st) {
1895 mystrcat(result, st, MAXLNLEN);
1896 free(st);
1897 }
1898 memcpy(wspace,cw,(wl+1));
1899 mkallsmall(wspace);
1900 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1901 if (st) {
1902 if (*result) mystrcat(result, "\n", MAXLNLEN);
1903 mystrcat(result, st, MAXLNLEN);
1904 free(st);
1905 }
1906 break;
1907 }
1908 case ALLCAP: {
1909 memcpy(wspace,cw,(wl+1));
1910 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1911 if (st) {
1912 mystrcat(result, st, MAXLNLEN);
1913 free(st);
1914 }
1915 mkallsmall(wspace);
1916 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1917 if (st) {
1918 if (*result) mystrcat(result, "\n", MAXLNLEN);
1919 mystrcat(result, st, MAXLNLEN);
1920 free(st);
1921 }
1922 mkinitcap(wspace);
1923 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1924 if (st) {
1925 if (*result) mystrcat(result, "\n", MAXLNLEN);
1926 mystrcat(result, st, MAXLNLEN);
1927 free(st);
1928 }
1929 if (abbv) {
1930 memcpy(wspace,cw,(wl+1));
1931 *(wspace+wl) = '.';
1932 *(wspace+wl+1) = '\0';
1933 if (*result) mystrcat(result, "\n", MAXLNLEN);
1934 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1935 if (st) {
1936 mystrcat(result, st, MAXLNLEN);
1937 free(st);
1938 }
1939 mkallsmall(wspace);
1940 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1941 if (st) {
1942 if (*result) mystrcat(result, "\n", MAXLNLEN);
1943 mystrcat(result, st, MAXLNLEN);
1944 free(st);
1945 }
1946 mkinitcap(wspace);
1947 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1948 if (st) {
1949 if (*result) mystrcat(result, "\n", MAXLNLEN);
1950 mystrcat(result, st, MAXLNLEN);
1951 free(st);
1952 }
1953 }
1954 break;
1955 }
1956 }
1957
1958 if (*result) return mystrdup(result);
1959 return NULL;
1960 }
1961
1962 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1963
1964 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
1965 {
1966 #ifdef HUNSPELL_CHROME_CLIENT
1967 return NULL;
1968 #else
1969 return (Hunhandle*)(new Hunspell(affpath, dpath));
1970 #endif
1971 }
1972
1973 Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
1974 const char * key)
1975 {
1976 #ifdef HUNSPELL_CHROME_CLIENT
1977 return NULL;
1978 #else
1979 return (Hunhandle*)(new Hunspell(affpath, dpath, key));
1980 #endif
1981 }
1982
1983 void Hunspell_destroy(Hunhandle *pHunspell)
1984 {
1985 delete (Hunspell*)(pHunspell);
1986 }
1987
1988 int Hunspell_spell(Hunhandle *pHunspell, const char *word)
1989 {
1990 return ((Hunspell*)pHunspell)->spell(word);
1991 }
1992
1993 char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
1994 {
1995 return ((Hunspell*)pHunspell)->get_dic_encoding();
1996 }
1997
1998 int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
1999 {
2000 return ((Hunspell*)pHunspell)->suggest(slst, word);
2001 }
2002
2003 int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
2004 {
2005 return ((Hunspell*)pHunspell)->analyze(slst, word);
2006 }
2007
2008 int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
2009 {
2010 return ((Hunspell*)pHunspell)->stem(slst, word);
2011 }
2012
2013 int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
2014 {
2015 return ((Hunspell*)pHunspell)->stem(slst, desc, n);
2016 }
2017
2018 int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
2019 const char * word2)
2020 {
2021 return ((Hunspell*)pHunspell)->generate(slst, word, word2);
2022 }
2023
2024 int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
2025 char** desc, int n)
2026 {
2027 return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
2028 }
2029
2030 /* functions for run-time modification of the dictionary */
2031
2032 /* add word to the run-time dictionary */
2033
2034 int Hunspell_add(Hunhandle *pHunspell, const char * word) {
2035 return ((Hunspell*)pHunspell)->add(word);
2036 }
2037
2038 /* add word to the run-time dictionary with affix flags of
2039 * the example (a dictionary word): Hunspell will recognize
2040 * affixed forms of the new word, too.
2041 */
2042
2043 int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
2044 const char * example) {
2045 return ((Hunspell*)pHunspell)->add_with_affix(word, example);
2046 }
2047
2048 /* remove word from the run-time dictionary */
2049
2050 int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
2051 return ((Hunspell*)pHunspell)->remove(word);
2052 }
2053
2054 void Hunspell_free_list(Hunhandle *, char *** slst, int n) {
2055 freelist(slst, n);
2056 }
OLDNEW
« no previous file with comments | « third_party/hunspell_new/src/hunspell/hunspell.hxx ('k') | third_party/hunspell_new/src/hunspell/hunspell.dsp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698