third_party/hunspell_new/src/hunspell/hunspell.cxx - Issue 1135173004: Rename third_party/hunspell_new back to third_party/hunspell.

Side by Side Diff: third_party/hunspell_new/src/hunspell/hunspell.cxx

Issue 1135173004: Rename third_party/hunspell_new back to third_party/hunspell. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 #include "license.hunspell"

2 #include "license.myspell"

3

4 #include <stdlib.h>

5 #include <string.h>

6 #include <stdio.h>

7

8 #include "hunspell.hxx"

9 #include "hunspell.h"

10 #ifndef HUNSPELL_CHROME_CLIENT

11 #ifndef MOZILLA_CLIENT

12 # include "config.h"

13 #endif

14 #endif

15 #include "csutil.hxx"

16

17 #ifdef HUNSPELL_CHROME_CLIENT

18 Hunspell::Hunspell(const unsigned char* bdict_data, size_t bdict_length)

19 #else

20 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)

21 #endif

22 {

23 encoding = NULL;

24 csconv = NULL;

25 utf8 = 0;

26 complexprefixes = 0;

27 #ifndef HUNSPELL_CHROME_CLIENT

28 affixpath = mystrdup(affpath);

29 #endif

30 maxdic = 0;

31

32 #ifdef HUNSPELL_CHROME_CLIENT

33 bdict_reader = new hunspell::BDictReader;

34 bdict_reader->Init(bdict_data, bdict_length);

35

36 pHMgr[0] = new HashMgr(bdict_reader);

37 if (pHMgr[0]) maxdic = 1;

38

39 pAMgr = new AffixMgr(bdict_reader, pHMgr, &maxdic);

40 #else

41 /* first set up the hash manager */

42 pHMgr[0] = new HashMgr(dpath, affpath, key);

43 if (pHMgr[0]) maxdic = 1;

44

45 /* next set up the affix manager */

46 /* it needs access to the hash manager lookup methods */

47 pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);

48 #endif

49

50 /* get the preferred try string and the dictionary */

51 /* encoding from the Affix Manager for that dictionary */

52 char * try_string = pAMgr->get_try_string();

53 encoding = pAMgr->get_encoding();

54 langnum = pAMgr->get_langnum();

55 utf8 = pAMgr->get_utf8();

56 if (!utf8)

57 csconv = get_current_cs(encoding);

58 complexprefixes = pAMgr->get_complexprefixes();

59 wordbreak = pAMgr->get_breaktable();

60

61 /* and finally set up the suggestion manager */

62 #ifdef HUNSPELL_CHROME_CLIENT

63 pSMgr = new SuggestMgr(bdict_reader, try_string, MAXSUGGESTION, pAMgr);

64 #else

65 pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);

66 #endif

67 if (try_string) free(try_string);

68 }

69

70 Hunspell::~Hunspell()

71 {

72 if (pSMgr) delete pSMgr;

73 if (pAMgr) delete pAMgr;

74 for (int i = 0; i < maxdic; i++) delete pHMgr[i];

75 maxdic = 0;

76 pSMgr = NULL;

77 pAMgr = NULL;

78 #ifdef MOZILLA_CLIENT

79 delete [] csconv;

80 #endif

81 csconv= NULL;

82 if (encoding) free(encoding);

83 encoding = NULL;

84 #ifdef HUNSPELL_CHROME_CLIENT

85 if (bdict_reader) delete bdict_reader;

86 bdict_reader = NULL;

87 #else

88 if (affixpath) free(affixpath);

89 affixpath = NULL;

90 #endif

91 }

92

93 #ifndef HUNSPELL_CHROME_CLIENT

94 // load extra dictionaries

95 int Hunspell::add_dic(const char * dpath, const char * key) {

96 if (maxdic == MAXDIC \|\| !affixpath) return 1;

97 pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);

98 if (pHMgr[maxdic]) maxdic++; else return 1;

99 return 0;

100 }

101 #endif

102

103 // make a copy of src at destination while removing all leading

104 // blanks and removing any trailing periods after recording

105 // their presence with the abbreviation flag

106 // also since already going through character by character,

107 // set the capitalization type

108 // return the length of the "cleaned" (and UTF-8 encoded) word

109

110 int Hunspell::cleanword2(char * dest, const char * src,

111 w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)

112 {

113 unsigned char * p = (unsigned char *) dest;

114 const unsigned char * q = (const unsigned char * ) src;

115

116 // first skip over any leading blanks

117 while ((q != '\0') && (q == ' ')) q++;

118

119 // now strip off any trailing periods (recording their presence)

120 *pabbrev = 0;

121 int nl = strlen((const char *)q);

122 while ((nl > 0) && (*(q+nl-1)=='.')) {

123 nl--;

124 (*pabbrev)++;

125 }

126

127 // if no characters are left it can't be capitalized

128 if (nl <= 0) {

129 *pcaptype = NOCAP;

130 *p = '\0';

131 return 0;

132 }

133

134 strncpy(dest, (char *) q, nl);

135 *(dest + nl) = '\0';

136 nl = strlen(dest);

137 if (utf8) {

138 *nc = u8_u16(dest_utf, MAXWORDLEN, dest);

139 // don't check too long words

140 if (*nc >= MAXWORDLEN) return 0;

141 if (*nc == -1) { // big Unicode character (non BMP area)

142 *pcaptype = NOCAP;

143 return nl;

144 }

145 pcaptype = get_captype_utf8(dest_utf, nc, langnum);

146 } else {

147 *pcaptype = get_captype(dest, nl, csconv);

148 *nc = nl;

149 }

150 return nl;

151 }

152

153 int Hunspell::cleanword(char * dest, const char * src,

154 int * pcaptype, int * pabbrev)

155 {

156 unsigned char * p = (unsigned char *) dest;

157 const unsigned char * q = (const unsigned char * ) src;

158 int firstcap = 0;

159

160 // first skip over any leading blanks

161 while ((q != '\0') && (q == ' ')) q++;

162

163 // now strip off any trailing periods (recording their presence)

164 *pabbrev = 0;

165 int nl = strlen((const char *)q);

166 while ((nl > 0) && (*(q+nl-1)=='.')) {

167 nl--;

168 (*pabbrev)++;

169 }

170

171 // if no characters are left it can't be capitalized

172 if (nl <= 0) {

173 *pcaptype = NOCAP;

174 *p = '\0';

175 return 0;

176 }

177

178 // now determine the capitalization type of the first nl letters

179 int ncap = 0;

180 int nneutral = 0;

181 int nc = 0;

182

183 if (!utf8) {

184 while (nl > 0) {

185 nc++;

186 if (csconv[(*q)].ccase) ncap++;

187 if (csconv[(q)].cupper == csconv[(q)].clower) nneutral++;

188 p++ = q++;

189 nl--;

190 }

191 // remember to terminate the destination string

192 *p = '\0';

193 firstcap = csconv[(unsigned char)(*dest)].ccase;

194 } else {

195 unsigned short idx;

196 w_char t[MAXWORDLEN];

197 nc = u8_u16(t, MAXWORDLEN, src);

198 for (int i = 0; i < nc; i++) {

199 idx = (t[i].h << 8) + t[i].l;

200 unsigned short low = unicodetolower(idx, langnum);

201 if (idx != low) ncap++;

202 if (unicodetoupper(idx, langnum) == low) nneutral++;

203 }

204 u16_u8(dest, MAXWORDUTF8LEN, t, nc);

205 if (ncap) {

206 idx = (t[0].h << 8) + t[0].l;

207 firstcap = (idx != unicodetolower(idx, langnum));

208 }

209 }

210

211 // now finally set the captype

212 if (ncap == 0) {

213 *pcaptype = NOCAP;

214 } else if ((ncap == 1) && firstcap) {

215 *pcaptype = INITCAP;

216 } else if ((ncap == nc) \|\| ((ncap + nneutral) == nc)){

217 *pcaptype = ALLCAP;

218 } else if ((ncap > 1) && firstcap) {

219 *pcaptype = HUHINITCAP;

220 } else {

221 *pcaptype = HUHCAP;

222 }

223 return strlen(dest);

224 }

225

226 void Hunspell::mkallcap(char * p)

227 {

228 if (utf8) {

229 w_char u[MAXWORDLEN];

230 int nc = u8_u16(u, MAXWORDLEN, p);

231 unsigned short idx;

232 for (int i = 0; i < nc; i++) {

233 idx = (u[i].h << 8) + u[i].l;

234 if (idx != unicodetoupper(idx, langnum)) {

235 u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);

236 u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);

237 }

238 }

239 u16_u8(p, MAXWORDUTF8LEN, u, nc);

240 } else {

241 while (*p != '\0') {

242 p = csconv[((unsigned char) p)].cupper;

243 p++;

244 }

245 }

246 }

247

248 int Hunspell::mkallcap2(char * p, w_char * u, int nc)

249 {

250 if (utf8) {

251 unsigned short idx;

252 for (int i = 0; i < nc; i++) {

253 idx = (u[i].h << 8) + u[i].l;

254 unsigned short up = unicodetoupper(idx, langnum);

255 if (idx != up) {

256 u[i].h = (unsigned char) (up >> 8);

257 u[i].l = (unsigned char) (up & 0x00FF);

258 }

259 }

260 u16_u8(p, MAXWORDUTF8LEN, u, nc);

261 return strlen(p);

262 } else {

263 while (*p != '\0') {

264 p = csconv[((unsigned char) p)].cupper;

265 p++;

266 }

267 }

268 return nc;

269 }

270

271

272 void Hunspell::mkallsmall(char * p)

273 {

274 while (*p != '\0') {

275 p = csconv[((unsigned char) p)].clower;

276 p++;

277 }

278 }

279

280 int Hunspell::mkallsmall2(char * p, w_char * u, int nc)

281 {

282 if (utf8) {

283 unsigned short idx;

284 for (int i = 0; i < nc; i++) {

285 idx = (u[i].h << 8) + u[i].l;

286 unsigned short low = unicodetolower(idx, langnum);

287 if (idx != low) {

288 u[i].h = (unsigned char) (low >> 8);

289 u[i].l = (unsigned char) (low & 0x00FF);

290 }

291 }

292 u16_u8(p, MAXWORDUTF8LEN, u, nc);

293 return strlen(p);

294 } else {

295 while (*p != '\0') {

296 p = csconv[((unsigned char) p)].clower;

297 p++;

298 }

299 }

300 return nc;

301 }

302

303 // convert UTF-8 sharp S codes to latin 1

304 char * Hunspell::sharps_u8_l1(char * dest, char * source) {

305 char * p = dest;

306 p = source;

307 for (p++, source++; *(source - 1); p++, source++) {

308 p = source;

309 if (source == '\x9F') --p = '\xDF';

310 }

311 return dest;

312 }

313

314 // recursive search for right ss - sharp s permutations

315 hentry * Hunspell::spellsharps(char * base, char * pos, int n,

316 int repnum, char * tmp, int * info, char **root) {

317 pos = strstr(pos, "ss");

318 if (pos && (n < MAXSHARPS)) {

319 *pos = '\xC3';

320 *(pos + 1) = '\x9F';

321 hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, ro ot);

322 if (h) return h;

323 *pos = 's';

324 *(pos + 1) = 's';

325 h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);

326 if (h) return h;

327 } else if (repnum > 0) {

328 if (utf8) return checkword(base, info, root);

329 return checkword(sharps_u8_l1(tmp, base), info, root);

330 }

331 return NULL;

332 }

333

334 int Hunspell::is_keepcase(const hentry * rv) {

335 return pAMgr && rv->astr && pAMgr->get_keepcase() &&

336 TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);

337 }

338

339 /* insert a word to the beginning of the suggestion array and return ns */

340 int Hunspell::insert_sug(char **slst, char word, int ns) {

341 char * dup = mystrdup(word);

342 if (!dup) return ns;

343 if (ns == MAXSUGGESTION) {

344 ns--;

345 free((*slst)[ns]);

346 }

347 for (int k = ns; k > 0; k--) (slst)[k] = (slst)[k - 1];

348 (*slst)[0] = dup;

349 return ns + 1;

350 }

351

352 int Hunspell::spell(const char * word, int * info, char ** root)

353 {

354 #ifdef HUNSPELL_CHROME_CLIENT

355 if (pHMgr[0]) pHMgr[0]->EmptyHentryCache();

356 #endif

357 struct hentry * rv=NULL;

358 // need larger vector. For example, Turkish capital letter I converted a

359 // 2-byte UTF-8 character (dotless i) by mkallsmall.

360 char cw[MAXWORDUTF8LEN];

361 char wspace[MAXWORDUTF8LEN];

362 w_char unicw[MAXWORDLEN];

363 // Hunspell supports XML input of the simplified API (see manual)

364 if (strcmp(word, SPELL_XML) == 0) return 1;

365 int nc = strlen(word);

366 int wl2 = 0;

367 if (utf8) {

368 if (nc >= MAXWORDUTF8LEN) return 0;

369 } else {

370 if (nc >= MAXWORDLEN) return 0;

371 }

372 int captype = 0;

373 int abbv = 0;

374 int wl = 0;

375

376 // input conversion

377 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

378 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &cap type, &abbv);

379 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

380

381 int info2 = 0;

382 if (wl == 0 \|\| maxdic == 0) return 1;

383 if (root) *root = NULL;

384

385 // allow numbers with dots, dashes and commas (but forbid double separators: " ..", "--" etc.)

386 enum { NBEGIN, NNUM, NSEP };

387 int nstate = NBEGIN;

388 int i;

389

390 for (i = 0; (i < wl); i++) {

391 if ((cw[i] <= '9') && (cw[i] >= '0')) {

392 nstate = NNUM;

393 } else if ((cw[i] == ',') \|\| (cw[i] == '.') \|\| (cw[i] == '-')) {

394 if ((nstate == NSEP) \|\| (i == 0)) break;

395 nstate = NSEP;

396 } else break;

397 }

398 if ((i == wl) && (nstate == NNUM)) return 1;

399 if (!info) info = &info2; else *info = 0;

400

401 switch(captype) {

402 case HUHCAP:

403 case HUHINITCAP:

404 *info += SPELL_ORIGCAP;

405 case NOCAP: {

406 rv = checkword(cw, info, root);

407 if ((abbv) && !(rv)) {

408 memcpy(wspace,cw,wl);

409 *(wspace+wl) = '.';

410 *(wspace+wl+1) = '\0';

411 rv = checkword(wspace, info, root);

412 }

413 break;

414 }

415 case ALLCAP: {

416 *info += SPELL_ORIGCAP;

417 rv = checkword(cw, info, root);

418 if (rv) break;

419 if (abbv) {

420 memcpy(wspace,cw,wl);

421 *(wspace+wl) = '.';

422 *(wspace+wl+1) = '\0';

423 rv = checkword(wspace, info, root);

424 if (rv) break;

425 }

426 // Spec. prefix handling for Catalan, French, Italian:

427 // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).

428 if (pAMgr && strchr(cw, '\'')) {

429 wl = mkallsmall2(cw, unicw, nc);

430 //There are no really sane circumstances where this could fail,

431 //but anyway...

432 if (char * apostrophe = strchr(cw, '\'')) {

433 if (utf8) {

434 w_char tmpword[MAXWORDLEN];

435 *apostrophe = '\0';

436 wl2 = u8_u16(tmpword, MAXWORDLEN, cw);

437 *apostrophe = '\'';

438 if (wl2 < nc) {

439 mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);

440 rv = checkword(cw, info, root);

441 if (rv) break;

442 }

443 } else {

444 mkinitcap2(apostrophe + 1, unicw, nc);

445 rv = checkword(cw, info, root);

446 if (rv) break;

447 }

448 }

449 mkinitcap2(cw, unicw, nc);

450 rv = checkword(cw, info, root);

451 if (rv) break;

452 }

453 if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {

454 char tmpword[MAXWORDUTF8LEN];

455 wl = mkallsmall2(cw, unicw, nc);

456 memcpy(wspace,cw,(wl+1));

457 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);

458 if (!rv) {

459 wl2 = mkinitcap2(cw, unicw, nc);

460 rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);

461 }

462 if ((abbv) && !(rv)) {

463 *(wspace+wl) = '.';

464 *(wspace+wl+1) = '\0';

465 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);

466 if (!rv) {

467 memcpy(wspace, cw, wl2);

468 *(wspace+wl2) = '.';

469 *(wspace+wl2+1) = '\0';

470 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, ro ot);

471 }

472 }

473 if (rv) break;

474 }

475 }

476 case INITCAP: {

477 *info += SPELL_ORIGCAP;

478 wl = mkallsmall2(cw, unicw, nc);

479 memcpy(wspace,cw,(wl+1));

480 wl2 = mkinitcap2(cw, unicw, nc);

481 if (captype == INITCAP) *info += SPELL_INITCAP;

482 rv = checkword(cw, info, root);

483 if (captype == INITCAP) *info -= SPELL_INITCAP;

484 // forbid bad capitalization

485 // (for example, ijs -> Ijs instead of IJs in Dutch)

486 // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)

487 if (*info & SPELL_FORBIDDEN) {

488 rv = NULL;

489 break;

490 }

491 if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;

492 if (rv) break;

493

494 rv = checkword(wspace, info, root);

495 if (abbv && !rv) {

496

497 *(wspace+wl) = '.';

498 *(wspace+wl+1) = '\0';

499 rv = checkword(wspace, info, root);

500 if (!rv) {

501 memcpy(wspace, cw, wl2);

502 *(wspace+wl2) = '.';

503 *(wspace+wl2+1) = '\0';

504 if (captype == INITCAP) *info += SPELL_INITCAP;

505 rv = checkword(wspace, info, root);

506 if (captype == INITCAP) *info -= SPELL_INITCAP;

507 if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;

508 break;

509 }

510 }

511 if (rv && is_keepcase(rv) &&

512 ((captype == ALLCAP) \|\|

513 // if CHECKSHARPS: KEEPCASE words with \xDF are allowed

514 // in INITCAP form, too.

515 !(pAMgr->get_checksharps() &&

516 ((utf8 && strstr(wspace, "\xC3\x9F")) \|\|

517 (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;

518 break;

519 }

520 }

521

522 if (rv) {

523 if (pAMgr && pAMgr->get_warn() && rv->astr &&

524 TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {

525 *info += SPELL_WARN;

526 if (pAMgr->get_forbidwarn()) return 0;

527 return HUNSPELL_OK_WARN;

528 }

529 return HUNSPELL_OK;

530 }

531

532 // recursive breaking at break points

533 if (wordbreak) {

534 char * s;

535 char r;

536 int nbr = 0;

537 wl = strlen(cw);

538 int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;

539

540 // calculate break points for recursion limit

541 for (int j = 0; j < numbreak; j++) {

542 s = cw;

543 do {

544 s = (char *) strstr(s, wordbreak[j]);

545 if (s) {

546 nbr++;

547 s++;

548 }

549 } while (s);

550 }

551 if (nbr >= 10) return 0;

552

553 // check boundary patterns (^begin and end$)

554 for (int j = 0; j < numbreak; j++) {

555 int plen = strlen(wordbreak[j]);

556 if (plen == 1 \|\| plen > wl) continue;

557 if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0

558 && spell(cw + plen - 1)) return 1;

559 if (wordbreak[j][plen - 1] == '$' &&

560 strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {

561 r = cw[wl - plen + 1];

562 cw[wl - plen + 1] = '\0';

563 if (spell(cw)) return 1;

564 cw[wl - plen + 1] = r;

565 }

566 }

567

568 // other patterns

569 for (int j = 0; j < numbreak; j++) {

570 int plen = strlen(wordbreak[j]);

571 s=(char *) strstr(cw, wordbreak[j]);

572 if (s && (s > cw) && (s < cw + wl - plen)) {

573 if (!spell(s + plen)) continue;

574 r = *s;

575 *s = '\0';

576 // examine 2 sides of the break point

577 if (spell(cw)) return 1;

578 *s = r;

579

580 // LANG_hu: spec. dash rule

581 if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {

582 r = s[1];

583 s[1] = '\0';

584 if (spell(cw)) return 1; // check the first part with dash

585 s[1] = r;

586 }

587 // end of LANG speficic region

588

589 }

590 }

591 }

592

593 return 0;

594 }

595

596 struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)

597 {

598 struct hentry * he = NULL;

599 int len, i;

600 char w2[MAXWORDUTF8LEN];

601 const char * word;

602

603 char * ignoredchars = pAMgr->get_ignore();

604 if (ignoredchars != NULL) {

605 strcpy(w2, w);

606 if (utf8) {

607 int ignoredchars_utf16_len;

608 unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredch ars_utf16_len);

609 remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len) ;

610 } else {

611 remove_ignored_chars(w2,ignoredchars);

612 }

613 word = w2;

614 } else word = w;

615

616 len = strlen(word);

617

618 if (!len)

619 return NULL;

620

621 #ifdef HUNSPELL_CHROME_CLIENT

622 // We need to check if the word length is valid to make coverity (Event

623 // fixed_size_dest: Possible overrun of N byte fixed size buffer) happy.

624 if ((utf8 && strlen(word) >= MAXWORDUTF8LEN) \|\| (!utf8 && strlen(word) >= MAXW ORDLEN))

625 return NULL;

626 #endif

627

628 // word reversing wrapper for complex prefixes

629 if (complexprefixes) {

630 if (word != w2) {

631 strcpy(w2, word);

632 word = w2;

633 }

634 if (utf8) reverseword_utf(w2); else reverseword(w2);

635 }

636

637 // look word in hash table

638 for (i = 0; (i < maxdic) && !he; i ++) {

639 he = (pHMgr[i])->lookup(word);

640

641 // check forbidden and onlyincompound words

642 if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenwor d(), he->alen)) {

643 if (info) *info += SPELL_FORBIDDEN;

644 // LANG_hu section: set dash information for suggestions

645 if (langnum == LANG_hu) {

646 if (pAMgr->get_compoundflag() &&

647 TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {

648 if (info) *info += SPELL_COMPOUND;

649 }

650 }

651 return NULL;

652 }

653

654 // he = next not needaffix, onlyincompound homonym or onlyupcase word

655 while (he && (he->astr) &&

656 ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->al en)) \|\|

657 (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompou nd(), he->alen)) \|\|

658 (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he- >alen))

659 )) he = he->next_homonym;

660 }

661

662 // check with affixes

663 if (!he && pAMgr) {

664 // try stripping off affixes */

665 he = pAMgr->affix_check(word, len, 0);

666

667 // check compound restriction and onlyupcase

668 if (he && he->astr && (

669 (pAMgr->get_onlyincompound() &&

670 TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) \|\|

671 (info && (*info & SPELL_INITCAP) &&

672 TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {

673 he = NULL;

674 }

675

676 if (he) {

677 if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword( ), he->alen)) {

678 if (info) *info += SPELL_FORBIDDEN;

679 return NULL;

680 }

681 if (root) {

682 *root = mystrdup(he->word);

683 if (*root && complexprefixes) {

684 if (utf8) reverseword_utf(root); else reverseword(root);

685 }

686 }

687 // try check compound word

688 } else if (pAMgr->get_compound()) {

689 he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);

690 // LANG_hu section: `moving rule' with last dash

691 if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {

692 char * dup = mystrdup(word);

693 if (!dup) return NULL;

694 dup[len-1] = '\0';

695 he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, i nfo);

696 free(dup);

697 }

698 // end of LANG speficic region

699 if (he) {

700 if (root) {

701 *root = mystrdup(he->word);

702 if (*root && complexprefixes) {

703 if (utf8) reverseword_utf(root); else reverseword(root );

704 }

705 }

706 if (info) *info += SPELL_COMPOUND;

707 }

708 }

709

710 }

711

712 return he;

713 }

714

715 int Hunspell::suggest(char*** slst, const char * word)

716 {

717 #ifdef HUNSPELL_CHROME_CLIENT

718 if (pHMgr[0]) pHMgr[0]->EmptyHentryCache();

719 #endif

720 int onlycmpdsug = 0;

721 char cw[MAXWORDUTF8LEN];

722 char wspace[MAXWORDUTF8LEN];

723 if (!pSMgr \|\| maxdic == 0) return 0;

724 w_char unicw[MAXWORDLEN];

725 *slst = NULL;

726 // process XML input of the simplified API (see manual)

727 if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {

728 return spellml(slst, word);

729 }

730 int nc = strlen(word);

731 if (utf8) {

732 if (nc >= MAXWORDUTF8LEN) return 0;

733 } else {

734 if (nc >= MAXWORDLEN) return 0;

735 }

736 int captype = 0;

737 int abbv = 0;

738 int wl = 0;

739

740 // input conversion

741 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

742 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &cap type, &abbv);

743 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

744

745 if (wl == 0) return 0;

746 int ns = 0;

747 int capwords = 0;

748

749 // check capitalized form for FORCEUCASE

750 if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {

751 int info = SPELL_ORIGCAP;

752 char ** wlst;

753 if (checkword(cw, &info, NULL)) {

754 if (*slst) {

755 wlst = *slst;

756 } else {

757 wlst = (char *) malloc(MAXSUGGESTION sizeof(char *));

758 if (wlst == NULL) return -1;

759 *slst = wlst;

760 for (int i = 0; i < MAXSUGGESTION; i++) {

761 wlst[i] = NULL;

762 }

763 }

764 wlst[0] = mystrdup(cw);

765 mkinitcap(wlst[0]);

766 return 1;

767 }

768 }

769

770 switch(captype) {

771 case NOCAP: {

772 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);

773 break;

774 }

775

776 case INITCAP: {

777 capwords = 1;

778 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);

779 if (ns == -1) break;

780 memcpy(wspace,cw,(wl+1));

781 mkallsmall2(wspace, unicw, nc);

782 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

783 break;

784 }

785 case HUHINITCAP:

786 capwords = 1;

787 case HUHCAP: {

788 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);

789 if (ns != -1) {

790 int prevns;

791 // something.The -> something. The

792 char * dot = strchr(cw, '.');

793 if (dot && (dot > cw)) {

794 int captype_;

795 if (utf8) {

796 w_char w_[MAXWORDLEN];

797 int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);

798 captype_ = get_captype_utf8(w_, wl_, langnum);

799 } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);

800 if (captype_ == INITCAP) {

801 char * st = mystrdup(cw);

802 if (st) st = (char *) realloc(st, wl + 2);

803 if (st) {

804 st[(dot - cw) + 1] = ' ';

805 strcpy(st + (dot - cw) + 2, dot + 1);

806 ns = insert_sug(slst, st, ns);

807 free(st);

808 }

809 }

810 }

811 if (captype == HUHINITCAP) {

812 // TheOpenOffice.org -> The OpenOffice.org

813 memcpy(wspace,cw,(wl+1));

814 mkinitsmall2(wspace, unicw, nc);

815 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

816 }

817 memcpy(wspace,cw,(wl+1));

818 mkallsmall2(wspace, unicw, nc);

819 if (spell(wspace)) ns = insert_sug(slst, wspace, ns);

820 prevns = ns;

821 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

822 if (captype == HUHINITCAP) {

823 mkinitcap2(wspace, unicw, nc);

824 if (spell(wspace)) ns = insert_sug(slst, wspace, ns) ;

825 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

826 }

827 // aNew -> "a New" (instead of "a new")

828 for (int j = prevns; j < ns; j++) {

829 char * space = strchr((*slst)[j],' ');

830 if (space) {

831 int slen = strlen(space + 1);

832 // different case after space (need capitalisati on)

833 if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {

834 w_char w[MAXWORDLEN];

835 int wc = 0;

836 char * r = (*slst)[j];

837 if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);

838 mkinitcap2(space + 1, w, wc);

839 // set as first suggestion

840 for (int k = j; k > 0; k--) (slst)[k] = (s lst)[k - 1];

841 (*slst)[0] = r;

842 }

843 }

844 }

845 }

846 break;

847 }

848

849 case ALLCAP: {

850 memcpy(wspace, cw, (wl+1));

851 mkallsmall2(wspace, unicw, nc);

852 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

853 if (ns == -1) break;

854 if (pAMgr && pAMgr->get_keepcase() && spell(wspace))

855 ns = insert_sug(slst, wspace, ns);

856 mkinitcap2(wspace, unicw, nc);

857 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

858 for (int j=0; j < ns; j++) {

859 mkallcap((*slst)[j]);

860 if (pAMgr && pAMgr->get_checksharps()) {

861 char * pos;

862 if (utf8) {

863 pos = strstr((*slst)[j], "\xC3\x9F");

864 while (pos) {

865 *pos = 'S';

866 *(pos+1) = 'S';

867 pos = strstr(pos+2, "\xC3\x9F");

868 }

869 } else {

870 pos = strchr((*slst)[j], '\xDF');

871 while (pos) {

872 (slst)[j] = (char ) realloc((slst)[j], st rlen((slst)[j]) + 2);

873 mystrrep((*slst)[j], "\xDF", "SS");

874 pos = strchr((*slst)[j], '\xDF');

875 }

876 }

877 }

878 }

879 break;

880 }

881 }

882

883 // LANG_hu section: replace '-' with ' ' in Hungarian

884 if (langnum == LANG_hu) {

885 for (int j=0; j < ns; j++) {

886 char * pos = strchr((*slst)[j],'-');

887 if (pos) {

888 int info;

889 char w[MAXWORDUTF8LEN];

890 *pos = '\0';

891 strcpy(w, (*slst)[j]);

892 strcat(w, pos + 1);

893 spell(w, &info, NULL);

894 if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {

895 *pos = ' ';

896 } else *pos = '-';

897 }

898 }

899 }

900 // END OF LANG_hu section

901

902 // try ngram approach since found nothing or only compound words

903 if (pAMgr && (ns == 0 \|\| onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && ( *slst)) {

904 switch(captype) {

905 case NOCAP: {

906 ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);

907 break;

908 }

909 case HUHINITCAP:

910 capwords = 1;

911 case HUHCAP: {

912 memcpy(wspace,cw,(wl+1));

913 mkallsmall2(wspace, unicw, nc);

914 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);

915 break;

916 }

917 case INITCAP: {

918 capwords = 1;

919 memcpy(wspace,cw,(wl+1));

920 mkallsmall2(wspace, unicw, nc);

921 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);

922 break;

923 }

924 case ALLCAP: {

925 memcpy(wspace,cw,(wl+1));

926 mkallsmall2(wspace, unicw, nc);

927 int oldns = ns;

928 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);

929 for (int j = oldns; j < ns; j++)

930 mkallcap((*slst)[j]);

931 break;

932 }

933 }

934 }

935

936 // try dash suggestion (Afo-American -> Afro-American)

937 if (char * pos = strchr(cw, '-')) {

938 char * ppos = cw;

939 int nodashsug = 1;

940 char ** nlst = NULL;

941 int nn = 0;

942 int last = 0;

943 if (*slst) {

944 for (int j = 0; j < ns && nodashsug == 1; j++) {

945 if (strchr((*slst)[j], '-')) nodashsug = 0;

946 }

947 }

948 while (nodashsug && !last) {

949 if (pos == '\0') last = 1; else pos = '\0';

950 if (!spell(ppos)) {

951 nn = suggest(&nlst, ppos);

952 for (int j = nn - 1; j >= 0; j--) {

953 strncpy(wspace, cw, ppos - cw);

954 strcpy(wspace + (ppos - cw), nlst[j]);

955 if (!last) {

956 strcat(wspace, "-");

957 strcat(wspace, pos + 1);

958 }

959 ns = insert_sug(slst, wspace, ns);

960 free(nlst[j]);

961 }

962 if (nlst != NULL) free(nlst);

963 nodashsug = 0;

964 }

965 if (!last) {

966 *pos = '-';

967 ppos = pos + 1;

968 pos = strchr(ppos, '-');

969 }

970 if (!pos) pos = cw + strlen(cw);

971 }

972 }

973

974 // word reversing wrapper for complex prefixes

975 if (complexprefixes) {

976 for (int j = 0; j < ns; j++) {

977 if (utf8) reverseword_utf((slst)[j]); else reverseword((slst)[j]);

978 }

979 }

980

981 // capitalize

982 if (capwords) for (int j=0; j < ns; j++) {

983 mkinitcap((*slst)[j]);

984 }

985

986 // expand suggestions with dot(s)

987 if (abbv && pAMgr && pAMgr->get_sugswithdots()) {

988 for (int j = 0; j < ns; j++) {

989 (slst)[j] = (char ) realloc((slst)[j], strlen((slst)[j]) + 1 + abbv);

990 strcat((*slst)[j], word + strlen(word) - abbv);

991 }

992 }

993

994 // remove bad capitalized and forbidden forms

995 if (pAMgr && (pAMgr->get_keepcase() \|\| pAMgr->get_forbiddenword())) {

996 switch (captype) {

997 case INITCAP:

998 case ALLCAP: {

999 int l = 0;

1000 for (int j=0; j < ns; j++) {

1001 if (!strchr((slst)[j],' ') && !spell((slst)[j])) {

1002 char s[MAXSWUTF8L];

1003 w_char w[MAXSWL];

1004 int len;

1005 if (utf8) {

1006 len = u8_u16(w, MAXSWL, (*slst)[j]);

1007 } else {

1008 strcpy(s, (*slst)[j]);

1009 len = strlen(s);

1010 }

1011 mkallsmall2(s, w, len);

1012 free((*slst)[j]);

1013 if (spell(s)) {

1014 (*slst)[l] = mystrdup(s);

1015 if ((*slst)[l]) l++;

1016 } else {

1017 mkinitcap2(s, w, len);

1018 if (spell(s)) {

1019 (*slst)[l] = mystrdup(s);

1020 if ((*slst)[l]) l++;

1021 }

1022 }

1023 } else {

1024 (slst)[l] = (slst)[j];

1025 l++;

1026 }

1027 }

1028 ns = l;

1029 }

1030 }

1031 }

1032

1033 // remove duplications

1034 int l = 0;

1035 for (int j = 0; j < ns; j++) {

1036 (slst)[l] = (slst)[j];

1037 for (int k = 0; k < l; k++) {

1038 if (strcmp((slst)[k], (slst)[j]) == 0) {

1039 free((*slst)[j]);

1040 l--;

1041 break;

1042 }

1043 }

1044 l++;

1045 }

1046 ns = l;

1047

1048 // output conversion

1049 rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;

1050 for (int j = 0; rl && j < ns; j++) {

1051 if (rl->conv((*slst)[j], wspace)) {

1052 free((*slst)[j]);

1053 (*slst)[j] = mystrdup(wspace);

1054 }

1055 }

1056

1057 // if suggestions removed by nosuggest, onlyincompound parameters

1058 if (l == 0 && *slst) {

1059 free(*slst);

1060 *slst = NULL;

1061 }

1062 return l;

1063 }

1064

1065 void Hunspell::free_list(char *** slst, int n) {

1066 freelist(slst, n);

1067 }

1068

1069 char * Hunspell::get_dic_encoding()

1070 {

1071 return encoding;

1072 }

1073

#ifdef HUNSPELL_EXPERIMENTAL
// XXX need UTF-8 support
//
// Experimental variant of suggest() built on SuggestMgr::suggest_auto().
// slst - out: suggestion array (set to NULL before the first lookup)
// word - NUL-terminated input word
// Returns the number of suggestions, or 0 when the input is rejected.
int Hunspell::suggest_auto(char*** slst, const char * word)
{
    char cw[MAXWORDUTF8LEN];
    char wspace[MAXWORDUTF8LEN];
    if (!pSMgr || maxdic == 0) return 0;
    int wl = strlen(word);
    if (utf8) {
        if (wl >= MAXWORDUTF8LEN) return 0;
    } else {
        if (wl >= MAXWORDLEN) return 0;
    }
    int captype = 0;
    int abbv = 0;
    wl = cleanword(cw, word, &captype, &abbv);
    if (wl == 0) return 0;
    int ns = 0;
    *slst = NULL; // HU, nsug in pSMgr->suggest

    switch(captype) {
        case NOCAP: {
            ns = pSMgr->suggest_auto(slst, cw, ns);
            break;
        }

        case INITCAP: {
            memcpy(wspace,cw,(wl+1));
            mkallsmall(wspace);
            ns = pSMgr->suggest_auto(slst, wspace, ns);
            for (int j=0; j < ns; j++)
                mkinitcap((*slst)[j]);
            ns = pSMgr->suggest_auto(slst, cw, ns);
            break;
        }

        case HUHINITCAP:
        case HUHCAP: {
            ns = pSMgr->suggest_auto(slst, cw, ns);
            if (ns == 0) {
                memcpy(wspace,cw,(wl+1));
                mkallsmall(wspace);
                ns = pSMgr->suggest_auto(slst, wspace, ns);
            }
            break;
        }

        case ALLCAP: {
            memcpy(wspace,cw,(wl+1));
            mkallsmall(wspace);
            ns = pSMgr->suggest_auto(slst, wspace, ns);

            mkinitcap(wspace);
            ns = pSMgr->suggest_auto(slst, wspace, ns);

            for (int j=0; j < ns; j++)
                mkallcap((*slst)[j]);
            break;
        }
    }

    // word reversing wrapper for complex prefixes
    if (complexprefixes) {
        for (int j = 0; j < ns; j++) {
            if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
        }
    }

    // expand suggestions with dot(s)
    if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
        for (int j = 0; j < ns; j++) {
            (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
            strcat((*slst)[j], word + strlen(word) - abbv);
        }
    }

    // LANG_hu section: replace '-' with ' ' in Hungarian
    if (langnum == LANG_hu) {
        for (int j=0; j < ns; j++) {
            char * pos = strchr((*slst)[j],'-');
            if (pos) {
                // spell() leaves info untouched on its early returns,
                // so give it a defined value first
                int info = 0;
                char w[MAXWORDUTF8LEN];
                *pos = '\0';
                strcpy(w, (*slst)[j]);
                strcat(w, pos + 1);
                spell(w, &info, NULL);
                if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
                    *pos = ' ';
                } else *pos = '-';
            }
        }
    }
    // END OF LANG_hu section
    return ns;
}
#endif

1173

1174 int Hunspell::stem(char* slst, char desc, int n)

1175 {

1176 char result[MAXLNLEN];

1177 char result2[MAXLNLEN];

1178 *slst = NULL;

1179 if (n == 0) return 0;

1180 *result2 = '\0';

1181 for (int i = 0; i < n; i++) {

1182 *result = '\0';

1183 // add compound word parts (except the last one)

1184 char * s = (char *) desc[i];

1185 char * part = strstr(s, MORPH_PART);

1186 if (part) {

1187 char * nextpart = strstr(part + 1, MORPH_PART);

1188 while (nextpart) {

1189 copy_field(result + strlen(result), part, MORPH_PART);

1190 part = nextpart;

1191 nextpart = strstr(part + 1, MORPH_PART);

1192 }

1193 s = part;

1194 }

1195

1196 char **pl;

1197 char tok[MAXLNLEN];

1198 strcpy(tok, s);

1199 char * alt = strstr(tok, " \| ");

1200 while (alt) {

1201 alt[1] = MSEP_ALT;

1202 alt = strstr(alt, " \| ");

1203 }

1204 int pln = line_tok(tok, &pl, MSEP_ALT);

1205 for (int k = 0; k < pln; k++) {

1206 // add derivational suffixes

1207 if (strstr(pl[k], MORPH_DERI_SFX)) {

1208 // remove inflectional suffixes

1209 char * is = strstr(pl[k], MORPH_INFL_SFX);

1210 if (is) *is = '\0';

1211 char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);

1212 if (sg) {

1213 char ** gen;

1214 int genl = line_tok(sg, &gen, MSEP_REC);

1215 free(sg);

1216 for (int j = 0; j < genl; j++) {

1217 sprintf(result2 + strlen(result2), "%c%s%s",

1218 MSEP_REC, result, gen[j]);

1219 }

1220 freelist(&gen, genl);

1221 }

1222 } else {

1223 sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);

1224 if (strstr(pl[k], MORPH_SURF_PFX)) {

1225 copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);

1226 }

1227 copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);

1228 }

1229 }

1230 freelist(&pl, pln);

1231 }

1232 int sln = line_tok(result2, slst, MSEP_REC);

1233 return uniqlist(*slst, sln);

1234

1235 }

1236

1237 int Hunspell::stem(char*** slst, const char * word)

1238 {

1239 char ** pl;

1240 int pln = analyze(&pl, word);

1241 int pln2 = stem(slst, pl, pln);

1242 freelist(&pl, pln);

1243 return pln2;

1244 }

1245

1246 #ifdef HUNSPELL_EXPERIMENTAL

1247 int Hunspell::suggest_pos_stems(char*** slst, const char * word)

1248 {

1249 char cw[MAXWORDUTF8LEN];

1250 char wspace[MAXWORDUTF8LEN];

1251 if (! pSMgr \|\| maxdic == 0) return 0;

1252 int wl = strlen(word);

1253 if (utf8) {

1254 if (wl >= MAXWORDUTF8LEN) return 0;

1255 } else {

1256 if (wl >= MAXWORDLEN) return 0;

1257 }

1258 int captype = 0;

1259 int abbv = 0;

1260 wl = cleanword(cw, word, &captype, &abbv);

1261 if (wl == 0) return 0;

1262

1263 int ns = 0; // ns=0 = normalized input

1264

1265 *slst = NULL; // HU, nsug in pSMgr->suggest

1266

1267 switch(captype) {

1268 case HUHCAP:

1269 case NOCAP: {

1270 ns = pSMgr->suggest_pos_stems(slst, cw, ns);

1271

1272 if ((abbv) && (ns == 0)) {

1273 memcpy(wspace,cw,wl);

1274 *(wspace+wl) = '.';

1275 *(wspace+wl+1) = '\0';

1276 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

1277 }

1278

1279 break;

1280 }

1281

1282 case INITCAP: {

1283

1284 ns = pSMgr->suggest_pos_stems(slst, cw, ns);

1285

1286 if (ns == 0 \|\| ((*slst)[0][0] == '#')) {

1287 memcpy(wspace,cw,(wl+1));

1288 mkallsmall(wspace);

1289 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

1290 }

1291

1292 break;

1293

1294 }

1295

1296 case ALLCAP: {

1297 ns = pSMgr->suggest_pos_stems(slst, cw, ns);

1298 if (ns != 0) break;

1299

1300 memcpy(wspace,cw,(wl+1));

1301 mkallsmall(wspace);

1302 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

1303

1304 if (ns == 0) {

1305 mkinitcap(wspace);

1306 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

1307 }

1308 break;

1309 }

1310 }

1311

1312 return ns;

1313 }

1314 #endif // END OF HUNSPELL_EXPERIMENTAL CODE

1315

1316 const char * Hunspell::get_wordchars()

1317 {

1318 return pAMgr->get_wordchars();

1319 }

1320

1321 unsigned short * Hunspell::get_wordchars_utf16(int * len)

1322 {

1323 return pAMgr->get_wordchars_utf16(len);

1324 }

1325

1326 void Hunspell::mkinitcap(char * p)

1327 {

1328 if (!utf8) {

1329 if (p != '\0') p = csconv[((unsigned char)*p)].cupper;

1330 } else {

1331 int len;

1332 w_char u[MAXWORDLEN];

1333 len = u8_u16(u, MAXWORDLEN, p);

1334 unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);

1335 u[0].h = (unsigned char) (i >> 8);

1336 u[0].l = (unsigned char) (i & 0x00FF);

1337 u16_u8(p, MAXWORDUTF8LEN, u, len);

1338 }

1339 }

1340

1341 int Hunspell::mkinitcap2(char * p, w_char * u, int nc)

1342 {

1343 if (!utf8) {

1344 if (p != '\0') p = csconv[((unsigned char)*p)].cupper;

1345 } else if (nc > 0) {

1346 unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);

1347 u[0].h = (unsigned char) (i >> 8);

1348 u[0].l = (unsigned char) (i & 0x00FF);

1349 u16_u8(p, MAXWORDUTF8LEN, u, nc);

1350 return strlen(p);

1351 }

1352 return nc;

1353 }

1354

1355 int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)

1356 {

1357 if (!utf8) {

1358 if (p != '\0') p = csconv[((unsigned char)*p)].clower;

1359 } else if (nc > 0) {

1360 unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);

1361 u[0].h = (unsigned char) (i >> 8);

1362 u[0].l = (unsigned char) (i & 0x00FF);

1363 u16_u8(p, MAXWORDUTF8LEN, u, nc);

1364 return strlen(p);

1365 }

1366 return nc;

1367 }

1368

1369 int Hunspell::add(const char * word)

1370 {

1371 if (pHMgr[0]) return (pHMgr[0])->add(word);

1372 return 0;

1373 }

1374

1375 int Hunspell::add_with_affix(const char * word, const char * example)

1376 {

1377 if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);

1378 return 0;

1379 }

1380

1381 int Hunspell::remove(const char * word)

1382 {

1383 if (pHMgr[0]) return (pHMgr[0])->remove(word);

1384 return 0;

1385 }

1386

1387 const char * Hunspell::get_version()

1388 {

1389 return pAMgr->get_version();

1390 }

1391

1392 struct cs_info * Hunspell::get_csconv()

1393 {

1394 return csconv;

1395 }

1396

1397 void Hunspell::cat_result(char * result, char * st)

1398 {

1399 if (st) {

1400 if (*result) mystrcat(result, "\n", MAXLNLEN);

1401 mystrcat(result, st, MAXLNLEN);

1402 free(st);

1403 }

1404 }

1405

1406 int Hunspell::analyze(char*** slst, const char * word)

1407 {

1408 char cw[MAXWORDUTF8LEN];

1409 char wspace[MAXWORDUTF8LEN];

1410 w_char unicw[MAXWORDLEN];

1411 int wl2 = 0;

1412 *slst = NULL;

1413 if (! pSMgr \|\| maxdic == 0) return 0;

1414 int nc = strlen(word);

1415 if (utf8) {

1416 if (nc >= MAXWORDUTF8LEN) return 0;

1417 } else {

1418 if (nc >= MAXWORDLEN) return 0;

1419 }

1420 int captype = 0;

1421 int abbv = 0;

1422 int wl = 0;

1423

1424 // input conversion

1425 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

1426 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &cap type, &abbv);

1427 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

1428

1429 if (wl == 0) {

1430 if (abbv) {

1431 for (wl = 0; wl < abbv; wl++) cw[wl] = '.';

1432 cw[wl] = '\0';

1433 abbv = 0;

1434 } else return 0;

1435 }

1436

1437 char result[MAXLNLEN];

1438 char * st = NULL;

1439

1440 *result = '\0';

1441

1442 int n = 0;

1443 int n2 = 0;

1444 int n3 = 0;

1445

1446 // test numbers

1447 // LANG_hu section: set dash information for suggestions

1448 if (langnum == LANG_hu) {

1449 while ((n < wl) &&

1450 (((cw[n] <= '9') && (cw[n] >= '0')) \|\| (((cw[n] == '.') \|\| (cw[n] == ',' )) && (n > 0)))) {

1451 n++;

1452 if ((cw[n] == '.') \|\| (cw[n] == ',')) {

1453 if (((n2 == 0) && (n > 3)) \|\|

1454 ((n2 > 0) && ((cw[n-1] == '.') \|\| (cw[n-1] == ',')))) br eak;

1455 n2++;

1456 n3 = n;

1457 }

1458 }

1459

1460 if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;

1461 if ((n == wl) \|\| ((n>0) && ((cw[n]=='%') \|\| (cw[n]=='\xB0')) && checkword(cw+n , NULL, NULL))) {

1462 mystrcat(result, cw, MAXLNLEN);

1463 result[n - 1] = '\0';

1464 if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));

1465 else {

1466 char sign = cw[n];

1467 cw[n] = '\0';

1468 cat_result(result, pSMgr->suggest_morph(cw + n - 1));

1469 mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE

1470 cw[n] = sign;

1471 cat_result(result, pSMgr->suggest_morph(cw + n));

1472 }

1473 return line_tok(result, slst, MSEP_REC);

1474 }

1475 }

1476 // END OF LANG_hu section

1477

1478 switch(captype) {

1479 case HUHCAP:

1480 case HUHINITCAP:

1481 case NOCAP: {

1482 cat_result(result, pSMgr->suggest_morph(cw));

1483 if (abbv) {

1484 memcpy(wspace,cw,wl);

1485 *(wspace+wl) = '.';

1486 *(wspace+wl+1) = '\0';

1487 cat_result(result, pSMgr->suggest_morph(wspace));

1488 }

1489 break;

1490 }

1491 case INITCAP: {

1492 wl = mkallsmall2(cw, unicw, nc);

1493 memcpy(wspace,cw,(wl+1));

1494 wl2 = mkinitcap2(cw, unicw, nc);

1495 cat_result(result, pSMgr->suggest_morph(wspace));

1496 cat_result(result, pSMgr->suggest_morph(cw));

1497 if (abbv) {

1498 *(wspace+wl) = '.';

1499 *(wspace+wl+1) = '\0';

1500 cat_result(result, pSMgr->suggest_morph(wspace));

1501

1502 memcpy(wspace, cw, wl2);

1503 *(wspace+wl2) = '.';

1504 *(wspace+wl2+1) = '\0';

1505

1506 cat_result(result, pSMgr->suggest_morph(wspace));

1507 }

1508 break;

1509 }

1510 case ALLCAP: {

1511 cat_result(result, pSMgr->suggest_morph(cw));

1512 if (abbv) {

1513 memcpy(wspace,cw,wl);

1514 *(wspace+wl) = '.';

1515 *(wspace+wl+1) = '\0';

1516 cat_result(result, pSMgr->suggest_morph(cw));

1517 }

1518 wl = mkallsmall2(cw, unicw, nc);

1519 memcpy(wspace,cw,(wl+1));

1520 wl2 = mkinitcap2(cw, unicw, nc);

1521

1522 cat_result(result, pSMgr->suggest_morph(wspace));

1523 cat_result(result, pSMgr->suggest_morph(cw));

1524 if (abbv) {

1525 *(wspace+wl) = '.';

1526 *(wspace+wl+1) = '\0';

1527 cat_result(result, pSMgr->suggest_morph(wspace));

1528

1529 memcpy(wspace, cw, wl2);

1530 *(wspace+wl2) = '.';

1531 *(wspace+wl2+1) = '\0';

1532

1533 cat_result(result, pSMgr->suggest_morph(wspace));

1534 }

1535 break;

1536 }

1537 }

1538

1539 if (*result) {

1540 // word reversing wrapper for complex prefixes

1541 if (complexprefixes) {

1542 if (utf8) reverseword_utf(result); else reverseword(result);

1543 }

1544 return line_tok(result, slst, MSEP_REC);

1545 }

1546

1547 // compound word with dash (HU) I18n

1548 char * dash = NULL;

1549 int nresult = 0;

1550 // LANG_hu section: set dash information for suggestions

1551 if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');

1552 if ((langnum == LANG_hu) && dash) {

1553 *dash='\0';

1554 // examine 2 sides of the dash

1555 if (dash[1] == '\0') { // base word ending with dash

1556 if (spell(cw)) {

1557 char * p = pSMgr->suggest_morph(cw);

1558 if (p) {

1559 int ret = line_tok(p, slst, MSEP_REC);

1560 free(p);

1561 return ret;

1562 }

1563

1564 }

1565 } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.

1566 if (spell(cw) && (spell("-e"))) {

1567 st = pSMgr->suggest_morph(cw);

1568 if (st) {

1569 mystrcat(result, st, MAXLNLEN);

1570 free(st);

1571 }

1572 mystrcat(result,"+", MAXLNLEN); // XXX spec. separator i n MORPHCODE

1573 st = pSMgr->suggest_morph("-e");

1574 if (st) {

1575 mystrcat(result, st, MAXLNLEN);

1576 free(st);

1577 }

1578 return line_tok(result, slst, MSEP_REC);

1579 }

1580 } else {

1581 // first word ending with dash: word- XXX ???

1582 char r2 = *(dash + 1);

1583 dash[0]='-';

1584 dash[1]='\0';

1585 nresult = spell(cw);

1586 dash[1] = r2;

1587 dash[0]='\0';

1588 if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) \|\|

1589 ((dash[1] > '0') && (dash[1] < '9')))) {

1590 st = pSMgr->suggest_morph(cw);

1591 if (st) {

1592 mystrcat(result, st, MAXLNLEN);

1593 free(st);

1594 mystrcat(result,"+", MAXLNLEN); // XXX spec. sep arator in MORPHCODE

1595 }

1596 st = pSMgr->suggest_morph(dash+1);

1597 if (st) {

1598 mystrcat(result, st, MAXLNLEN);

1599 free(st);

1600 }

1601 return line_tok(result, slst, MSEP_REC);

1602 }

1603 }

1604 // affixed number in correct word

1605 if (nresult && (dash > cw) && (((*(dash-1)<='9') &&

1606 ((dash-1)>='0')) \|\| ((dash-1)=='.'))) {

1607 *dash='-';

1608 n = 1;

1609 if (*(dash - n) == '.') n++;

1610 // search first not a number character to left from dash

1611 while (((dash - n)>=cw) && ((*(dash - n)=='0') \|\| (n < 3)) && (n < 6)) {

1612 n++;

1613 }

1614 if ((dash - n) < cw) n--;

1615 // numbers: valami1000000-hoz

1616 // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,

1617 // 56-hoz, 6-hoz

1618 for(; n >= 1; n--) {

1619 if (((dash - n) >= '0') && ((dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {

1620 mystrcat(result, cw, MAXLNLEN);

1621 result[dash - cw - n] = '\0';

1622 st = pSMgr->suggest_morph(dash - n);

1623 if (st) {

1624 mystrcat(result, st, MAXLNLEN);

1625 free(st);

1626 }

1627 return line_tok(result, slst, MSEP_REC);

1628 }

1629 }

1630 }

1631 }

1632 return 0;

1633 }

1634

1635 int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)

1636 {

1637 *slst = NULL;

1638 if (!pSMgr \|\| !pln) return 0;

1639 char **pl2;

1640 int pl2n = analyze(&pl2, word);

1641 int captype = 0;

1642 int abbv = 0;

1643 char cw[MAXWORDUTF8LEN];

1644 cleanword(cw, word, &captype, &abbv);

1645 char result[MAXLNLEN];

1646 *result = '\0';

1647

1648 for (int i = 0; i < pln; i++) {

1649 cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));

1650 }

1651 freelist(&pl2, pl2n);

1652

1653 if (*result) {

1654 // allcap

1655 if (captype == ALLCAP) mkallcap(result);

1656

1657 // line split

1658 int linenum = line_tok(result, slst, MSEP_REC);

1659

1660 // capitalize

1661 if (captype == INITCAP \|\| captype == HUHINITCAP) {

1662 for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);

1663 }

1664

1665 // temporary filtering of prefix related errors (eg.

1666 // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")

1667

1668 int r = 0;

1669 for (int j=0; j < linenum; j++) {

1670 if (!spell((*slst)[j])) {

1671 free((*slst)[j]);

1672 (*slst)[j] = NULL;

1673 } else {

1674 if (r < j) (slst)[r] = (slst)[j];

1675 r++;

1676 }

1677 }

1678 if (r > 0) return r;

1679 free(*slst);

1680 *slst = NULL;

1681 }

1682 return 0;

1683 }

1684

1685 int Hunspell::generate(char*** slst, const char * word, const char * pattern)

1686 {

1687 char **pl;

1688 int pln = analyze(&pl, pattern);

1689 int n = generate(slst, word, pl, pln);

1690 freelist(&pl, pln);

1691 return uniqlist(*slst, n);

1692 }

1693

1694 // minimal XML parser functions

1695 int Hunspell::get_xml_par(char * dest, const char * par, int max)

1696 {

1697 char * d = dest;

1698 if (!par) return 0;

1699 char end = *par;

1700 char * dmax = dest + max;

1701 if (end == '>') end = '<';

1702 else if (end != '\'' && end != '"') return 0; // bad XML

1703 for (par++; d < dmax && par != '\0' && par != end; par++, d++) d = par;

1704 *d = '\0';

1705 mystrrep(dest, "<", "<");

1706 mystrrep(dest, "&", "&");

1707 return (int)(d - dest);

1708 }

1709

1710 int Hunspell::get_langnum() const

1711 {

1712 return langnum;

1713 }

1714

1715 // return the beginning of the element (attr == NULL) or the attribute

1716 const char * Hunspell::get_xml_pos(const char * s, const char * attr)

1717 {

1718 const char * end = strchr(s, '>');

1719 const char * p = s;

1720 if (attr == NULL) return end;

1721 do {

1722 p = strstr(p, attr);

1723 if (!p \|\| p >= end) return 0;

1724 } while ((p-1) != ' ' && (p-1) != '\n');

1725 return p + strlen(attr);

1726 }

1727

1728 int Hunspell::check_xml_par(const char * q, const char * attr, const char * valu e) {

1729 char cw[MAXWORDUTF8LEN];

1730 if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&

1731 strcmp(cw, value) == 0) return 1;

1732 return 0;

1733 }

1734

1735 int Hunspell::get_xml_list(char **slst, char list, const char * tag) {

1736 int n = 0;

1737 char * p;

1738 if (!list) return 0;

1739 for (p = list; ((p = strstr(p, tag)) != NULL); p++) n++;

1740 if (n == 0) return 0;

1741 slst = (char ) malloc(sizeof(char ) * n);

1742 if (!*slst) return 0;

1743 for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) {

1744 int l = strlen(p);

1745 (slst)[n] = (char ) malloc(l + 1);

1746 if (!(*slst)[n]) return n;

1747 if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {

1748 free((*slst)[n]);

1749 break;

1750 }

1751 }

1752 return n;

1753 }

1754

1755 int Hunspell::spellml(char*** slst, const char * word)

1756 {

1757 char q, q2;

1758 char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];

1759 q = (char *) strstr(word, "<query");

1760 if (!q) return 0; // bad XML input

1761 q2 = strchr(q, '>');

1762 if (!q2) return 0; // bad XML input

1763 q2 = strstr(q2, "<word");

1764 if (!q2) return 0; // bad XML input

1765 if (check_xml_par(q, "type=", "analyze")) {

1766 int n = 0, s = 0;

1767 if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(sls t, cw);

1768 if (n == 0) return 0;

1769 // convert the result to <code><a>ana1</a><a>ana2</a></code> format

1770 for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);

1771 char * r = (char ) malloc(6 + 5 s + 7 * n + 7 + 1); // XXX 5*s->&->&amp ;

1772 if (!r) return 0;

1773 strcpy(r, "<code>");

1774 for (int i = 0; i < n; i++) {

1775 int l = strlen(r);

1776 strcpy(r + l, "<a>");

1777 strcpy(r + l + 3, (*slst)[i]);

1778 mystrrep(r + l + 3, "\t", " ");

1779 mystrrep(r + l + 3, "<", "<");

1780 mystrrep(r + l + 3, "&", "&");

1781 strcat(r, "</a>");

1782 free((*slst)[i]);

1783 }

1784 strcat(r, "</code>");

1785 (*slst)[0] = r;

1786 return 1;

1787 } else if (check_xml_par(q, "type=", "stem")) {

1788 if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst , cw);

1789 } else if (check_xml_par(q, "type=", "generate")) {

1790 int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);

1791 if (n == 0) return 0;

1792 char * q3 = strstr(q2 + 1, "<word");

1793 if (q3) {

1794 if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {

1795 return generate(slst, cw, cw2);

1796 }

1797 } else {

1798 if ((q2 = strstr(q2 + 1, "<code")) != NULL) {

1799 char ** slst2;

1800 if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>")) != 0) {

1801 int n2 = generate(slst, cw, slst2, n);

1802 freelist(&slst2, n);

1803 return uniqlist(*slst, n2);

1804 }

1805 freelist(&slst2, n);

1806 }

1807 }

1808 }

1809 return 0;

1810 }

1811

1812

1813 #ifdef HUNSPELL_EXPERIMENTAL

1814 // XXX need UTF-8 support

1815 char * Hunspell::morph_with_correction(const char * word)

1816 {

1817 char cw[MAXWORDUTF8LEN];

1818 char wspace[MAXWORDUTF8LEN];

1819 if (! pSMgr \|\| maxdic == 0) return NULL;

1820 int wl = strlen(word);

1821 if (utf8) {

1822 if (wl >= MAXWORDUTF8LEN) return NULL;

1823 } else {

1824 if (wl >= MAXWORDLEN) return NULL;

1825 }

1826 int captype = 0;

1827 int abbv = 0;

1828 wl = cleanword(cw, word, &captype, &abbv);

1829 if (wl == 0) return NULL;

1830

1831 char result[MAXLNLEN];

1832 char * st = NULL;

1833

1834 *result = '\0';

1835

1836

1837 switch(captype) {

1838 case NOCAP: {

1839 st = pSMgr->suggest_morph_for_spelling_error(cw);

1840 if (st) {

1841 mystrcat(result, st, MAXLNLEN);

1842 free(st);

1843 }

1844 if (abbv) {

1845 memcpy(wspace,cw,wl);

1846 *(wspace+wl) = '.';

1847 *(wspace+wl+1) = '\0';

1848 st = pSMgr->suggest_morph_for_spelling_error(wspace);

1849 if (st) {

1850 if (*result) mystrcat(result, "\n", MAXLNLEN);

1851 mystrcat(result, st, MAXLNLEN);

1852 free(st);

1853 }

1854 }

1855 break;

1856 }

1857 case INITCAP: {

1858 memcpy(wspace,cw,(wl+1));

1859 mkallsmall(wspace);

1860 st = pSMgr->suggest_morph_for_spelling_error(wspace);

1861 if (st) {

1862 mystrcat(result, st, MAXLNLEN);

1863 free(st);

1864 }

1865 st = pSMgr->suggest_morph_for_spelling_error(cw);

1866 if (st) {

1867 if (*result) mystrcat(result, "\n", MAXLNLEN);

1868 mystrcat(result, st, MAXLNLEN);

1869 free(st);

1870 }

1871 if (abbv) {

1872 memcpy(wspace,cw,wl);

1873 *(wspace+wl) = '.';

1874 *(wspace+wl+1) = '\0';

1875 mkallsmall(wspace);

1876 st = pSMgr->suggest_morph_for_spelling_error(wspace);

1877 if (st) {

1878 if (*result) mystrcat(result, "\n", MAXLNLEN);

1879 mystrcat(result, st, MAXLNLEN);

1880 free(st);

1881 }

1882 mkinitcap(wspace);

1883 st = pSMgr->suggest_morph_for_spelling_error(wspace);

1884 if (st) {

1885 if (*result) mystrcat(result, "\n", MAXLNLEN);

1886 mystrcat(result, st, MAXLNLEN);

1887 free(st);

1888 }

1889 }

1890 break;

1891 }

1892 case HUHCAP: {

1893 st = pSMgr->suggest_morph_for_spelling_error(cw);

1894 if (st) {

1895 mystrcat(result, st, MAXLNLEN);

1896 free(st);

1897 }

1898 memcpy(wspace,cw,(wl+1));

1899 mkallsmall(wspace);

1900 st = pSMgr->suggest_morph_for_spelling_error(wspace);

1901 if (st) {

1902 if (*result) mystrcat(result, "\n", MAXLNLEN);

1903 mystrcat(result, st, MAXLNLEN);

1904 free(st);

1905 }

1906 break;

1907 }

1908 case ALLCAP: {

1909 memcpy(wspace,cw,(wl+1));

1910 st = pSMgr->suggest_morph_for_spelling_error(wspace);

1911 if (st) {

1912 mystrcat(result, st, MAXLNLEN);

1913 free(st);

1914 }

1915 mkallsmall(wspace);

1916 st = pSMgr->suggest_morph_for_spelling_error(wspace);

1917 if (st) {

1918 if (*result) mystrcat(result, "\n", MAXLNLEN);

1919 mystrcat(result, st, MAXLNLEN);

1920 free(st);

1921 }

1922 mkinitcap(wspace);

1923 st = pSMgr->suggest_morph_for_spelling_error(wspace);

1924 if (st) {

1925 if (*result) mystrcat(result, "\n", MAXLNLEN);

1926 mystrcat(result, st, MAXLNLEN);

1927 free(st);

1928 }

1929 if (abbv) {

1930 memcpy(wspace,cw,(wl+1));

1931 *(wspace+wl) = '.';

1932 *(wspace+wl+1) = '\0';

1933 if (*result) mystrcat(result, "\n", MAXLNLEN);

1934 st = pSMgr->suggest_morph_for_spelling_error(wspace);

1935 if (st) {

1936 mystrcat(result, st, MAXLNLEN);

1937 free(st);

1938 }

1939 mkallsmall(wspace);

1940 st = pSMgr->suggest_morph_for_spelling_error(wspace);

1941 if (st) {

1942 if (*result) mystrcat(result, "\n", MAXLNLEN);

1943 mystrcat(result, st, MAXLNLEN);

1944 free(st);

1945 }

1946 mkinitcap(wspace);

1947 st = pSMgr->suggest_morph_for_spelling_error(wspace);

1948 if (st) {

1949 if (*result) mystrcat(result, "\n", MAXLNLEN);

1950 mystrcat(result, st, MAXLNLEN);

1951 free(st);

1952 }

1953 }

1954 break;

1955 }

1956 }

1957

1958 if (*result) return mystrdup(result);

1959 return NULL;

1960 }

1961

1962 #endif // END OF HUNSPELL_EXPERIMENTAL CODE

1963

1964 Hunhandle Hunspell_create(const char affpath, const char * dpath)

1965 {

1966 #ifdef HUNSPELL_CHROME_CLIENT

1967 return NULL;

1968 #else

1969 return (Hunhandle*)(new Hunspell(affpath, dpath));

1970 #endif

1971 }

1972

1973 Hunhandle Hunspell_create_key(const char affpath, const char * dpath,

1974 const char * key)

1975 {

1976 #ifdef HUNSPELL_CHROME_CLIENT

1977 return NULL;

1978 #else

1979 return (Hunhandle*)(new Hunspell(affpath, dpath, key));

1980 #endif

1981 }

1982

1983 void Hunspell_destroy(Hunhandle *pHunspell)

1984 {

1985 delete (Hunspell*)(pHunspell);

1986 }

1987

1988 int Hunspell_spell(Hunhandle pHunspell, const char word)

1989 {

1990 return ((Hunspell*)pHunspell)->spell(word);

1991 }

1992

1993 char Hunspell_get_dic_encoding(Hunhandle pHunspell)

1994 {

1995 return ((Hunspell*)pHunspell)->get_dic_encoding();

1996 }

1997

1998 int Hunspell_suggest(Hunhandle pHunspell, char** slst, const char * word)

1999 {

2000 return ((Hunspell*)pHunspell)->suggest(slst, word);

2001 }

2002

2003 int Hunspell_analyze(Hunhandle pHunspell, char** slst, const char * word)

2004 {

2005 return ((Hunspell*)pHunspell)->analyze(slst, word);

2006 }

2007

2008 int Hunspell_stem(Hunhandle pHunspell, char** slst, const char * word)

2009 {

2010 return ((Hunspell*)pHunspell)->stem(slst, word);

2011 }

2012

2013 int Hunspell_stem2(Hunhandle pHunspell, char slst, char desc, int n)

2014 {

2015 return ((Hunspell*)pHunspell)->stem(slst, desc, n);

2016 }

2017

2018 int Hunspell_generate(Hunhandle pHunspell, char** slst, const char * word,

2019 const char * word2)

2020 {

2021 return ((Hunspell*)pHunspell)->generate(slst, word, word2);

2022 }

2023

2024 int Hunspell_generate2(Hunhandle pHunspell, char** slst, const char * word,

2025 char** desc, int n)

2026 {

2027 return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);

2028 }

2029

2030 /* functions for run-time modification of the dictionary */

2031

2032 /* add word to the run-time dictionary */

2033

2034 int Hunspell_add(Hunhandle pHunspell, const char word) {

2035 return ((Hunspell*)pHunspell)->add(word);

2036 }

2037

2038 /* add word to the run-time dictionary with affix flags of

2039 * the example (a dictionary word): Hunspell will recognize

2040 * affixed forms of the new word, too.

2041 */

2042

2043 int Hunspell_add_with_affix(Hunhandle pHunspell, const char word,

2044 const char * example) {

2045 return ((Hunspell*)pHunspell)->add_with_affix(word, example);

2046 }

2047

2048 /* remove word from the run-time dictionary */

2049

2050 int Hunspell_remove(Hunhandle pHunspell, const char word) {

2051 return ((Hunspell*)pHunspell)->remove(word);

2052 }

2053

2054 void Hunspell_free_list(Hunhandle , char ** slst, int n) {

2055 freelist(slst, n);

2056 }

OLD	NEW

« no previous file with comments | « third_party/hunspell_new/src/hunspell/hunspell.hxx ('k') | third_party/hunspell_new/src/hunspell/hunspell.dsp » ('j') | no next file with comments »