Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(23)

Side by Side Diff: third_party/hunspell/google.patch

Issue 2239005: Merges our hunspell change to hunspell 1.2.10.... (Closed) Base URL: svn://chrome-svn.corp.google.com/chrome/trunk/deps/
Patch Set: '' Created 10 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/hunspell/README.chromium ('k') | third_party/hunspell/hunspell.gyp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 Index: README.chromium
2 ===================================================================
3 --- README.chromium (revision 48261)
4 +++ README.chromium (working copy)
5 @@ -1,29 +1,15 @@
6 -This is a partial copy of Hunspell 1.1.5, with the following changes:
7 -* '#include "config.h"' removed from src/hunspell/hunspell.hxx
8 -* '#include "config.h"' removed from src/hunspell/license.hunspell
9 -* Two unreferenced local variables removed from src/hunspell/suggestmgr.cxx
10 -* src/hunspell/utf_info.cxx moved to src/hunspell/utf_info.hxx, and #include
11 - reference in src/hunspell/csutil.cxx changed accordingly
12 -* Change the input params of the constructors to receive a FILE* instead of
13 - a file path. This is required to use hunspell in the sandbox.
14 - The patch is in google.patch.
15 +This is a partial copy of Hunspell 1.2.10 with the following changes:
16 +* Remove '#include "config.h"' from src/hunspell/hunspell.hxx
17 +* Remove '#include "config.h"' from src/hunspell/license.hunspell
18 +* Change src/hunspell/filemgr.hxx and src/hunspell/filemgr.cxx to use
19 + LineIterator.
20 +* Add ScopedHashEntryFactory, which creates temporary hentry objects, to
21 + src/hunspell/suggestmgr.cxx
22 +* Change the input params of the constructors to receive a BDICTReader instead
23 + of a file path.
24 +The patch is in google.patch.
25
26 -The English dictionary distributed by Firefox has been checked in to the
27 -dictionaries directory. It has several additions over the default
28 -myspell/hunspell dictionary.
29 -
30 -* Workaround for non-ASCII characters
31 -
32 -Visual Studio on Japanese Windows assumes the source files to be
33 -encoded in Shift_JIS. The compiler is unhappy with non-ASCII letters
34 -in the source files of Hunspell. The same problem happens with other
35 -CJK Windows as well. Here is the workaround for this problem:
36 -
37 -Convert 8-bit bytes to hexadecimal escaped forms by
38 -
39 - % perl -i -De 's/([\x80-\xff])/sprintf("\\x%02x", $1)/ge' src/*.cxx
40 -
41 -
42 -Note that Hunspell upstream is going to fix this problem. We'll no
43 -longer need the workaround if the problem is fixed in the upstream.
44 -
45 +All dictionaries used by Chromium have been checked in to the
46 +'third_party/hunspell_dictionaries' directory. They have several additions over
47 +the default myspell/hunspell dictionaries.
48 +(See 'third_party/hunspell_dictionaries/README.chromium' for their details.)
49 Index: src/hunspell/filemgr.cxx
50 ===================================================================
51 --- src/hunspell/filemgr.cxx (revision 48261)
52 +++ src/hunspell/filemgr.cxx (working copy)
53 @@ -7,6 +7,32 @@
54
55 #include "filemgr.hxx"
56
57 +#ifdef HUNSPELL_CHROME_CLIENT
58 +#include "third_party/hunspell/google/bdict_reader.h"
59 +
60 +FileMgr::FileMgr(hunspell::LineIterator* iterator) : iterator_(iterator) {
61 +}
62 +
63 +FileMgr::~FileMgr() {
64 +}
65 +
66 +char * FileMgr::getline() {
67 + // Read one line from a BDICT file and store the line to our line buffer.
68 + // To emulate the original FileMgr::getline(), this function returns
69 + // the pointer to our line buffer if we can read a line without errors.
70 + // Otherwise, this function returns NULL.
71 + bool result = iterator_->AdvanceAndCopy(line_, BUFSIZE - 1);
72 + return result ? line_ : NULL;
73 +}
74 +
75 +int FileMgr::getlinenum() {
76 + // This function is used only for displaying the line number at which a
77 + // parser error occurred. For a BDICT file, a line number does little to
78 + // help locate the cause of a parser error since it is a binary file.
79 + // So, we just return 0.
80 + return 0;
81 +}
82 +#else
83 int FileMgr::fail(const char * err, const char * par) {
84 fprintf(stderr, err, par);
85 return -1;
86 @@ -47,3 +73,4 @@
87 int FileMgr::getlinenum() {
88 return linenum;
89 }
90 +#endif
91 Index: src/hunspell/suggestmgr.cxx
92 ===================================================================
93 --- src/hunspell/suggestmgr.cxx (revision 48261)
94 +++ src/hunspell/suggestmgr.cxx (working copy)
95 @@ -12,6 +12,99 @@
96
97 const w_char W_VLINE = { '\0', '|' };
98
99 +#ifdef HUNSPELL_CHROME_CLIENT
100 +namespace {
101 +// A simple class which creates temporary hentry objects which can be
102 +// available only in a scope. To conceal memory operations from SuggestMgr
103 +// functions, this object automatically deletes all hentry objects created
104 +// through CreateScopedHashEntry() calls in its destructor.
105 +// So, the following snippet raises a memory error.
106 +//
107 +// hentry* bad_copy = NULL;
108 +// {
109 +// ScopedHashEntryFactory factory;
110 +// hentry* scoped_copy = factory.CreateHashEntry(source);
111 +// ...
112 +// bad_copy = scoped_copy;
113 +// }
114 +// if (bad_copy->word[0]) // memory for scoped_copy has been deleted!
115 +//
116 +// As listed in the above snippet, it is simple to use this class.
117 +// 1. Declare an instance of this ScopedHashEntryFactory, and;
118 +// 2. Call its CreateHashEntry() member instead of using 'new hentry' or
119 +// 'operator='.
120 +//
121 +// TODO(hbono): this implementation is slower than the previous one of brettw.
122 +// We need to improve it?
123 +//
124 +class ScopedHashEntryFactory {
125 + public:
126 + ScopedHashEntryFactory();
127 + ~ScopedHashEntryFactory();
128 +
129 + // Creates a temporary copy of the given hentry struct.
130 + // The returned copy is available only while this object is available.
131 + // NOTE: this function just calls memcpy() in creating a copy of the given
132 + // hentry struct, i.e. it does NOT copy objects referred by pointers of the
133 + // given hentry struct.
134 + hentry* CreateScopedHashEntry(int index, const hentry* source);
135 +
136 + private:
137 + // A struct which encapsulates the new hentry struct used by hunspell 1.2.8.
138 + // The hentry struct used by hunspell 1.2.8 becomes a variable-length struct,
139 + // i.e. it uses its 'word[1]' array member as a variable-length array.
140 + // C/C++ doesn't check boundaries of a char array. For example, for a char
141 + // array 'char word[1]', we cannot only access 'word[0]' but also access
142 + // 'word[1]', 'word[2]', etc.
143 + // To handle this new hentry struct, we define a struct which combines
144 + // three values (an hentry struct 'hentry', a char array 'word[kMaxWordLen]',
145 + // and an unsigned short value 'astr') so that a HashEntryItem 'hash_item'
146 + // satisfies the following equations:
147 + // hash_item.entry.word[1] == hash_item.word[0].
148 + // hash_item.entry.word[2] == hash_item.word[1].
149 + // ...
150 + // hash_item.entry.word[n] == hash_item.word[n-1].
151 + // ...
152 + // hash_item.entry.word[kMaxWordLen] == hash_item.word[kMaxWordLen-1].
153 + enum {
154 + kMaxWordLen = 128,
155 + };
156 + struct HashEntryItem {
157 + hentry entry;
158 + char word[kMaxWordLen];
159 + unsigned short astr;
160 + };
161 +
162 + HashEntryItem hash_items_[MAX_ROOTS];
163 +};
164 +
165 +ScopedHashEntryFactory::ScopedHashEntryFactory() {
166 + memset(&hash_items_[0], 0, sizeof(hash_items_));
167 +}
168 +
169 +ScopedHashEntryFactory::~ScopedHashEntryFactory() {
170 +}
171 +
172 +hentry* ScopedHashEntryFactory::CreateScopedHashEntry(int index,
173 + const hentry* source) {
174 + if (index >= MAX_ROOTS || source->blen >= kMaxWordLen)
175 + return NULL;
176 +
177 + // Retrieve a HashEntryItem struct from our spool, initialize it, and
178 + // returns the address of its 'hentry' member.
179 + size_t source_size = sizeof(hentry) + source->blen + 1;
180 + HashEntryItem* hash_item = &hash_items_[index];
181 + memcpy(&hash_item->entry, source, source_size);
182 + if (source->astr) {
183 + hash_item->astr = *source->astr;
184 + hash_item->entry.astr = &hash_item->astr;
185 + }
186 + return &hash_item->entry;
187 +}
188 +
189 +} // namespace
190 +#endif
191 +
192 SuggestMgr::SuggestMgr(const char * tryme, int maxn,
193 AffixMgr * aptr)
194 {
195 @@ -1029,6 +1122,11 @@
196
197 struct hentry* hp = NULL;
198 int col = -1;
199 +
200 +#ifdef HUNSPELL_CHROME_CLIENT
201 + ScopedHashEntryFactory hash_entry_factory;
202 +#endif
203 +
204 phonetable * ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
205 char target[MAXSWUTF8L];
206 char candidate[MAXSWUTF8L];
207 @@ -1066,7 +1164,11 @@
208
209 if (sc > scores[lp]) {
210 scores[lp] = sc;
211 +#ifdef HUNSPELL_CHROME_CLIENT
212 + roots[lp] = hash_entry_factory.CreateScopedHashEntry(lp, hp);
213 +#else
214 roots[lp] = hp;
215 +#endif
216 lval = sc;
217 for (j=0; j < MAX_ROOTS; j++)
218 if (scores[j] < lval) {
219 Index: src/hunspell/replist.hxx
220 ===================================================================
221 --- src/hunspell/replist.hxx (revision 48261)
222 +++ src/hunspell/replist.hxx (working copy)
223 @@ -2,6 +2,12 @@
224 #ifndef _REPLIST_HXX_
225 #define _REPLIST_HXX_
226
227 +#ifdef HUNSPELL_CHROME_CLIENT
228 +// When compiling spellchecker.cc, 'near' is treated as a macro, which causes
229 +// compilation issues; undefining it here solves that problem.
230 +#undef near
231 +#endif
232 +
233 #include "hunvisapi.h"
234
235 #include "w_char.hxx"
236 Index: src/hunspell/filemgr.hxx
237 ===================================================================
238 --- src/hunspell/filemgr.hxx (revision 48261)
239 +++ src/hunspell/filemgr.hxx (working copy)
240 @@ -7,6 +7,30 @@
241 #include "hunzip.hxx"
242 #include <stdio.h>
243
244 +#ifdef HUNSPELL_CHROME_CLIENT
245 +namespace hunspell {
246 +class LineIterator;
247 +} // namespace hunspell
248 +
249 +// A class which encapsulates operations of reading a BDICT file.
250 +// Chrome uses a BDICT file to compress hunspell dictionaries. A BDICT file is
251 +// a binary file converted from a DIC file and an AFF file. (See
252 +// "bdict_reader.h" for its format.)
253 +// This class encapsulates the operations of reading a BDICT file and emulates
254 +// the original FileMgr operations for AffixMgr so that it can read a BDICT
255 +// file without so many changes.
256 +class FileMgr {
257 + public:
258 + FileMgr(hunspell::LineIterator* iterator);
259 + ~FileMgr();
260 + char* getline();
261 + int getlinenum();
262 +
263 + protected:
264 + hunspell::LineIterator* iterator_;
265 + char line_[BUFSIZE + 50]; // input buffer
266 +};
267 +#else
268 class LIBHUNSPELL_DLL_EXPORTED FileMgr
269 {
270 protected:
271 @@ -23,3 +47,4 @@
272 int getlinenum();
273 };
274 #endif
275 +#endif
1 Index: src/hunspell/affixmgr.cxx 276 Index: src/hunspell/affixmgr.cxx
2 =================================================================== 277 ===================================================================
3 --- src/hunspell/affixmgr.cxx» (revision 3811) 278 --- src/hunspell/affixmgr.cxx» (revision 48261)
4 +++ src/hunspell/affixmgr.cxx (working copy) 279 +++ src/hunspell/affixmgr.cxx (working copy)
5 @@ -25,7 +27,7 @@ 280 @@ -14,8 +14,14 @@
6 #endif 281
7 #endif 282 #include "csutil.hxx"
8 283
9 -AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr) 284 +#ifdef HUNSPELL_CHROME_CLIENT
10 +AffixMgr::AffixMgr(FILE* aff_handle, HashMgr* ptr) 285 +AffixMgr::AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md)
11 { 286 +{
287 + bdict_reader = reader;
288 +#else
289 AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key)
290 {
291 +#endif
12 // register hash manager and load affix data from aff file 292 // register hash manager and load affix data from aff file
13 pHMgr = ptr; 293 pHMgr = ptr[0];
14 @@ -104,8 +106,8 @@ 294 alldic = ptr;
295 @@ -99,9 +105,17 @@
296 sFlag[i] = NULL;
297 }
298
299 +#ifdef HUNSPELL_CHROME_CLIENT
300 + // Define dummy parameters for parse_file() to avoid changing the parameters
301 + // of parse_file(). This may make it easier to merge the changes of the
302 + // original hunspell.
303 + const char* affpath = NULL;
304 + const char* key = NULL;
305 +#else
306 for (int j=0; j < CONTSIZE; j++) {
15 contclasses[j] = 0; 307 contclasses[j] = 0;
16 } 308 }
17 309 +#endif
18 - if (parse_file(affpath)) { 310
19 - HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath); 311 if (parse_file(affpath, key)) {
20 + if (parse_file(aff_handle)) { 312 HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);
21 + HUNSPELL_WARNING(stderr, "Failure loading aff file\n"); 313 @@ -252,6 +266,43 @@
22 wordchars = mystrdup("qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM "); 314 char * line; // io buffers
23 } 315 char ft; // affix type
24 316
25 @@ -232,7 +234,7 @@ 317 +#ifdef HUNSPELL_CHROME_CLIENT
26 318 + // open the affix file
27 319 + // We're always UTF-8
28 // read in aff file and build up prefix and suffix entry objects 320 + utf8 = 1;
29 -int AffixMgr::parse_file(const char * affpath) 321 +
30 +int AffixMgr::parse_file(FILE* aff_handle) 322 + // A BDICT file stores PFX and SFX lines in a special section and it provides
31 { 323 + // a special line iterator for reading PFX and SFX lines.
32 324 + // We create a FileMgr object from this iterator and parse PFX and SFX lines
33 // io buffers 325 + // before parsing other lines.
34 @@ -250,11 +252,12 @@ 326 + hunspell::LineIterator affix_iterator = bdict_reader->GetAffixLineIterator();
35 327 + FileMgr* iterator = new FileMgr(&affix_iterator);
36 // open the affix file 328 + if (!iterator) {
37 FILE * afflst; 329 + HUNSPELL_WARNING(stderr,
38 - afflst = fopen(affpath,"r"); 330 + "error: could not create a FileMgr from an affix line iterator.\n");
39 + afflst = _fdopen(_dup(_fileno(aff_handle)), "r"); 331 + return 1;
40 if (!afflst) { 332 + }
41 - HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n ",affpath); 333 +
42 + HUNSPELL_WARNING(stderr, "error: could not open affix description file\n"); 334 + while (line = iterator->getline()) {
335 + ft = ' ';
336 + if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
337 + if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S';
338 + if (ft != ' ')
339 + parse_affix(line, ft, iterator, NULL);
340 + }
341 + delete iterator;
342 +
343 + // Create a FileMgr object for reading lines except PFX and SFX lines.
344 + // We don't need to change the loop below since our FileMgr emulates the
345 + // original one.
346 + hunspell::LineIterator other_iterator = bdict_reader->GetOtherLineIterator();
347 + FileMgr * afflst = new FileMgr(&other_iterator);
348 + if (!afflst) {
349 + HUNSPELL_WARNING(stderr,
350 + "error: could not create a FileMgr from an other line iterator.\n");
351 + return 1;
352 + }
353 +#else
354 // checking flag duplication
355 char dupflags[CONTSIZE];
356 char dupflags_ini = 1;
357 @@ -265,6 +316,7 @@
358 HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n ",affpath);
43 return 1; 359 return 1;
44 } 360 }
45 + fseek(afflst, 0, SEEK_SET); 361 +#endif
46 362
47 // step one is to parse the affix file building up the internal 363 // step one is to parse the affix file building up the internal
48 // affix data structures 364 // affix data structures
365 @@ -274,6 +326,7 @@
366 while ((line = afflst->getline())) {
367 mychomp(line);
368
369 +#ifndef HUNSPELL_CHROME_CLIENT
370 /* remove byte order mark */
371 if (firstline) {
372 firstline = 0;
373 @@ -282,6 +335,7 @@
374 memmove(line, line+3, strlen(line+3)+1);
375 }
376 }
377 +#endif
378
379 /* parse in the keyboard string */
380 if (strncmp(line,"KEY",3) == 0) {
381 @@ -517,6 +571,7 @@
382 }
383 }
384
385 +#ifndef HUNSPELL_CHROME_CLIENT
386 /* parse in the typical fault correcting table */
387 if (strncmp(line,"REP",3) == 0) {
388 if (parse_reptable(line, afflst)) {
389 @@ -524,6 +579,7 @@
390 return 1;
391 }
392 }
393 +#endif
394
395 /* parse in the input conversion table */
396 if (strncmp(line,"ICONV",5) == 0) {
397 @@ -634,6 +690,7 @@
398 checksharps=1;
399 }
400
401 +#ifndef HUNSPELL_CHROME_CLIENT // Chrome handled affixes above.
402 /* parse this affix: P - prefix, S - suffix */
403 ft = ' ';
404 if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
405 @@ -650,6 +707,7 @@
406 return 1;
407 }
408 }
409 +#endif
410
411 }
412 delete afflst;
413 @@ -1247,6 +1305,26 @@
414 const char * r;
415 int lenr, lenp;
416
417 +#ifdef HUNSPELL_CHROME_CLIENT
418 + const char *pattern, *pattern2;
419 + hunspell::ReplacementIterator iterator = bdict_reader->GetReplacementIterator ();
420 + while (iterator.GetNext(&pattern, &pattern2)) {
421 + r = word;
422 + lenr = strlen(pattern2);
423 + lenp = strlen(pattern);
424 +
425 + // search every occurrence of the pattern in the word
426 + while ((r=strstr(r, pattern)) != NULL) {
427 + strcpy(candidate, word);
428 + if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break;
429 + strcpy(candidate+(r-word), pattern2);
430 + strcpy(candidate+(r-word)+lenr, r+lenp);
431 + if (candidate_check(candidate,strlen(candidate))) return 1;
432 + r++; // search for the next letter
433 + }
434 + }
435 +
436 +#else
437 if ((wl < 2) || !numrep) return 0;
438
439 for (int i=0; i < numrep; i++ ) {
440 @@ -1263,6 +1341,7 @@
441 r++; // search for the next letter
442 }
443 }
444 +#endif
445 return 0;
446 }
447
448 @@ -3332,6 +3411,7 @@
449 return 0;
450 }
451
452 +#ifndef HUNSPELL_CHROME_CLIENT
453 /* parse in the typical fault correcting table */
454 int AffixMgr::parse_reptable(char * line, FileMgr * af)
455 {
456 @@ -3407,6 +3487,7 @@
457 }
458 return 0;
459 }
460 +#endif
461
462 /* parse in the typical fault correcting table */
463 int AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword)
464 @@ -4010,6 +4091,7 @@
465 case 1: {
466 np++;
467 aflag = pHMgr->decode_flag(piece);
468 +#ifndef HUNSPELL_CHROME_CLIENT // We don't check for duplicates.
469 if (((at == 'S') && (dupflags[aflag] & dupSFX)) ||
470 ((at == 'P') && (dupflags[aflag] & dupPFX))) {
471 HUNSPELL_WARNING(stderr, "error: line %d: multiple defi nitions of an affix flag\n",
472 @@ -4017,6 +4099,7 @@
473 // return 1; XXX permissive mode for bad dictionaries
474 }
475 dupflags[aflag] += (char) ((at == 'S') ? dupSFX : dupPFX);
476 +#endif
477 break;
478 }
479 // piece 3 - is cross product indicator
49 Index: src/hunspell/affixmgr.hxx 480 Index: src/hunspell/affixmgr.hxx
50 =================================================================== 481 ===================================================================
51 --- src/hunspell/affixmgr.hxx» (revision 3811) 482 --- src/hunspell/affixmgr.hxx» (revision 48261)
52 +++ src/hunspell/affixmgr.hxx (working copy) 483 +++ src/hunspell/affixmgr.hxx (working copy)
53 @@ -93,7 +93,7 @@ 484 @@ -18,6 +18,40 @@
54 485 class PfxEntry;
486 class SfxEntry;
487
488 +#ifdef HUNSPELL_CHROME_CLIENT
489 +
490 +#include <vector>
491 +
492 +// This class provides an implementation of the contclasses array in AffixMgr
493 +// that is normally a large static array. We should almost never need more than
494 +// 256 elements, so this class only allocates that much to start off with. If
495 +// elements higher than that are actually used, we'll automatically expand.
496 +class ContClasses {
497 + public:
498 + ContClasses() {
499 + // Pre-allocate a buffer so that typically, we'll never have to resize.
500 + EnsureSizeIs(256);
501 + }
502 +
503 + char& operator[](size_t index) {
504 + EnsureSizeIs(index + 1);
505 + return data[index];
506 + }
507 +
508 + void EnsureSizeIs(size_t new_size) {
509 + if (data.size() >= new_size)
510 + return; // Nothing to do.
511 +
512 + size_t old_size = data.size();
513 + data.resize(new_size);
514 + memset(&data[old_size], 0, new_size - old_size);
515 + }
516 +
517 + std::vector<char> data;
518 +};
519 +
520 +#endif // HUNSPELL_CHROME_CLIENT
521 +
522 class LIBHUNSPELL_DLL_EXPORTED AffixMgr
523 {
524
525 @@ -98,12 +132,20 @@
526 int fullstrip;
527
528 int havecontclass; // boolean variable
529 +#ifdef HUNSPELL_CHROME_CLIENT
530 + ContClasses contclasses;
531 +#else
532 char contclasses[CONTSIZE]; // flags of possible continuing cl asses (twofold affix)
533 +#endif
534
55 public: 535 public:
56 536
57 - AffixMgr(const char * affpath, HashMgr * ptr); 537 +#ifdef HUNSPELL_CHROME_CLIENT
58 + AffixMgr(FILE* aff_handle, HashMgr * ptr); 538 + AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md);
539 +#else
540 AffixMgr(const char * affpath, HashMgr** ptr, int * md,
541 const char * key = NULL);
542 +#endif
59 ~AffixMgr(); 543 ~AffixMgr();
60 struct hentry * affix_check(const char * word, int len, 544 struct hentry * affix_check(const char * word, int len,
61 const unsigned short needflag = (unsigned short) 0, char in_compoun d = IN_CPD_NOT); 545 const unsigned short needflag = (unsigned short) 0,
62 @@ -179,7 +179,7 @@ 546 @@ -202,6 +244,10 @@
63 int get_checksharps(void); 547 int get_fullstrip() const;
64 548
65 private: 549 private:
66 - int parse_file(const char * affpath); 550 +#ifdef HUNSPELL_CHROME_CLIENT
67 + int parse_file(FILE* aff_handle); 551 + // Not owned by us, owned by the Hunspell object.
68 // int parse_string(char * line, char ** out, const char * name); 552 + hunspell::BDictReader* bdict_reader;
69 int parse_flag(char * line, unsigned short * out, const char * name); 553 +#endif
70 int parse_num(char * line, int * out, const char * name); 554 int parse_file(const char * affpath, const char * key);
555 int parse_flag(char * line, unsigned short * out, FileMgr * af);
556 int parse_num(char * line, int * out, FileMgr * af);
557 Index: src/hunspell/htypes.hxx
558 ===================================================================
559 --- src/hunspell/htypes.hxx» (revision 48261)
560 +++ src/hunspell/htypes.hxx» (working copy)
561 @@ -1,6 +1,16 @@
562 #ifndef _HTYPES_HXX_
563 #define _HTYPES_HXX_
564
565 +#ifdef HUNSPELL_CHROME_CLIENT
566 +// This is a workaround for preventing errors in parsing Turkish BDICs, which
567 +// contain very long AF lines (~ 12,000 chars).
568 +// TODO(hbono) change the HashMgr::parse_aliasf() function to be able to parse
569 +// longer lines than MAXDELEN.
570 +#define MAXDELEN (8192 * 2)
571 +#else
572 +#define MAXDELEN 8192
573 +#endif // HUNSPELL_CHROME_CLIENT
574 +
575 #define ROTATE_LEN 5
576
577 #define ROTATE(v,q) \
71 Index: src/hunspell/hashmgr.cxx 578 Index: src/hunspell/hashmgr.cxx
72 =================================================================== 579 ===================================================================
73 --- src/hunspell/hashmgr.cxx» (revision 3811) 580 --- src/hunspell/hashmgr.cxx» (revision 48261)
74 +++ src/hunspell/hashmgr.cxx (working copy) 581 +++ src/hunspell/hashmgr.cxx (working copy)
75 @@ -29,7 +31,7 @@ 582 @@ -12,8 +12,14 @@
76 583
77 // build a hash table from a munched word list 584 // build a hash table from a munched word list
78 585
79 -HashMgr::HashMgr(const char * tpath, const char * apath) 586 +#ifdef HUNSPELL_CHROME_CLIENT
80 +HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle) 587 +HashMgr::HashMgr(hunspell::BDictReader* reader)
81 { 588 +{
589 + bdict_reader = reader;
590 +#else
591 HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
592 {
593 +#endif
82 tablesize = 0; 594 tablesize = 0;
83 tableptr = NULL; 595 tableptr = NULL;
84 @@ -43,8 +45,8 @@ 596 flag_mode = FLAG_CHAR;
85 aliasf = NULL; 597 @@ -31,8 +37,14 @@
86 numaliasm = 0; 598 numaliasm = 0;
87 aliasm = NULL; 599 aliasm = NULL;
88 - load_config(apath); 600 forbiddenword = FORBIDDENWORD; // forbidden word signing flag
89 - int ec = load_tables(tpath); 601 +#ifdef HUNSPELL_CHROME_CLIENT
90 + load_config(aff_handle); 602 + // No tables to load, just the AF lines.
91 + int ec = load_tables(dic_handle); 603 + load_config(NULL, NULL);
604 + int ec = LoadAFLines();
605 +#else
606 load_config(apath, key);
607 int ec = load_tables(tpath, key);
608 +#endif
92 if (ec) { 609 if (ec) {
93 /* error condition - what should we do here */ 610 /* error condition - what should we do here */
94 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec); 611 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
95 @@ -240,7 +242,7 @@ 612 @@ -91,15 +103,59 @@
613 if (ignorechars) free(ignorechars);
614 if (ignorechars_utf16) free(ignorechars_utf16);
615
616 +#ifdef HUNSPELL_CHROME_CLIENT
617 + EmptyHentryCache();
618 + for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin();
619 + it != pointer_to_strings_.end(); ++it) {
620 + delete *it;
621 + }
622 +#endif
623 +
624 #ifdef MOZILLA_CLIENT
625 delete [] csconv;
626 #endif
627 }
628
629 +#ifdef HUNSPELL_CHROME_CLIENT
630 +void HashMgr::EmptyHentryCache() {
631 + // We need to delete each cache entry, and each additional one in the linked
632 + // list of homonyms.
633 + for (HEntryCache::iterator i = hentry_cache.begin();
634 + i != hentry_cache.end(); ++i) {
635 + hentry* cur = i->second;
636 + while (cur) {
637 + hentry* next = cur->next_homonym;
638 + DeleteHashEntry(cur);
639 + cur = next;
640 + }
641 + }
642 + hentry_cache.clear();
643 +}
644 +#endif
645 +
646 // lookup a root word in the hashtable
647
648 struct hentry * HashMgr::lookup(const char *word) const
649 {
650 +#ifdef HUNSPELL_CHROME_CLIENT
651 + int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
652 + int affix_count = bdict_reader->FindWord(word, affix_ids);
653 + if (affix_count == 0) { // look for custom added word
654 + std::map<base::StringPiece, int>::const_iterator iter =
655 + custom_word_to_affix_id_map_.find(word);
656 + if (iter != custom_word_to_affix_id_map_.end()) {
657 + affix_count = 1;
658 + affix_ids[0] = iter->second;
659 + }
660 + }
661 +
662 + static const int kMaxWordLen = 128;
663 + static char word_buf[kMaxWordLen];
664 + // To take account of null-termination, we use up to 127.
665 + strncpy(word_buf, word, kMaxWordLen - 1);
666 +
667 + return AffixIDsToHentry(word_buf, affix_ids, affix_count);
668 +#else
669 struct hentry * dp;
670 if (tableptr) {
671 dp = tableptr[hash(word)];
672 @@ -109,12 +165,14 @@
673 }
674 }
675 return NULL;
676 +#endif
677 }
678
679 // add a word to the hash table (private)
680 int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff ,
681 int al, const char * desc, bool onlyupcase)
682 {
683 +#ifndef HUNSPELL_CHROME_CLIENT
684 bool upcasehomonym = false;
685 int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
686 // variable-length hash record with word and optional fields
687 @@ -206,6 +264,17 @@
688 » if (hp->astr) free(hp->astr);
689 » free(hp);
690 }
691 +#else
692 + std::map<base::StringPiece, int>::iterator iter =
693 + custom_word_to_affix_id_map_.find(word);
694 + if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added
695 + std::string* new_string_word = new std::string(word);
696 + pointer_to_strings_.push_back(new_string_word);
697 + base::StringPiece sp(*(new_string_word));
698 + custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words
699 + return 1;
700 + }
701 +#endif
702 return 0;
703 }
704
705 @@ -339,6 +408,43 @@
706 // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
707 struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
708 {
709 +#ifdef HUNSPELL_CHROME_CLIENT
710 + // Return NULL if dictionary is not valid.
711 + if (!bdict_reader->IsValid())
712 + return NULL;
713 +
714 + // This function is only ever called by one place and not nested. We can
715 + // therefore keep static state between calls and use |col| as a "reset" flag
716 + // to avoid changing the API. It is set to -1 for the first call.
717 + static hunspell::WordIterator word_iterator =
718 + bdict_reader->GetAllWordIterator();
719 + if (col < 0) {
720 + col = 1;
721 + word_iterator = bdict_reader->GetAllWordIterator();
722 + }
723 +
724 + int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
725 + static const int kMaxWordLen = 128;
726 + static char word[kMaxWordLen];
727 + int affix_count = word_iterator.Advance(word, kMaxWordLen, affix_ids);
728 + if (affix_count == 0)
729 + return NULL;
730 + short word_len = static_cast<short>(strlen(word));
731 +
732 + // Since hunspell 1.2.8, an hentry struct becomes a variable-length struct,
733 + // i.e. a struct which uses its array 'word[1]' as a variable-length array.
734 + // As noted above, this function is not nested. So, we just use a static
735 + // struct which consists of an hentry and a char[kMaxWordLen], and initialize
736 + // the static struct and return it for now.
737 + // No need to create linked lists for the extra affixes.
738 + static struct {
739 + hentry entry;
740 + char word[kMaxWordLen];
741 + } hash_entry;
742 +
743 + return InitHashEntry(&hash_entry.entry, sizeof(hash_entry),
744 + &word[0], word_len, affix_ids[0]);
745 +#else
746 if (hp && hp->next != NULL) return hp->next;
747 for (col++; col < tablesize; col++) {
748 if (tableptr[col]) return tableptr[col];
749 @@ -346,11 +452,13 @@
750 // null at end and reset to start
751 col = -1;
752 return NULL;
753 +#endif
96 } 754 }
97 755
98 // load a munched word list and build a hash table on the fly 756 // load a munched word list and build a hash table on the fly
99 -int HashMgr::load_tables(const char * tpath) 757 int HashMgr::load_tables(const char * tpath, const char * key)
100 +int HashMgr::load_tables(FILE* t_handle) 758 {
101 { 759 +#ifndef HUNSPELL_CHROME_CLIENT
102 int wl, al; 760 int al;
103 char * ap; 761 char * ap;
104 @@ -248,8 +250,9 @@ 762 char * dp;
105 unsigned short * flags; 763 @@ -470,6 +578,7 @@
106 764 }
107 // raw dictionary - munched file 765
108 - FILE * rawdict = fopen(tpath, "r"); 766 delete dict;
109 + FILE * rawdict = _fdopen(_dup(_fileno(t_handle)), "r"); 767 +#endif
110 if (rawdict == NULL) return 1; 768 return 0;
111 + fseek(rawdict, 0, SEEK_SET); 769 }
112 770
113 // first read the first line of file to get hash table size */ 771 @@ -478,6 +587,9 @@
114 char ts[MAXDELEN]; 772
115 @@ -442,7 +445,7 @@ 773 int HashMgr::hash(const char * word) const
116 } 774 {
117 775 +#ifdef HUNSPELL_CHROME_CLIENT
118 // read in aff file and set flag mode 776 + return 0;
119 -int HashMgr::load_config(const char * affpath) 777 +#else
120 +int HashMgr::load_config(FILE* aff_handle) 778 long hv = 0;
121 { 779 for (int i=0; i < 4 && *word != 0; i++)
780 hv = (hv << 8) | (*word++);
781 @@ -486,6 +598,7 @@
782 hv ^= (*word++);
783 }
784 return (unsigned long) hv % tablesize;
785 +#endif
786 }
787
788 int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {
789 @@ -607,7 +720,12 @@
122 int firstline = 1; 790 int firstline = 1;
123
124 @@ -451,11 +454,12 @@
125 791
126 // open the affix file 792 // open the affix file
127 FILE * afflst; 793 +#ifdef HUNSPELL_CHROME_CLIENT
128 - afflst = fopen(affpath,"r"); 794 + hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator();
129 + afflst = _fdopen(_dup(_fileno(aff_handle)), "r"); 795 + FileMgr * afflst = new FileMgr(&iterator);
796 +#else
797 FileMgr * afflst = new FileMgr(affpath, key);
798 +#endif
130 if (!afflst) { 799 if (!afflst) {
131 - HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\ n",affpath); 800 HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\ n",affpath);
132 + HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n") ;
133 return 1; 801 return 1;
134 } 802 @@ -802,6 +920,121 @@
135 + fseek(afflst, 0, SEEK_SET); 803 return 0;
136 804 }
137 // read in each line ignoring any that do not 805
138 // start with a known line type indicator 806 +#ifdef HUNSPELL_CHROME_CLIENT
807 +int HashMgr::LoadAFLines()
808 +{
809 + utf8 = 1; // We always use UTF-8.
810 +
811 + // Read in all the AF lines which tell us the rules for each affix group ID.
812 + hunspell::LineIterator iterator = bdict_reader->GetAfLineIterator();
813 + FileMgr afflst(&iterator);
814 + while (char* line = afflst.getline()) {
815 + int rv = parse_aliasf(line, &afflst);
816 + if (rv)
817 + return rv;
818 + }
819 +
820 + return 0;
821 +}
822 +
823 +hentry* HashMgr::InitHashEntry(hentry* entry,
824 + size_t item_size,
825 + const char* word,
826 + int word_length,
827 + int affix_index) const {
828 + // Return if the given buffer doesn't have enough space for a hentry struct
829 + // or the given word is too long.
830 + // Our BDICT cannot handle words longer than (128 - 1) bytes. So, it is
831 + // better to return an error if the given word is too long and prevent
832 + // an unexpected result caused by a long word.
833 + const int kMaxWordLen = 128;
834 + if (item_size < sizeof(hentry) + word_length + 1 ||
835 + word_length >= kMaxWordLen)
836 + return NULL;
837 +
838 + // Initialize a hentry struct with the given parameters, and
839 + // append the given string at the end of this hentry struct.
840 + memset(entry, 0, item_size);
841 + FileMgr af(NULL);
842 + entry->alen = static_cast<short>(
843 + const_cast<HashMgr*>(this)->get_aliasf(affix_index, &entry->astr, &af));
844 + entry->blen = static_cast<unsigned char>(word_length);
845 + memcpy(&entry->word, word, word_length);
846 +
847 + return entry;
848 +}
849 +
850 +hentry* HashMgr::CreateHashEntry(const char* word,
851 + int word_length,
852 + int affix_index) const {
853 + // Return if the given word is too long.
854 + // (See the comment in HashMgr::InitHashEntry().)
855 + const int kMaxWordLen = 128;
856 + if (word_length >= kMaxWordLen)
857 + return NULL;
858 +
859 + const size_t kEntrySize = sizeof(hentry) + word_length + 1;
860 + struct hentry* entry = reinterpret_cast<hentry*>(malloc(kEntrySize));
861 + if (entry)
862 + InitHashEntry(entry, kEntrySize, word, word_length, affix_index);
863 +
864 + return entry;
865 +}
866 +
867 +void HashMgr::DeleteHashEntry(hentry* entry) const {
868 + free(entry);
869 +}
870 +
871 +hentry* HashMgr::AffixIDsToHentry(char* word,
872 + int* affix_ids,
873 + int affix_count) const
874 +{
875 + if (affix_count == 0)
876 + return NULL;
877 +
878 + HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache;
879 + std::string std_word(word);
880 + HEntryCache::iterator found = cache.find(std_word);
881 + if (found != cache.end()) {
882 + // We must return an existing hentry for the same word if we've previously
883 + // handed one out. Hunspell will compare pointers in some cases to see if
884 + // two words it has found are the same.
885 + return found->second;
886 + }
887 +
888 + short word_len = static_cast<short>(strlen(word));
889 +
890 + // We can get a number of prefixes per word. There will normally be only one,
891 + // but if not, there will be a linked list of "hentry"s for the "homonym"s
892 + // for the word.
893 + struct hentry* first_he = NULL;
894 + struct hentry* prev_he = NULL; // For making linked list.
895 + for (int i = 0; i < affix_count; i++) {
896 + struct hentry* he = CreateHashEntry(word, word_len, affix_ids[i]);
897 + if (!he)
898 + break;
899 + if (i == 0)
900 + first_he = he;
901 + if (prev_he)
902 + prev_he->next_homonym = he;
903 + prev_he = he;
904 + }
905 +
906 + cache[std_word] = first_he; // Save this word in the cache for later.
907 + return first_he;
908 +}
909 +
910 +hentry* HashMgr::GetHentryFromHEntryCache(char* word) {
911 + HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache;
912 + std::string std_word(word);
913 + HEntryCache::iterator found = cache.find(std_word);
914 + if (found != cache.end())
915 + return found->second;
916 + else
917 + return NULL;
918 +}
919 +#endif
920 +
921 int HashMgr::is_aliasf() {
922 return (aliasf != NULL);
923 }
924 Index: src/hunspell/hunspell.cxx
925 ===================================================================
926 --- src/hunspell/hunspell.cxx» (revision 48261)
927 +++ src/hunspell/hunspell.cxx» (working copy)
928 @@ -7,18 +7,35 @@
929
930 #include "hunspell.hxx"
931 #include "hunspell.h"
932 +#ifndef HUNSPELL_CHROME_CLIENT
933 #include "config.h"
934 +#endif
935 #include "csutil.hxx"
936
937 +#ifdef HUNSPELL_CHROME_CLIENT
938 +Hunspell::Hunspell(const unsigned char* bdict_data, size_t bdict_length)
939 +#else
940 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
941 +#endif
942 {
943 encoding = NULL;
944 csconv = NULL;
945 utf8 = 0;
946 complexprefixes = 0;
947 +#ifndef HUNSPELL_CHROME_CLIENT
948 affixpath = mystrdup(affpath);
949 +#endif
950 maxdic = 0;
951
952 +#ifdef HUNSPELL_CHROME_CLIENT
953 + bdict_reader = new hunspell::BDictReader;
954 + bdict_reader->Init(bdict_data, bdict_length);
955 +
956 + pHMgr[0] = new HashMgr(bdict_reader);
957 + if (pHMgr[0]) maxdic = 1;
958 +
959 + pAMgr = new AffixMgr(bdict_reader, pHMgr, &maxdic);
960 +#else
961 /* first set up the hash manager */
962 pHMgr[0] = new HashMgr(dpath, affpath, key);
963 if (pHMgr[0]) maxdic = 1;
964 @@ -26,6 +43,7 @@
965 /* next set up the affix manager */
966 /* it needs access to the hash manager lookup methods */
967 pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
968 +#endif
969
970 /* get the preferred try string and the dictionary */
971 /* encoding from the Affix Manager for that dictionary */
972 @@ -56,10 +74,17 @@
973 csconv= NULL;
974 if (encoding) free(encoding);
975 encoding = NULL;
976 +
977 +#ifdef HUNSPELL_CHROME_CLIENT
978 + if (bdict_reader) delete bdict_reader;
979 + bdict_reader = NULL;
980 +#else
981 if (affixpath) free(affixpath);
982 affixpath = NULL;
983 +#endif
984 }
985
986 +#ifndef HUNSPELL_CHROME_CLIENT
987 // load extra dictionaries
988 int Hunspell::add_dic(const char * dpath, const char * key) {
989 if (maxdic == MAXDIC || !affixpath) return 1;
990 @@ -67,6 +92,7 @@
991 if (pHMgr[maxdic]) maxdic++; else return 1;
992 return 0;
993 }
994 +#endif
995
996 // make a copy of src at destination while removing all leading
997 // blanks and removing any trailing periods after recording
998 @@ -319,6 +345,9 @@
999
1000 int Hunspell::spell(const char * word, int * info, char ** root)
1001 {
1002 +#ifdef HUNSPELL_CHROME_CLIENT
1003 + if (pHMgr) pHMgr[0]->EmptyHentryCache();
1004 +#endif
1005 struct hentry * rv=NULL;
1006 // need larger vector. For example, Turkish capital letter I converted a
1007 // 2-byte UTF-8 character (dotless i) by mkallsmall.
1008 @@ -567,6 +596,13 @@
1009 word = w2;
1010 } else word = w;
1011
1012 +#ifdef HUNSPELL_CHROME_CLIENT
1013 + // We need to check that the word length is valid to make Coverity (Event
1014 + // fixed_size_dest: Possible overrun of N byte fixed size buffer) happy.
1015 + if ((utf8 && strlen(word) >= MAXWORDUTF8LEN) || (!utf8 && strlen(word) >= MAXWORDLEN))
1016 + return NULL;
1017 +#endif
1018 +
1019 // word reversing wrapper for complex prefixes
1020 if (complexprefixes) {
1021 if (word != w2) {
1022 @@ -657,6 +693,9 @@
1023
1024 int Hunspell::suggest(char*** slst, const char * word)
1025 {
1026 +#ifdef HUNSPELL_CHROME_CLIENT
1027 + if (pHMgr) pHMgr[0]->EmptyHentryCache();
1028 +#endif
1029 int onlycmpdsug = 0;
1030 char cw[MAXWORDUTF8LEN];
1031 char wspace[MAXWORDUTF8LEN];
1032 @@ -1874,13 +1913,21 @@
1033
1034 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
1035 {
1036 +#ifdef HUNSPELL_CHROME_CLIENT
1037 + return NULL;
1038 +#else
1039 return (Hunhandle*)(new Hunspell(affpath, dpath));
1040 +#endif
1041 }
1042
1043 Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
1044 const char * key)
1045 {
1046 +#ifdef HUNSPELL_CHROME_CLIENT
1047 + return NULL;
1048 +#else
1049 return (Hunhandle*)(new Hunspell(affpath, dpath, key));
1050 +#endif
1051 }
1052
1053 void Hunspell_destroy(Hunhandle *pHunspell)
139 Index: src/hunspell/hashmgr.hxx 1054 Index: src/hunspell/hashmgr.hxx
140 =================================================================== 1055 ===================================================================
141 --- src/hunspell/hashmgr.hxx» (revision 3811) 1056 --- src/hunspell/hashmgr.hxx» (revision 48261)
142 +++ src/hunspell/hashmgr.hxx (working copy) 1057 +++ src/hunspell/hashmgr.hxx (working copy)
143 @@ -25,7 +25,7 @@ 1058 @@ -8,10 +8,25 @@
1059 #include "htypes.hxx"
1060 #include "filemgr.hxx"
1061
1062 +#ifdef HUNSPELL_CHROME_CLIENT
1063 +#include <string>
1064 +#include <map>
1065 +
1066 +#include "base/stl_util-inl.h"
1067 +#include "base/string_piece.h"
1068 +#include "third_party/hunspell/google/bdict_reader.h"
1069 +#endif
1070 +
1071 enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
1072
1073 class LIBHUNSPELL_DLL_EXPORTED HashMgr
1074 {
1075 +#ifdef HUNSPELL_CHROME_CLIENT
1076 + // Not owned by this class, owned by the Hunspell object.
1077 + hunspell::BDictReader* bdict_reader;
1078 + std::map<base::StringPiece, int> custom_word_to_affix_id_map_;
1079 + std::vector<std::string*> pointer_to_strings_;
1080 +#endif
1081 int tablesize;
1082 struct hentry ** tableptr;
1083 int userword;
1084 @@ -34,7 +49,23 @@
144 1085
145 1086
146 public: 1087 public:
147 - HashMgr(const char * tpath, const char * apath); 1088 +#ifdef HUNSPELL_CHROME_CLIENT
148 + HashMgr(FILE* t_handle, FILE* a_handle); 1089 + HashMgr(hunspell::BDictReader* reader);
1090 +
1091 + // Return the hentry corresponding to the given word. Returns NULL if the
1092 + // word is not there in the cache.
1093 + hentry* GetHentryFromHEntryCache(char* word);
1094 +
1095 + // Called before we do a new operation. This will empty the cache of pointers
1096 + // to hentries that we have cached. In Chrome, we make these on-demand, but
1097 + // they must live as long as the single spellcheck operation that they're part
1098 + // of since Hunspell will save pointers to various ones as it works.
1099 + //
1100 + // This function allows that cache to be emptied and not grow infinitely.
1101 + void EmptyHentryCache();
1102 +#else
1103 HashMgr(const char * tpath, const char * apath, const char * key = NULL);
1104 +#endif
149 ~HashMgr(); 1105 ~HashMgr();
150 1106
151 struct hentry * lookup(const char *) const; 1107 struct hentry * lookup(const char *) const;
152 @@ -46,9 +46,9 @@ 1108 @@ -59,6 +90,40 @@
153 1109 int al, const char * desc, bool onlyupcase);
154 1110 int load_config(const char * affpath, const char * key);
155 private: 1111 int parse_aliasf(char * line, FileMgr * af);
156 - int load_tables(const char * tpath); 1112 +
157 + int load_tables(FILE* t_handle); 1113 +#ifdef HUNSPELL_CHROME_CLIENT
158 int add_word(const char * word, int wl, unsigned short * ap, int al, const char * desc); 1114 + // Loads the AF lines from a BDICT.
159 - int load_config(const char * affpath); 1115 + // A BDICT file compresses its AF lines to save memory.
160 + int load_config(FILE* aff_handle); 1116 + // This function decompresses each AF line and calls parse_aliasf().
161 int parse_aliasf(char * line, FILE * af); 1117 + int LoadAFLines();
162 #ifdef HUNSPELL_EXPERIMENTAL 1118 +
163 int parse_aliasm(char * line, FILE * af); 1119 + // Helper functions that create a new hentry struct, initialize it, and
164 Index: src/hunspell/hunspell.cxx 1120 + // delete it.
165 =================================================================== 1121 + // These functions encapsulate non-trivial operations in creating and
166 --- src/hunspell/hunspell.cxx» (revision 3811) 1122 + // initializing a hentry struct from BDICT data to avoid changing code so much
167 +++ src/hunspell/hunspell.cxx» (working copy) 1123 + // even when a hentry struct is changed.
168 @@ -20,7 +20,7 @@ 1124 + hentry* InitHashEntry(hentry* entry,
169 #endif 1125 + size_t item_size,
170 #endif 1126 + const char* word,
171 1127 + int word_length,
172 -Hunspell::Hunspell(const char * affpath, const char * dpath) 1128 + int affix_index) const;
173 +Hunspell::Hunspell(FILE* aff_handle, FILE* dic_handle) 1129 + hentry* CreateHashEntry(const char* word,
174 { 1130 + int word_length,
175 encoding = NULL; 1131 + int affix_index) const;
176 csconv = NULL; 1132 + void DeleteHashEntry(hentry* entry) const;
177 @@ -28,11 +28,11 @@ 1133 +
178 complexprefixes = 0; 1134 + // Converts the list of affix IDs to a linked list of hentry structures. The
179 1135 + // hentry structures each hold a copy of the given word. The returned list is
180 /* first set up the hash manager */ 1136 + // allocated on demand and stored in hentry_cache, so a later call for the
181 - pHMgr = new HashMgr(dpath, affpath); 1137 + // same word returns the same pointer. The |word| buffer must be the same.
182 + pHMgr = new HashMgr(dic_handle, aff_handle); 1138 + hentry* AffixIDsToHentry(char* word, int* affix_ids, int affix_count) const;
183 1139 +
184 /* next set up the affix manager */ 1140 + // See EmptyHentryCache above. Note that each cached entry is actually the
185 /* it needs access to the hash manager lookup methods */ 1141 + // head of a linked list chained through the next_homonym pointer.
186 - pAMgr = new AffixMgr(affpath,pHMgr); 1142 + typedef std::map<std::string, hentry*> HEntryCache;
187 + pAMgr = new AffixMgr(aff_handle, pHMgr); 1143 + HEntryCache hentry_cache;
188 1144 +#endif
189 /* get the preferred try string and the dictionary */ 1145 +
190 /* encoding from the Affix Manager for that dictionary */ 1146 int add_hidden_capitalized_word(char * word, int wbl, int wcl,
191 @@ -1694,9 +1694,9 @@ 1147 unsigned short * flags, int al, char * dp, int captype);
192 1148 int parse_aliasm(char * line, FileMgr * af);
193 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
194
195 -Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
196 +Hunhandle *Hunspell_create(FILE* aff_handle, FILE* dic_handle)
197 {
198 - return (Hunhandle*)(new Hunspell(affpath, dpath));
199 + return (Hunhandle*)(new Hunspell(aff_handle, dic_handle));
200 }
201
202 void Hunspell_destroy(Hunhandle *pHunspell)
203 Index: src/hunspell/hunspell.hxx 1149 Index: src/hunspell/hunspell.hxx
204 =================================================================== 1150 ===================================================================
205 --- src/hunspell/hunspell.hxx» (revision 3811) 1151 --- src/hunspell/hunspell.hxx» (revision 48261)
206 +++ src/hunspell/hunspell.hxx (working copy) 1152 +++ src/hunspell/hunspell.hxx (working copy)
207 @@ -48,7 +48,7 @@ 1153 @@ -5,6 +5,10 @@
1154 #include "suggestmgr.hxx"
1155 #include "langnum.hxx"
1156
1157 +#ifdef HUNSPELL_CHROME_CLIENT
1158 +#include "third_party/hunspell/google/bdict_reader.h"
1159 +#endif
1160 +
1161 #define SPELL_COMPOUND (1 << 0)
1162 #define SPELL_FORBIDDEN (1 << 1)
1163 #define SPELL_ALLCAP (1 << 2)
1164 @@ -26,7 +30,9 @@
1165 HashMgr* pHMgr[MAXDIC];
1166 int maxdic;
1167 SuggestMgr* pSMgr;
1168 +#ifndef HUNSPELL_CHROME_CLIENT // We are using BDict instead.
1169 char * affixpath;
1170 +#endif
1171 char * encoding;
1172 struct cs_info * csconv;
1173 int langnum;
1174 @@ -34,17 +40,28 @@
1175 int complexprefixes;
1176 char** wordbreak;
1177
1178 +#ifdef HUNSPELL_CHROME_CLIENT
1179 + // Owned by this object: allocated in the constructor and deleted on cleanup.
1180 + hunspell::BDictReader* bdict_reader;
1181 +#endif
1182 +
1183 public:
1184
1185 /* Hunspell(aff, dic) - constructor of Hunspell class
208 * input: path of affix file and dictionary file 1186 * input: path of affix file and dictionary file
209 */ 1187 */
210 1188
211 - Hunspell(const char * affpath, const char * dpath); 1189 +#ifdef HUNSPELL_CHROME_CLIENT
212 + Hunspell(FILE* aff_handle, FILE* dic_handle); 1190 + Hunspell(const unsigned char* bdict_data, size_t bdict_length);
1191 +#else
1192 Hunspell(const char * affpath, const char * dpath, const char * key = NULL);
1193 +#endif
1194 ~Hunspell();
1195
1196 +#ifndef HUNSPELL_CHROME_CLIENT
1197 /* load extra dictionaries (only dic files) */
1198 int add_dic(const char * dpath, const char * key = NULL);
1199 +#endif
1200
1201 /* spell(word) - spellcheck word
1202 * output: 0 = bad word, not 0 = good word
1203 Index: src/hunspell/license.hunspell
1204 ===================================================================
1205 --- src/hunspell/license.hunspell» (revision 48261)
1206 +++ src/hunspell/license.hunspell» (working copy)
1207 @@ -56,4 +56,6 @@
1208 *
1209 * ***** END LICENSE BLOCK ***** */
1210
1211 +#ifndef HUNSPELL_CHROME_CLIENT
1212 #include "config.h"
1213 +#endif
OLDNEW
« no previous file with comments | « third_party/hunspell/README.chromium ('k') | third_party/hunspell/hunspell.gyp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698