OLD | NEW |
| 1 Index: README.chromium |
| 2 =================================================================== |
| 3 --- README.chromium (revision 48261) |
| 4 +++ README.chromium (working copy) |
| 5 @@ -1,29 +1,15 @@ |
| 6 -This is a partial copy of Hunspell 1.1.5, with the following changes: |
| 7 -* '#include "config.h"' removed from src/hunspell/hunspell.hxx |
| 8 -* '#include "config.h"' removed from src/hunspell/license.hunspell |
| 9 -* Two unreferenced local variables removed from src/hunspell/suggestmgr.cxx |
| 10 -* src/hunspell/utf_info.cxx moved to src/hunspell/utf_info.hxx, and #include |
| 11 - reference in src/hunspell/csutil.cxx changed accordingly |
| 12 -* Change the input params of the constructors to receive a FILE* instead of |
| 13 - a file path. This is required to use hunspell in the sandbox. |
| 14 - The patch is in google.patch. |
| 15 +This is a partial copy of Hunspell 1.2.10 with the following changes: |
| 16 +* Remove '#include "config.h"' from src/hunspell/hunspell.hxx |
| 17 +* Remove '#include "config.h"' from src/hunspell/license.hunspell |
| 18 +* Change src/hunspell/filemgr.hxx and src/hunspell/filemgr.cxx to use |
| 19 + LineIterator. |
| 20 +* Add ScopedHashEntryFactory, which creates temporary hentry objects, to |
| 21 + src/hunspell/suggestmgr.cxx |
| 22 +* Change the input params of the constructors to receive a BDICTReader instead |
| 23 + of a file path. |
| 24 +The patch is in google.patch. |
| 25 |
| 26 -The English dictionary distributed by Firefox has been checked in to the |
| 27 -dictionaries directory. It has several additions over the default |
| 28 -myspell/hunspell dictionary. |
| 29 - |
| 30 -* Workaround for non-ASCII characters |
| 31 - |
| 32 -Visual Studio on Japanese Windows assumes the source files to be |
| 33 -encoded in Shift_JIS. The compiler is unhappy with non-ASCII letters |
| 34 -in the source files of Hunspell. The same problem happens with other |
| 35 -CJK Windows as well. Here is the workaround for this problem: |
| 36 - |
| 37 -Convert 8-bit bytes to hexadecimal escaped forms by |
| 38 - |
| 39 - % perl -i -De 's/([\x80-\xff])/sprintf("\\x%02x", $1)/ge' src/*.cxx |
| 40 - |
| 41 - |
| 42 -Note that Hunspell upstream is going to fix this problem. We'll no |
| 43 -longer need the workaround if the problem is fixed in the upstream. |
| 44 - |
| 45 +All dictionaries used by Chromium have been checked in to the |
| 46 +'third_party/hunspell_dictionaries' directory. They have several additions over |
| 47 +the default myspell/hunspell dictionaries. |
| 48 +(See 'third_party/hunspell_dictionaries/README.chromium' for their details.) |
| 49 Index: src/hunspell/filemgr.cxx |
| 50 =================================================================== |
| 51 --- src/hunspell/filemgr.cxx (revision 48261) |
| 52 +++ src/hunspell/filemgr.cxx (working copy) |
| 53 @@ -7,6 +7,32 @@ |
| 54 |
| 55 #include "filemgr.hxx" |
| 56 |
| 57 +#ifdef HUNSPELL_CHROME_CLIENT |
| 58 +#include "third_party/hunspell/google/bdict_reader.h" |
| 59 + |
| 60 +FileMgr::FileMgr(hunspell::LineIterator* iterator) : iterator_(iterator) { |
| 61 +} |
| 62 + |
| 63 +FileMgr::~FileMgr() { |
| 64 +} |
| 65 + |
| 66 +char * FileMgr::getline() { |
| 67 + // Read one line from a BDICT file and store the line to our line buffer. |
| 68 + // To emulate the original FileMgr::getline(), this function returns |
| 69 + // the pointer to our line buffer if we can read a line without errors. |
| 70 + // Otherwise, this function returns NULL. |
| 71 + bool result = iterator_->AdvanceAndCopy(line_, BUFSIZE - 1); |
| 72 + return result ? line_ : NULL; |
| 73 +} |
| 74 + |
| 75 +int FileMgr::getlinenum() { |
| 76 + // This function is used only for displaying the number of a line that |
| 77 + // causes a parser error. For a BDICT file, a line number doesn't help much |
| 78 + // in identifying the place that causes a parser error since it is a binary |
| 79 + // file. So, we just return 0. |
| 80 + return 0; |
| 81 +} |
| 82 +#else |
| 83 int FileMgr::fail(const char * err, const char * par) { |
| 84 fprintf(stderr, err, par); |
| 85 return -1; |
| 86 @@ -47,3 +73,4 @@ |
| 87 int FileMgr::getlinenum() { |
| 88 return linenum; |
| 89 } |
| 90 +#endif |
| 91 Index: src/hunspell/suggestmgr.cxx |
| 92 =================================================================== |
| 93 --- src/hunspell/suggestmgr.cxx (revision 48261) |
| 94 +++ src/hunspell/suggestmgr.cxx (working copy) |
| 95 @@ -12,6 +12,99 @@ |
| 96 |
| 97 const w_char W_VLINE = { '\0', '|' }; |
| 98 |
| 99 +#ifdef HUNSPELL_CHROME_CLIENT |
| 100 +namespace { |
| 101 +// A simple class which creates temporary hentry objects which can be |
| 102 +// available only in a scope. To conceal memory operations from SuggestMgr |
| 103 +// functions, this object automatically deletes all hentry objects created |
| 104 +// through CreateScopedHashEntry() calls in its destructor. |
| 105 +// So, the following snippet raises a memory error. |
| 106 +// |
| 107 +// hentry* bad_copy = NULL; |
| 108 +// { |
| 109 +// ScopedHashEntryFactory factory; |
| 110 +// hentry* scoped_copy = factory.CreateScopedHashEntry(0, source); |
| 111 +// ... |
| 112 +// bad_copy = scoped_copy; |
| 113 +// } |
| 114 +// if (bad_copy->word[0]) // memory for scoped_copy has been deleted! |
| 115 +// |
| 116 +// As listed in the above snippet, it is simple to use this class. |
| 117 +// 1. Declare an instance of this ScopedHashEntryFactory, and; |
| 118 +// 2. Call its CreateScopedHashEntry() member instead of using 'new hentry' |
| 119 +// or 'operator='. |
| 120 +// |
| 121 +// TODO(hbono): this implementation is slower than the previous one of brettw. |
| 122 +// Do we need to improve it? |
| 123 +// |
| 124 +class ScopedHashEntryFactory { |
| 125 + public: |
| 126 + ScopedHashEntryFactory(); |
| 127 + ~ScopedHashEntryFactory(); |
| 128 + |
| 129 + // Creates a temporary copy of the given hentry struct. |
| 130 + // The returned copy is available only while this object is available. |
| 131 + // NOTE: this function just calls memcpy() in creating a copy of the given |
| 132 + // hentry struct, i.e. it does NOT copy objects referred by pointers of the |
| 133 + // given hentry struct. |
| 134 + hentry* CreateScopedHashEntry(int index, const hentry* source); |
| 135 + |
| 136 + private: |
| 137 + // A struct which encapsulates the new hentry struct used by hunspell 1.2.8. |
| 138 + // The hentry struct used by hunspell 1.2.8 is a variable-length struct, |
| 139 + // i.e. it uses its 'word[1]' array member as a variable-length array. |
| 140 + // C/C++ doesn't check boundaries of a char array. For example, for a char |
| 141 + // array 'char word[1]', we can access not only 'word[0]' but also |
| 142 + // 'word[1]', 'word[2]', etc. |
| 143 + // To handle this new hentry struct, we define a struct which combines |
| 144 + // three values (an hentry struct 'entry', a char array 'word[kMaxWordLen]', |
| 145 + // and an unsigned short value 'astr') so that a HashEntryItem 'hash_item' |
| 146 + // satisfies the following equations: |
| 147 + // hash_item.entry.word[1] == hash_item.word[0]. |
| 148 + // hash_item.entry.word[2] == hash_item.word[1]. |
| 149 + // ... |
| 150 + // hash_item.entry.word[n] == hash_item.word[n-1]. |
| 151 + // ... |
| 152 + // hash_item.entry.word[kMaxWordLen] == hash_item.word[kMaxWordLen-1]. |
| 153 + enum { |
| 154 + kMaxWordLen = 128, |
| 155 + }; |
| 156 + struct HashEntryItem { |
| 157 + hentry entry; |
| 158 + char word[kMaxWordLen]; |
| 159 + unsigned short astr; |
| 160 + }; |
| 161 + |
| 162 + HashEntryItem hash_items_[MAX_ROOTS]; |
| 163 +}; |
| 164 + |
| 165 +ScopedHashEntryFactory::ScopedHashEntryFactory() { |
| 166 + memset(&hash_items_[0], 0, sizeof(hash_items_)); |
| 167 +} |
| 168 + |
| 169 +ScopedHashEntryFactory::~ScopedHashEntryFactory() { |
| 170 +} |
| 171 + |
| 172 +hentry* ScopedHashEntryFactory::CreateScopedHashEntry(int index, |
| 173 + const hentry* source) { |
| 174 + if (index >= MAX_ROOTS || source->blen >= kMaxWordLen) |
| 175 + return NULL; |
| 176 + |
| 177 + // Retrieve a HashEntryItem struct from our pool, initialize it, and |
| 178 + // return the address of its 'entry' member. |
| 179 + size_t source_size = sizeof(hentry) + source->blen + 1; |
| 180 + HashEntryItem* hash_item = &hash_items_[index]; |
| 181 + memcpy(&hash_item->entry, source, source_size); |
| 182 + if (source->astr) { |
| 183 + hash_item->astr = *source->astr; |
| 184 + hash_item->entry.astr = &hash_item->astr; |
| 185 + } |
| 186 + return &hash_item->entry; |
| 187 +} |
| 188 + |
| 189 +} // namespace |
| 190 +#endif |
| 191 + |
| 192 SuggestMgr::SuggestMgr(const char * tryme, int maxn, |
| 193 AffixMgr * aptr) |
| 194 { |
| 195 @@ -1029,6 +1122,11 @@ |
| 196 |
| 197 struct hentry* hp = NULL; |
| 198 int col = -1; |
| 199 + |
| 200 +#ifdef HUNSPELL_CHROME_CLIENT |
| 201 + ScopedHashEntryFactory hash_entry_factory; |
| 202 +#endif |
| 203 + |
| 204 phonetable * ph = (pAMgr) ? pAMgr->get_phonetable() : NULL; |
| 205 char target[MAXSWUTF8L]; |
| 206 char candidate[MAXSWUTF8L]; |
| 207 @@ -1066,7 +1164,11 @@ |
| 208 |
| 209 if (sc > scores[lp]) { |
| 210 scores[lp] = sc; |
| 211 +#ifdef HUNSPELL_CHROME_CLIENT |
| 212 + roots[lp] = hash_entry_factory.CreateScopedHashEntry(lp, hp); |
| 213 +#else |
| 214 roots[lp] = hp; |
| 215 +#endif |
| 216 lval = sc; |
| 217 for (j=0; j < MAX_ROOTS; j++) |
| 218 if (scores[j] < lval) { |
| 219 Index: src/hunspell/replist.hxx |
| 220 =================================================================== |
| 221 --- src/hunspell/replist.hxx (revision 48261) |
| 222 +++ src/hunspell/replist.hxx (working copy) |
| 223 @@ -2,6 +2,12 @@ |
| 224 #ifndef _REPLIST_HXX_ |
| 225 #define _REPLIST_HXX_ |
| 226 |
| 227 +#ifdef HUNSPELL_CHROME_CLIENT |
| 228 +// spellchecker.cc hits compilation issues because 'near' is treated as a |
| 229 +// macro; undefining it here solves that problem. |
| 230 +#undef near |
| 231 +#endif |
| 232 + |
| 233 #include "hunvisapi.h" |
| 234 |
| 235 #include "w_char.hxx" |
| 236 Index: src/hunspell/filemgr.hxx |
| 237 =================================================================== |
| 238 --- src/hunspell/filemgr.hxx (revision 48261) |
| 239 +++ src/hunspell/filemgr.hxx (working copy) |
| 240 @@ -7,6 +7,30 @@ |
| 241 #include "hunzip.hxx" |
| 242 #include <stdio.h> |
| 243 |
| 244 +#ifdef HUNSPELL_CHROME_CLIENT |
| 245 +namespace hunspell { |
| 246 +class LineIterator; |
| 247 +} // namespace hunspell |
| 248 + |
| 249 +// A class which encapsulates operations of reading a BDICT file. |
| 250 +// Chrome uses a BDICT file to compress hunspell dictionaries. A BDICT file is |
| 251 +// a binary file converted from a DIC file and an AFF file. (See |
| 252 +// "bdict_reader.h" for its format.) |
| 253 +// This class encapsulates the operations of reading a BDICT file and emulates |
| 254 +// the original FileMgr operations for AffixMgr so that it can read a BDICT |
| 255 +// file without so many changes. |
| 256 +class FileMgr { |
| 257 + public: |
| 258 + FileMgr(hunspell::LineIterator* iterator); |
| 259 + ~FileMgr(); |
| 260 + char* getline(); |
| 261 + int getlinenum(); |
| 262 + |
| 263 + protected: |
| 264 + hunspell::LineIterator* iterator_; |
| 265 + char line_[BUFSIZE + 50]; // input buffer |
| 266 +}; |
| 267 +#else |
| 268 class LIBHUNSPELL_DLL_EXPORTED FileMgr |
| 269 { |
| 270 protected: |
| 271 @@ -23,3 +47,4 @@ |
| 272 int getlinenum(); |
| 273 }; |
| 274 #endif |
| 275 +#endif |
1 Index: src/hunspell/affixmgr.cxx | 276 Index: src/hunspell/affixmgr.cxx |
2 =================================================================== | 277 =================================================================== |
3 --- src/hunspell/affixmgr.cxx» (revision 3811) | 278 --- src/hunspell/affixmgr.cxx» (revision 48261) |
4 +++ src/hunspell/affixmgr.cxx (working copy) | 279 +++ src/hunspell/affixmgr.cxx (working copy) |
5 @@ -25,7 +27,7 @@ | 280 @@ -14,8 +14,14 @@ |
6 #endif | 281 |
7 #endif | 282 #include "csutil.hxx" |
8 | 283 |
9 -AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr) | 284 +#ifdef HUNSPELL_CHROME_CLIENT |
10 +AffixMgr::AffixMgr(FILE* aff_handle, HashMgr* ptr) | 285 +AffixMgr::AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md) |
11 { | 286 +{ |
| 287 + bdict_reader = reader; |
| 288 +#else |
| 289 AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char *
key) |
| 290 { |
| 291 +#endif |
12 // register hash manager and load affix data from aff file | 292 // register hash manager and load affix data from aff file |
13 pHMgr = ptr; | 293 pHMgr = ptr[0]; |
14 @@ -104,8 +106,8 @@ | 294 alldic = ptr; |
| 295 @@ -99,9 +105,17 @@ |
| 296 sFlag[i] = NULL; |
| 297 } |
| 298 |
| 299 +#ifdef HUNSPELL_CHROME_CLIENT |
| 300 + // Define dummy parameters for parse_file() to avoid changing the parameters |
| 301 + // of parse_file(). This may make it easier to merge the changes of the |
| 302 + // original hunspell. |
| 303 + const char* affpath = NULL; |
| 304 + const char* key = NULL; |
| 305 +#else |
| 306 for (int j=0; j < CONTSIZE; j++) { |
15 contclasses[j] = 0; | 307 contclasses[j] = 0; |
16 } | 308 } |
17 | 309 +#endif |
18 - if (parse_file(affpath)) { | 310 |
19 - HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath); | 311 if (parse_file(affpath, key)) { |
20 + if (parse_file(aff_handle)) { | 312 HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath); |
21 + HUNSPELL_WARNING(stderr, "Failure loading aff file\n"); | 313 @@ -252,6 +266,43 @@ |
22 wordchars = mystrdup("qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM
"); | 314 char * line; // io buffers |
23 } | 315 char ft; // affix type |
24 | 316 |
25 @@ -232,7 +234,7 @@ | 317 +#ifdef HUNSPELL_CHROME_CLIENT |
26 | 318 + // open the affix file |
27 | 319 + // We're always UTF-8 |
28 // read in aff file and build up prefix and suffix entry objects | 320 + utf8 = 1; |
29 -int AffixMgr::parse_file(const char * affpath) | 321 + |
30 +int AffixMgr::parse_file(FILE* aff_handle) | 322 + // A BDICT file stores PFX and SFX lines in a special section and it provides |
31 { | 323 + // a special line iterator for reading PFX and SFX lines. |
32 | 324 + // We create a FileMgr object from this iterator and parse PFX and SFX lines |
33 // io buffers | 325 + // before parsing other lines. |
34 @@ -250,11 +252,12 @@ | 326 + hunspell::LineIterator affix_iterator = bdict_reader->GetAffixLineIterator(); |
35 | 327 + FileMgr* iterator = new FileMgr(&affix_iterator); |
36 // open the affix file | 328 + if (!iterator) { |
37 FILE * afflst; | 329 + HUNSPELL_WARNING(stderr, |
38 - afflst = fopen(affpath,"r"); | 330 + "error: could not create a FileMgr from an affix line iterator.\n"); |
39 + afflst = _fdopen(_dup(_fileno(aff_handle)), "r"); | 331 + return 1; |
40 if (!afflst) { | 332 + } |
41 - HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n
",affpath); | 333 + |
42 + HUNSPELL_WARNING(stderr, "error: could not open affix description file\n"); | 334 + while (line = iterator->getline()) { |
| 335 + ft = ' '; |
| 336 + if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P'; |
| 337 + if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S'; |
| 338 + if (ft != ' ') |
| 339 + parse_affix(line, ft, iterator, NULL); |
| 340 + } |
| 341 + delete iterator; |
| 342 + |
| 343 + // Create a FileMgr object for reading lines except PFX and SFX lines. |
| 344 + // We don't need to change the loop below since our FileMgr emulates the |
| 345 + // original one. |
| 346 + hunspell::LineIterator other_iterator = bdict_reader->GetOtherLineIterator(); |
| 347 + FileMgr * afflst = new FileMgr(&other_iterator); |
| 348 + if (!afflst) { |
| 349 + HUNSPELL_WARNING(stderr, |
| 350 + "error: could not create a FileMgr from an other line iterator.\n"); |
| 351 + return 1; |
| 352 + } |
| 353 +#else |
| 354 // checking flag duplication |
| 355 char dupflags[CONTSIZE]; |
| 356 char dupflags_ini = 1; |
| 357 @@ -265,6 +316,7 @@ |
| 358 HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n
",affpath); |
43 return 1; | 359 return 1; |
44 } | 360 } |
45 + fseek(afflst, 0, SEEK_SET); | 361 +#endif |
46 | 362 |
47 // step one is to parse the affix file building up the internal | 363 // step one is to parse the affix file building up the internal |
48 // affix data structures | 364 // affix data structures |
| 365 @@ -274,6 +326,7 @@ |
| 366 while ((line = afflst->getline())) { |
| 367 mychomp(line); |
| 368 |
| 369 +#ifndef HUNSPELL_CHROME_CLIENT |
| 370 /* remove byte order mark */ |
| 371 if (firstline) { |
| 372 firstline = 0; |
| 373 @@ -282,6 +335,7 @@ |
| 374 memmove(line, line+3, strlen(line+3)+1); |
| 375 } |
| 376 } |
| 377 +#endif |
| 378 |
| 379 /* parse in the keyboard string */ |
| 380 if (strncmp(line,"KEY",3) == 0) { |
| 381 @@ -517,6 +571,7 @@ |
| 382 } |
| 383 } |
| 384 |
| 385 +#ifndef HUNSPELL_CHROME_CLIENT |
| 386 /* parse in the typical fault correcting table */ |
| 387 if (strncmp(line,"REP",3) == 0) { |
| 388 if (parse_reptable(line, afflst)) { |
| 389 @@ -524,6 +579,7 @@ |
| 390 return 1; |
| 391 } |
| 392 } |
| 393 +#endif |
| 394 |
| 395 /* parse in the input conversion table */ |
| 396 if (strncmp(line,"ICONV",5) == 0) { |
| 397 @@ -634,6 +690,7 @@ |
| 398 checksharps=1; |
| 399 } |
| 400 |
| 401 +#ifndef HUNSPELL_CHROME_CLIENT // Chrome handled affixes above. |
| 402 /* parse this affix: P - prefix, S - suffix */ |
| 403 ft = ' '; |
| 404 if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P'; |
| 405 @@ -650,6 +707,7 @@ |
| 406 return 1; |
| 407 } |
| 408 } |
| 409 +#endif |
| 410 |
| 411 } |
| 412 delete afflst; |
| 413 @@ -1247,6 +1305,26 @@ |
| 414 const char * r; |
| 415 int lenr, lenp; |
| 416 |
| 417 +#ifdef HUNSPELL_CHROME_CLIENT |
| 418 + const char *pattern, *pattern2; |
| 419 + hunspell::ReplacementIterator iterator = bdict_reader->GetReplacementIterator
(); |
| 420 + while (iterator.GetNext(&pattern, &pattern2)) { |
| 421 + r = word; |
| 422 + lenr = strlen(pattern2); |
| 423 + lenp = strlen(pattern); |
| 424 + |
| 425 + // search every occurence of the pattern in the word |
| 426 + while ((r=strstr(r, pattern)) != NULL) { |
| 427 + strcpy(candidate, word); |
| 428 + if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break; |
| 429 + strcpy(candidate+(r-word), pattern2); |
| 430 + strcpy(candidate+(r-word)+lenr, r+lenp); |
| 431 + if (candidate_check(candidate,strlen(candidate))) return 1; |
| 432 + r++; // search for the next letter |
| 433 + } |
| 434 + } |
| 435 + |
| 436 +#else |
| 437 if ((wl < 2) || !numrep) return 0; |
| 438 |
| 439 for (int i=0; i < numrep; i++ ) { |
| 440 @@ -1263,6 +1341,7 @@ |
| 441 r++; // search for the next letter |
| 442 } |
| 443 } |
| 444 +#endif |
| 445 return 0; |
| 446 } |
| 447 |
| 448 @@ -3332,6 +3411,7 @@ |
| 449 return 0; |
| 450 } |
| 451 |
| 452 +#ifndef HUNSPELL_CHROME_CLIENT |
| 453 /* parse in the typical fault correcting table */ |
| 454 int AffixMgr::parse_reptable(char * line, FileMgr * af) |
| 455 { |
| 456 @@ -3407,6 +3487,7 @@ |
| 457 } |
| 458 return 0; |
| 459 } |
| 460 +#endif |
| 461 |
| 462 /* parse in the typical fault correcting table */ |
| 463 int AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const
char * keyword) |
| 464 @@ -4010,6 +4091,7 @@ |
| 465 case 1: { |
| 466 np++; |
| 467 aflag = pHMgr->decode_flag(piece); |
| 468 +#ifndef HUNSPELL_CHROME_CLIENT // We don't check for duplicates. |
| 469 if (((at == 'S') && (dupflags[aflag] & dupSFX)) || |
| 470 ((at == 'P') && (dupflags[aflag] & dupPFX))) { |
| 471 HUNSPELL_WARNING(stderr, "error: line %d: multiple defi
nitions of an affix flag\n", |
| 472 @@ -4017,6 +4099,7 @@ |
| 473 // return 1; XXX permissive mode for bad dictionaries |
| 474 } |
| 475 dupflags[aflag] += (char) ((at == 'S') ? dupSFX : dupPFX); |
| 476 +#endif |
| 477 break; |
| 478 } |
| 479 // piece 3 - is cross product indicator |
49 Index: src/hunspell/affixmgr.hxx | 480 Index: src/hunspell/affixmgr.hxx |
50 =================================================================== | 481 =================================================================== |
51 --- src/hunspell/affixmgr.hxx» (revision 3811) | 482 --- src/hunspell/affixmgr.hxx» (revision 48261) |
52 +++ src/hunspell/affixmgr.hxx (working copy) | 483 +++ src/hunspell/affixmgr.hxx (working copy) |
53 @@ -93,7 +93,7 @@ | 484 @@ -18,6 +18,40 @@ |
54 | 485 class PfxEntry; |
| 486 class SfxEntry; |
| 487 |
| 488 +#ifdef HUNSPELL_CHROME_CLIENT |
| 489 + |
| 490 +#include <vector> |
| 491 + |
| 492 +// This class provides an implementation of the contclasses array in AffixMgr |
| 493 +// that is normally a large static array. We should almost never need more than |
| 494 +// 256 elements, so this class only allocates that much to start off with. If |
| 495 +// elements higher than that are actually used, we'll automatically expand. |
| 496 +class ContClasses { |
| 497 + public: |
| 498 + ContClasses() { |
| 499 + // Pre-allocate a buffer so that typically, we'll never have to resize. |
| 500 + EnsureSizeIs(256); |
| 501 + } |
| 502 + |
| 503 + char& operator[](size_t index) { |
| 504 + EnsureSizeIs(index + 1); |
| 505 + return data[index]; |
| 506 + } |
| 507 + |
| 508 + void EnsureSizeIs(size_t new_size) { |
| 509 + if (data.size() >= new_size) |
| 510 + return; // Nothing to do. |
| 511 + |
| 512 + size_t old_size = data.size(); |
| 513 + data.resize(new_size); |
| 514 + memset(&data[old_size], 0, new_size - old_size); |
| 515 + } |
| 516 + |
| 517 + std::vector<char> data; |
| 518 +}; |
| 519 + |
| 520 +#endif // HUNSPELL_CHROME_CLIENT |
| 521 + |
| 522 class LIBHUNSPELL_DLL_EXPORTED AffixMgr |
| 523 { |
| 524 |
| 525 @@ -98,12 +132,20 @@ |
| 526 int fullstrip; |
| 527 |
| 528 int havecontclass; // boolean variable |
| 529 +#ifdef HUNSPELL_CHROME_CLIENT |
| 530 + ContClasses contclasses; |
| 531 +#else |
| 532 char contclasses[CONTSIZE]; // flags of possible continuing cl
asses (twofold affix) |
| 533 +#endif |
| 534 |
55 public: | 535 public: |
56 | 536 |
57 - AffixMgr(const char * affpath, HashMgr * ptr); | 537 +#ifdef HUNSPELL_CHROME_CLIENT |
58 + AffixMgr(FILE* aff_handle, HashMgr * ptr); | 538 + AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md); |
| 539 +#else |
| 540 AffixMgr(const char * affpath, HashMgr** ptr, int * md, |
| 541 const char * key = NULL); |
| 542 +#endif |
59 ~AffixMgr(); | 543 ~AffixMgr(); |
60 struct hentry * affix_check(const char * word, int len, | 544 struct hentry * affix_check(const char * word, int len, |
61 const unsigned short needflag = (unsigned short) 0, char in_compoun
d = IN_CPD_NOT); | 545 const unsigned short needflag = (unsigned short) 0, |
62 @@ -179,7 +179,7 @@ | 546 @@ -202,6 +244,10 @@ |
63 int get_checksharps(void); | 547 int get_fullstrip() const; |
64 | 548 |
65 private: | 549 private: |
66 - int parse_file(const char * affpath); | 550 +#ifdef HUNSPELL_CHROME_CLIENT |
67 + int parse_file(FILE* aff_handle); | 551 + // Not owned by us, owned by the Hunspell object. |
68 // int parse_string(char * line, char ** out, const char * name); | 552 + hunspell::BDictReader* bdict_reader; |
69 int parse_flag(char * line, unsigned short * out, const char * name); | 553 +#endif |
70 int parse_num(char * line, int * out, const char * name); | 554 int parse_file(const char * affpath, const char * key); |
| 555 int parse_flag(char * line, unsigned short * out, FileMgr * af); |
| 556 int parse_num(char * line, int * out, FileMgr * af); |
| 557 Index: src/hunspell/htypes.hxx |
| 558 =================================================================== |
| 559 --- src/hunspell/htypes.hxx» (revision 48261) |
| 560 +++ src/hunspell/htypes.hxx» (working copy) |
| 561 @@ -1,6 +1,16 @@ |
| 562 #ifndef _HTYPES_HXX_ |
| 563 #define _HTYPES_HXX_ |
| 564 |
| 565 +#ifdef HUNSPELL_CHROME_CLIENT |
| 566 +// This is a workaround for preventing errors in parsing Turkish BDICs, which |
| 567 +// contain very long AF lines (~ 12,000 chars). |
| 568 +// TODO(hbono) change the HashMgr::parse_aliasf() function to be able to parse |
| 569 +// longer lines than MAXDELEN. |
| 570 +#define MAXDELEN (8192 * 2) |
| 571 +#else |
| 572 +#define MAXDELEN 8192 |
| 573 +#endif // HUNSPELL_CHROME_CLIENT |
| 574 + |
| 575 #define ROTATE_LEN 5 |
| 576 |
| 577 #define ROTATE(v,q) \ |
71 Index: src/hunspell/hashmgr.cxx | 578 Index: src/hunspell/hashmgr.cxx |
72 =================================================================== | 579 =================================================================== |
73 --- src/hunspell/hashmgr.cxx» (revision 3811) | 580 --- src/hunspell/hashmgr.cxx» (revision 48261) |
74 +++ src/hunspell/hashmgr.cxx (working copy) | 581 +++ src/hunspell/hashmgr.cxx (working copy) |
75 @@ -29,7 +31,7 @@ | 582 @@ -12,8 +12,14 @@ |
76 | 583 |
77 // build a hash table from a munched word list | 584 // build a hash table from a munched word list |
78 | 585 |
79 -HashMgr::HashMgr(const char * tpath, const char * apath) | 586 +#ifdef HUNSPELL_CHROME_CLIENT |
80 +HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle) | 587 +HashMgr::HashMgr(hunspell::BDictReader* reader) |
81 { | 588 +{ |
| 589 + bdict_reader = reader; |
| 590 +#else |
| 591 HashMgr::HashMgr(const char * tpath, const char * apath, const char * key) |
| 592 { |
| 593 +#endif |
82 tablesize = 0; | 594 tablesize = 0; |
83 tableptr = NULL; | 595 tableptr = NULL; |
84 @@ -43,8 +45,8 @@ | 596 flag_mode = FLAG_CHAR; |
85 aliasf = NULL; | 597 @@ -31,8 +37,14 @@ |
86 numaliasm = 0; | 598 numaliasm = 0; |
87 aliasm = NULL; | 599 aliasm = NULL; |
88 - load_config(apath); | 600 forbiddenword = FORBIDDENWORD; // forbidden word signing flag |
89 - int ec = load_tables(tpath); | 601 +#ifdef HUNSPELL_CHROME_CLIENT |
90 + load_config(aff_handle); | 602 + // No tables to load, just the AF lines. |
91 + int ec = load_tables(dic_handle); | 603 + load_config(NULL, NULL); |
| 604 + int ec = LoadAFLines(); |
| 605 +#else |
| 606 load_config(apath, key); |
| 607 int ec = load_tables(tpath, key); |
| 608 +#endif |
92 if (ec) { | 609 if (ec) { |
93 /* error condition - what should we do here */ | 610 /* error condition - what should we do here */ |
94 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec); | 611 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec); |
95 @@ -240,7 +242,7 @@ | 612 @@ -91,15 +103,59 @@ |
| 613 if (ignorechars) free(ignorechars); |
| 614 if (ignorechars_utf16) free(ignorechars_utf16); |
| 615 |
| 616 +#ifdef HUNSPELL_CHROME_CLIENT |
| 617 + EmptyHentryCache(); |
| 618 + for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin(); |
| 619 + it != pointer_to_strings_.end(); ++it) { |
| 620 + delete *it; |
| 621 + } |
| 622 +#endif |
| 623 + |
| 624 #ifdef MOZILLA_CLIENT |
| 625 delete [] csconv; |
| 626 #endif |
| 627 } |
| 628 |
| 629 +#ifdef HUNSPELL_CHROME_CLIENT |
| 630 +void HashMgr::EmptyHentryCache() { |
| 631 + // We need to delete each cache entry, and each additional one in the linked |
| 632 + // list of homonyms. |
| 633 + for (HEntryCache::iterator i = hentry_cache.begin(); |
| 634 + i != hentry_cache.end(); ++i) { |
| 635 + hentry* cur = i->second; |
| 636 + while (cur) { |
| 637 + hentry* next = cur->next_homonym; |
| 638 + DeleteHashEntry(cur); |
| 639 + cur = next; |
| 640 + } |
| 641 + } |
| 642 + hentry_cache.clear(); |
| 643 +} |
| 644 +#endif |
| 645 + |
| 646 // lookup a root word in the hashtable |
| 647 |
| 648 struct hentry * HashMgr::lookup(const char *word) const |
| 649 { |
| 650 +#ifdef HUNSPELL_CHROME_CLIENT |
| 651 + int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; |
| 652 + int affix_count = bdict_reader->FindWord(word, affix_ids); |
| 653 + if (affix_count == 0) { // look for custom added word |
| 654 + std::map<base::StringPiece, int>::const_iterator iter = |
| 655 + custom_word_to_affix_id_map_.find(word); |
| 656 + if (iter != custom_word_to_affix_id_map_.end()) { |
| 657 + affix_count = 1; |
| 658 + affix_ids[0] = iter->second; |
| 659 + } |
| 660 + } |
| 661 + |
| 662 + static const int kMaxWordLen = 128; |
| 663 + static char word_buf[kMaxWordLen]; |
| 664 + // To take account of null-termination, we use up to 127. |
| 665 + strncpy(word_buf, word, kMaxWordLen - 1); |
| 666 + |
| 667 + return AffixIDsToHentry(word_buf, affix_ids, affix_count); |
| 668 +#else |
| 669 struct hentry * dp; |
| 670 if (tableptr) { |
| 671 dp = tableptr[hash(word)]; |
| 672 @@ -109,12 +165,14 @@ |
| 673 } |
| 674 } |
| 675 return NULL; |
| 676 +#endif |
| 677 } |
| 678 |
| 679 // add a word to the hash table (private) |
| 680 int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff
, |
| 681 int al, const char * desc, bool onlyupcase) |
| 682 { |
| 683 +#ifndef HUNSPELL_CHROME_CLIENT |
| 684 bool upcasehomonym = false; |
| 685 int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0; |
| 686 // variable-length hash record with word and optional fields |
| 687 @@ -206,6 +264,17 @@ |
| 688 » if (hp->astr) free(hp->astr); |
| 689 » free(hp); |
| 690 } |
| 691 +#else |
| 692 + std::map<base::StringPiece, int>::iterator iter = |
| 693 + custom_word_to_affix_id_map_.find(word); |
| 694 + if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added |
| 695 + std::string* new_string_word = new std::string(word); |
| 696 + pointer_to_strings_.push_back(new_string_word); |
| 697 + base::StringPiece sp(*(new_string_word)); |
| 698 + custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words |
| 699 + return 1; |
| 700 + } |
| 701 +#endif |
| 702 return 0; |
| 703 } |
| 704 |
| 705 @@ -339,6 +408,43 @@ |
| 706 // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp); |
| 707 struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const |
| 708 { |
| 709 +#ifdef HUNSPELL_CHROME_CLIENT |
| 710 + // Return NULL if dictionary is not valid. |
| 711 + if (!bdict_reader->IsValid()) |
| 712 + return NULL; |
| 713 + |
| 714 + // This function is only ever called by one place and not nested. We can |
| 715 + // therefore keep static state between calls and use |col| as a "reset" flag |
| 716 + // to avoid changing the API. It is set to -1 for the first call. |
| 717 + static hunspell::WordIterator word_iterator = |
| 718 + bdict_reader->GetAllWordIterator(); |
| 719 + if (col < 0) { |
| 720 + col = 1; |
| 721 + word_iterator = bdict_reader->GetAllWordIterator(); |
| 722 + } |
| 723 + |
| 724 + int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; |
| 725 + static const int kMaxWordLen = 128; |
| 726 + static char word[kMaxWordLen]; |
| 727 + int affix_count = word_iterator.Advance(word, kMaxWordLen, affix_ids); |
| 728 + if (affix_count == 0) |
| 729 + return NULL; |
| 730 + short word_len = static_cast<short>(strlen(word)); |
| 731 + |
| 732 + // Since hunspell 1.2.8, an hentry struct becomes a variable-length struct, |
| 733 + // i.e. a struct which uses its array 'word[1]' as a variable-length array. |
| 734 + // As noted above, this function is not nested. So, we just use a static |
| 735 + // struct which consists of an hentry and a char[kMaxWordLen], and initialize |
| 736 + // the static struct and return it for now. |
| 737 + // No need to create linked lists for the extra affixes. |
| 738 + static struct { |
| 739 + hentry entry; |
| 740 + char word[kMaxWordLen]; |
| 741 + } hash_entry; |
| 742 + |
| 743 + return InitHashEntry(&hash_entry.entry, sizeof(hash_entry), |
| 744 + &word[0], word_len, affix_ids[0]); |
| 745 +#else |
| 746 if (hp && hp->next != NULL) return hp->next; |
| 747 for (col++; col < tablesize; col++) { |
| 748 if (tableptr[col]) return tableptr[col]; |
| 749 @@ -346,11 +452,13 @@ |
| 750 // null at end and reset to start |
| 751 col = -1; |
| 752 return NULL; |
| 753 +#endif |
96 } | 754 } |
97 | 755 |
98 // load a munched word list and build a hash table on the fly | 756 // load a munched word list and build a hash table on the fly |
99 -int HashMgr::load_tables(const char * tpath) | 757 int HashMgr::load_tables(const char * tpath, const char * key) |
100 +int HashMgr::load_tables(FILE* t_handle) | 758 { |
101 { | 759 +#ifndef HUNSPELL_CHROME_CLIENT |
102 int wl, al; | 760 int al; |
103 char * ap; | 761 char * ap; |
104 @@ -248,8 +250,9 @@ | 762 char * dp; |
105 unsigned short * flags; | 763 @@ -470,6 +578,7 @@ |
106 | 764 } |
107 // raw dictionary - munched file | 765 |
108 - FILE * rawdict = fopen(tpath, "r"); | 766 delete dict; |
109 + FILE * rawdict = _fdopen(_dup(_fileno(t_handle)), "r"); | 767 +#endif |
110 if (rawdict == NULL) return 1; | 768 return 0; |
111 + fseek(rawdict, 0, SEEK_SET); | 769 } |
112 | 770 |
113 // first read the first line of file to get hash table size */ | 771 @@ -478,6 +587,9 @@ |
114 char ts[MAXDELEN]; | 772 |
115 @@ -442,7 +445,7 @@ | 773 int HashMgr::hash(const char * word) const |
116 } | 774 { |
117 | 775 +#ifdef HUNSPELL_CHROME_CLIENT |
118 // read in aff file and set flag mode | 776 + return 0; |
119 -int HashMgr::load_config(const char * affpath) | 777 +#else |
120 +int HashMgr::load_config(FILE* aff_handle) | 778 long hv = 0; |
121 { | 779 for (int i=0; i < 4 && *word != 0; i++) |
| 780 hv = (hv << 8) | (*word++); |
| 781 @@ -486,6 +598,7 @@ |
| 782 hv ^= (*word++); |
| 783 } |
| 784 return (unsigned long) hv % tablesize; |
| 785 +#endif |
| 786 } |
| 787 |
| 788 int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af)
{ |
| 789 @@ -607,7 +720,12 @@ |
122 int firstline = 1; | 790 int firstline = 1; |
123 | |
124 @@ -451,11 +454,12 @@ | |
125 | 791 |
126 // open the affix file | 792 // open the affix file |
127 FILE * afflst; | 793 +#ifdef HUNSPELL_CHROME_CLIENT |
128 - afflst = fopen(affpath,"r"); | 794 + hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator(); |
129 + afflst = _fdopen(_dup(_fileno(aff_handle)), "r"); | 795 + FileMgr * afflst = new FileMgr(&iterator); |
| 796 +#else |
| 797 FileMgr * afflst = new FileMgr(affpath, key); |
| 798 +#endif |
130 if (!afflst) { | 799 if (!afflst) { |
131 - HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\
n",affpath); | 800 HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\
n",affpath); |
132 + HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n")
; | |
133 return 1; | 801 return 1; |
134 } | 802 @@ -802,6 +920,121 @@ |
135 + fseek(afflst, 0, SEEK_SET); | 803 return 0; |
136 | 804 } |
137 // read in each line ignoring any that do not | 805 |
138 // start with a known line type indicator | 806 +#ifdef HUNSPELL_CHROME_CLIENT |
| 807 +int HashMgr::LoadAFLines() |
| 808 +{ |
| 809 + utf8 = 1; // We always use UTF-8. |
| 810 + |
| 811 + // Read in all the AF lines which tell us the rules for each affix group ID. |
| 812 + hunspell::LineIterator iterator = bdict_reader->GetAfLineIterator(); |
| 813 + FileMgr afflst(&iterator); |
| 814 + while (char* line = afflst.getline()) { |
| 815 + int rv = parse_aliasf(line, &afflst); |
| 816 + if (rv) |
| 817 + return rv; |
| 818 + } |
| 819 + |
| 820 + return 0; |
| 821 +} |
| 822 + |
| 823 +hentry* HashMgr::InitHashEntry(hentry* entry, |
| 824 + size_t item_size, |
| 825 + const char* word, |
| 826 + int word_length, |
| 827 + int affix_index) const { |
| 828 + // Return if the given buffer doesn't have enough space for a hentry struct |
| 829 + // or the given word is too long. |
| 830 + // Our BDICT cannot handle words longer than (128 - 1) bytes. So, it is |
| 831 + // better to return an error if the given word is too long and prevent |
| 832 + // an unexpected result caused by a long word. |
| 833 + const int kMaxWordLen = 128; |
| 834 + if (item_size < sizeof(hentry) + word_length + 1 || |
| 835 + word_length >= kMaxWordLen) |
| 836 + return NULL; |
| 837 + |
| 838 + // Initialize a hentry struct with the given parameters, and |
| 839 + // append the given string at the end of this hentry struct. |
| 840 + memset(entry, 0, item_size); |
| 841 + FileMgr af(NULL); |
| 842 + entry->alen = static_cast<short>( |
| 843 + const_cast<HashMgr*>(this)->get_aliasf(affix_index, &entry->astr, &af)); |
| 844 + entry->blen = static_cast<unsigned char>(word_length); |
| 845 + memcpy(&entry->word, word, word_length); |
| 846 + |
| 847 + return entry; |
| 848 +} |
| 849 + |
| 850 +hentry* HashMgr::CreateHashEntry(const char* word, |
| 851 + int word_length, |
| 852 + int affix_index) const { |
| 853 + // Return if the given word is too long. |
| 854 + // (See the comment in HashMgr::InitHashEntry().) |
| 855 + const int kMaxWordLen = 128; |
| 856 + if (word_length >= kMaxWordLen) |
| 857 + return NULL; |
| 858 + |
| 859 + const size_t kEntrySize = sizeof(hentry) + word_length + 1; |
| 860 + struct hentry* entry = reinterpret_cast<hentry*>(malloc(kEntrySize)); |
| 861 + if (entry) |
| 862 + InitHashEntry(entry, kEntrySize, word, word_length, affix_index); |
| 863 + |
| 864 + return entry; |
| 865 +} |
| 866 + |
| 867 +void HashMgr::DeleteHashEntry(hentry* entry) const { |
| 868 + free(entry); |
| 869 +} |
| 870 + |
| 871 +hentry* HashMgr::AffixIDsToHentry(char* word, |
| 872 + int* affix_ids, |
| 873 + int affix_count) const |
| 874 +{ |
| 875 + if (affix_count == 0) |
| 876 + return NULL; |
| 877 + |
| 878 + HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache; |
| 879 + std::string std_word(word); |
| 880 + HEntryCache::iterator found = cache.find(std_word); |
| 881 + if (found != cache.end()) { |
| 882 + // We must return an existing hentry for the same word if we've previously |
| 883 + // handed one out. Hunspell will compare pointers in some cases to see if |
| 884 + // two words it has found are the same. |
| 885 + return found->second; |
| 886 + } |
| 887 + |
| 888 + short word_len = static_cast<short>(strlen(word)); |
| 889 + |
| 890 + // We can get a number of affix IDs per word. There will normally be only one, |
| 891 + // but if not, there will be a linked list of "hentry"s for the "homonym"s |
| 892 + // for the word. |
| 893 + struct hentry* first_he = NULL; |
| 894 + struct hentry* prev_he = NULL; // For making linked list. |
| 895 + for (int i = 0; i < affix_count; i++) { |
| 896 + struct hentry* he = CreateHashEntry(word, word_len, affix_ids[i]); |
| 897 + if (!he) |
| 898 + break; |
| 899 + if (i == 0) |
| 900 + first_he = he; |
| 901 + if (prev_he) |
| 902 + prev_he->next_homonym = he; |
| 903 + prev_he = he; |
| 904 + } |
| 905 + |
| 906 + cache[std_word] = first_he; // Save this word in the cache for later. |
| 907 + return first_he; |
| 908 +} |
| 909 + |
| 910 +hentry* HashMgr::GetHentryFromHEntryCache(char* word) { |
| 911 + HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache; |
| 912 + std::string std_word(word); |
| 913 + HEntryCache::iterator found = cache.find(std_word); |
| 914 + if (found != cache.end()) |
| 915 + return found->second; |
| 916 + else |
| 917 + return NULL; |
| 918 +} |
| 919 +#endif |
| 920 + |
| 921 int HashMgr::is_aliasf() { |
| 922 return (aliasf != NULL); |
| 923 } |
| 924 Index: src/hunspell/hunspell.cxx |
| 925 =================================================================== |
| 926 --- src/hunspell/hunspell.cxx» (revision 48261) |
| 927 +++ src/hunspell/hunspell.cxx» (working copy) |
| 928 @@ -7,18 +7,35 @@ |
| 929 |
| 930 #include "hunspell.hxx" |
| 931 #include "hunspell.h" |
| 932 +#ifndef HUNSPELL_CHROME_CLIENT |
| 933 #include "config.h" |
| 934 +#endif |
| 935 #include "csutil.hxx" |
| 936 |
| 937 +#ifdef HUNSPELL_CHROME_CLIENT |
| 938 +Hunspell::Hunspell(const unsigned char* bdict_data, size_t bdict_length) |
| 939 +#else |
| 940 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key) |
| 941 +#endif |
| 942 { |
| 943 encoding = NULL; |
| 944 csconv = NULL; |
| 945 utf8 = 0; |
| 946 complexprefixes = 0; |
| 947 +#ifndef HUNSPELL_CHROME_CLIENT |
| 948 affixpath = mystrdup(affpath); |
| 949 +#endif |
| 950 maxdic = 0; |
| 951 |
| 952 +#ifdef HUNSPELL_CHROME_CLIENT |
| 953 + bdict_reader = new hunspell::BDictReader; |
| 954 + bdict_reader->Init(bdict_data, bdict_length); |
| 955 + |
| 956 + pHMgr[0] = new HashMgr(bdict_reader); |
| 957 + if (pHMgr[0]) maxdic = 1; |
| 958 + |
| 959 + pAMgr = new AffixMgr(bdict_reader, pHMgr, &maxdic); |
| 960 +#else |
| 961 /* first set up the hash manager */ |
| 962 pHMgr[0] = new HashMgr(dpath, affpath, key); |
| 963 if (pHMgr[0]) maxdic = 1; |
| 964 @@ -26,6 +43,7 @@ |
| 965 /* next set up the affix manager */ |
| 966 /* it needs access to the hash manager lookup methods */ |
| 967 pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key); |
| 968 +#endif |
| 969 |
| 970 /* get the preferred try string and the dictionary */ |
| 971 /* encoding from the Affix Manager for that dictionary */ |
| 972 @@ -56,10 +74,17 @@ |
| 973 csconv= NULL; |
| 974 if (encoding) free(encoding); |
| 975 encoding = NULL; |
| 976 + |
| 977 +#ifdef HUNSPELL_CHROME_CLIENT |
| 978 + if (bdict_reader) delete bdict_reader; |
| 979 + bdict_reader = NULL; |
| 980 +#else |
| 981 if (affixpath) free(affixpath); |
| 982 affixpath = NULL; |
| 983 +#endif |
| 984 } |
| 985 |
| 986 +#ifndef HUNSPELL_CHROME_CLIENT |
| 987 // load extra dictionaries |
| 988 int Hunspell::add_dic(const char * dpath, const char * key) { |
| 989 if (maxdic == MAXDIC || !affixpath) return 1; |
| 990 @@ -67,6 +92,7 @@ |
| 991 if (pHMgr[maxdic]) maxdic++; else return 1; |
| 992 return 0; |
| 993 } |
| 994 +#endif |
| 995 |
| 996 // make a copy of src at destination while removing all leading |
| 997 // blanks and removing any trailing periods after recording |
| 998 @@ -319,6 +345,9 @@ |
| 999 |
| 1000 int Hunspell::spell(const char * word, int * info, char ** root) |
| 1001 { |
| 1002 +#ifdef HUNSPELL_CHROME_CLIENT |
| 1003 + if (pHMgr) pHMgr[0]->EmptyHentryCache(); |
| 1004 +#endif |
| 1005 struct hentry * rv=NULL; |
| 1006 // need larger vector. For example, Turkish capital letter I converted a |
| 1007 // 2-byte UTF-8 character (dotless i) by mkallsmall. |
| 1008 @@ -567,6 +596,13 @@ |
| 1009 word = w2; |
| 1010 } else word = w; |
| 1011 |
| 1012 +#ifdef HUNSPELL_CHROME_CLIENT |
| 1013 + // We need to check that the word length is valid to make Coverity (Event |
| 1014 + // fixed_size_dest: Possible overrun of N byte fixed size buffer) happy. |
| 1015 + if ((utf8 && strlen(word) >= MAXWORDUTF8LEN) || (!utf8 && strlen(word) >= MAX
WORDLEN)) |
| 1016 + return NULL; |
| 1017 +#endif |
| 1018 + |
| 1019 // word reversing wrapper for complex prefixes |
| 1020 if (complexprefixes) { |
| 1021 if (word != w2) { |
| 1022 @@ -657,6 +693,9 @@ |
| 1023 |
| 1024 int Hunspell::suggest(char*** slst, const char * word) |
| 1025 { |
| 1026 +#ifdef HUNSPELL_CHROME_CLIENT |
| 1027 + if (pHMgr) pHMgr[0]->EmptyHentryCache(); |
| 1028 +#endif |
| 1029 int onlycmpdsug = 0; |
| 1030 char cw[MAXWORDUTF8LEN]; |
| 1031 char wspace[MAXWORDUTF8LEN]; |
| 1032 @@ -1874,13 +1913,21 @@ |
| 1033 |
| 1034 Hunhandle *Hunspell_create(const char * affpath, const char * dpath) |
| 1035 { |
| 1036 +#ifdef HUNSPELL_CHROME_CLIENT |
| 1037 + return NULL; |
| 1038 +#else |
| 1039 return (Hunhandle*)(new Hunspell(affpath, dpath)); |
| 1040 +#endif |
| 1041 } |
| 1042 |
| 1043 Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath, |
| 1044 const char * key) |
| 1045 { |
| 1046 +#ifdef HUNSPELL_CHROME_CLIENT |
| 1047 + return NULL; |
| 1048 +#else |
| 1049 return (Hunhandle*)(new Hunspell(affpath, dpath, key)); |
| 1050 +#endif |
| 1051 } |
| 1052 |
| 1053 void Hunspell_destroy(Hunhandle *pHunspell) |
139 Index: src/hunspell/hashmgr.hxx | 1054 Index: src/hunspell/hashmgr.hxx |
140 =================================================================== | 1055 =================================================================== |
141 --- src/hunspell/hashmgr.hxx» (revision 3811) | 1056 --- src/hunspell/hashmgr.hxx» (revision 48261) |
142 +++ src/hunspell/hashmgr.hxx (working copy) | 1057 +++ src/hunspell/hashmgr.hxx (working copy) |
143 @@ -25,7 +25,7 @@ | 1058 @@ -8,10 +8,25 @@ |
| 1059 #include "htypes.hxx" |
| 1060 #include "filemgr.hxx" |
| 1061 |
| 1062 +#ifdef HUNSPELL_CHROME_CLIENT |
| 1063 +#include <string> |
| 1064 +#include <map> |
| 1065 + |
| 1066 +#include "base/stl_util-inl.h" |
| 1067 +#include "base/string_piece.h" |
| 1068 +#include "third_party/hunspell/google/bdict_reader.h" |
| 1069 +#endif |
| 1070 + |
| 1071 enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; |
| 1072 |
| 1073 class LIBHUNSPELL_DLL_EXPORTED HashMgr |
| 1074 { |
| 1075 +#ifdef HUNSPELL_CHROME_CLIENT |
| 1076 + // Not owned by this class, owned by the Hunspell object. |
| 1077 + hunspell::BDictReader* bdict_reader; |
| 1078 + std::map<base::StringPiece, int> custom_word_to_affix_id_map_; |
| 1079 + std::vector<std::string*> pointer_to_strings_; |
| 1080 +#endif |
| 1081 int tablesize; |
| 1082 struct hentry ** tableptr; |
| 1083 int userword; |
| 1084 @@ -34,7 +49,23 @@ |
144 | 1085 |
145 | 1086 |
146 public: | 1087 public: |
147 - HashMgr(const char * tpath, const char * apath); | 1088 +#ifdef HUNSPELL_CHROME_CLIENT |
148 + HashMgr(FILE* t_handle, FILE* a_handle); | 1089 + HashMgr(hunspell::BDictReader* reader); |
| 1090 + |
| 1091 + // Return the hentry corresponding to the given word. Returns NULL if the |
| 1092 + // word is not there in the cache. |
| 1093 + hentry* GetHentryFromHEntryCache(char* word); |
| 1094 + |
| 1095 + // Called before we do a new operation. This will empty the cache of pointers |
| 1096 + // to hentries that we have cached. In Chrome, we make these on-demand, but |
| 1097 + // they must live as long as the single spellcheck operation that they're par
t |
| 1098 + // of since Hunspell will save pointers to various ones as it works. |
| 1099 + // |
| 1100 + // This function allows that cache to be emptied and not grow infinitely. |
| 1101 + void EmptyHentryCache(); |
| 1102 +#else |
| 1103 HashMgr(const char * tpath, const char * apath, const char * key = NULL); |
| 1104 +#endif |
149 ~HashMgr(); | 1105 ~HashMgr(); |
150 | 1106 |
151 struct hentry * lookup(const char *) const; | 1107 struct hentry * lookup(const char *) const; |
152 @@ -46,9 +46,9 @@ | 1108 @@ -59,6 +90,40 @@ |
153 | 1109 int al, const char * desc, bool onlyupcase); |
154 | 1110 int load_config(const char * affpath, const char * key); |
155 private: | 1111 int parse_aliasf(char * line, FileMgr * af); |
156 - int load_tables(const char * tpath); | 1112 + |
157 + int load_tables(FILE* t_handle); | 1113 +#ifdef HUNSPELL_CHROME_CLIENT |
158 int add_word(const char * word, int wl, unsigned short * ap, int al, const ch
ar * desc); | 1114 + // Loads the AF lines from a BDICT. |
159 - int load_config(const char * affpath); | 1115 + // A BDICT file compresses its AF lines to save memory. |
160 + int load_config(FILE* aff_handle); | 1116 + // This function decompresses each AF line and call parse_aliasf(). |
161 int parse_aliasf(char * line, FILE * af); | 1117 + int LoadAFLines(); |
162 #ifdef HUNSPELL_EXPERIMENTAL | 1118 + |
163 int parse_aliasm(char * line, FILE * af); | 1119 + // Helper functions that create a new hentry struct, initialize it, and |
164 Index: src/hunspell/hunspell.cxx | 1120 + // delete it. |
165 =================================================================== | 1121 + // These functions encapsulate non-trivial operations in creating and |
166 --- src/hunspell/hunspell.cxx» (revision 3811) | 1122 + // initializing a hentry struct from BDICT data to avoid changing code so muc
h |
167 +++ src/hunspell/hunspell.cxx» (working copy) | 1123 + // even when a hentry struct is changed. |
168 @@ -20,7 +20,7 @@ | 1124 + hentry* InitHashEntry(hentry* entry, |
169 #endif | 1125 + size_t item_size, |
170 #endif | 1126 + const char* word, |
171 | 1127 + int word_length, |
172 -Hunspell::Hunspell(const char * affpath, const char * dpath) | 1128 + int affix_index) const; |
173 +Hunspell::Hunspell(FILE* aff_handle, FILE* dic_handle) | 1129 + hentry* CreateHashEntry(const char* word, |
174 { | 1130 + int word_length, |
175 encoding = NULL; | 1131 + int affix_index) const; |
176 csconv = NULL; | 1132 + void DeleteHashEntry(hentry* entry) const; |
177 @@ -28,11 +28,11 @@ | 1133 + |
178 complexprefixes = 0; | 1134 + // Converts the list of affix IDs to a linked list of hentry structures. The |
179 | 1135 + // hentry structures will point to the given word. The returned pointer will |
180 /* first set up the hash manager */ | 1136 + // be a statically allocated variable that will change for the next call. The |
181 - pHMgr = new HashMgr(dpath, affpath); | 1137 + // |word| buffer must be the same. |
182 + pHMgr = new HashMgr(dic_handle, aff_handle); | 1138 + hentry* AffixIDsToHentry(char* word, int* affix_ids, int affix_count) const; |
183 | 1139 + |
184 /* next set up the affix manager */ | 1140 + // See EmptyHentryCache above. Note that each one is actually a linked list |
185 /* it needs access to the hash manager lookup methods */ | 1141 + // followed by the homonym pointer. |
186 - pAMgr = new AffixMgr(affpath,pHMgr); | 1142 + typedef std::map<std::string, hentry*> HEntryCache; |
187 + pAMgr = new AffixMgr(aff_handle, pHMgr); | 1143 + HEntryCache hentry_cache; |
188 | 1144 +#endif |
189 /* get the preferred try string and the dictionary */ | 1145 + |
190 /* encoding from the Affix Manager for that dictionary */ | 1146 int add_hidden_capitalized_word(char * word, int wbl, int wcl, |
191 @@ -1694,9 +1694,9 @@ | 1147 unsigned short * flags, int al, char * dp, int captype); |
192 | 1148 int parse_aliasm(char * line, FileMgr * af); |
193 #endif // END OF HUNSPELL_EXPERIMENTAL CODE | |
194 | |
195 -Hunhandle *Hunspell_create(const char * affpath, const char * dpath) | |
196 +Hunhandle *Hunspell_create(FILE* aff_handle, FILE* dic_handle) | |
197 { | |
198 - return (Hunhandle*)(new Hunspell(affpath, dpath)); | |
199 + return (Hunhandle*)(new Hunspell(aff_handle, dic_handle)); | |
200 } | |
201 | |
202 void Hunspell_destroy(Hunhandle *pHunspell) | |
203 Index: src/hunspell/hunspell.hxx | 1149 Index: src/hunspell/hunspell.hxx |
204 =================================================================== | 1150 =================================================================== |
205 --- src/hunspell/hunspell.hxx» (revision 3811) | 1151 --- src/hunspell/hunspell.hxx» (revision 48261) |
206 +++ src/hunspell/hunspell.hxx (working copy) | 1152 +++ src/hunspell/hunspell.hxx (working copy) |
207 @@ -48,7 +48,7 @@ | 1153 @@ -5,6 +5,10 @@ |
| 1154 #include "suggestmgr.hxx" |
| 1155 #include "langnum.hxx" |
| 1156 |
| 1157 +#ifdef HUNSPELL_CHROME_CLIENT |
| 1158 +#include "third_party/hunspell/google/bdict_reader.h" |
| 1159 +#endif |
| 1160 + |
| 1161 #define SPELL_COMPOUND (1 << 0) |
| 1162 #define SPELL_FORBIDDEN (1 << 1) |
| 1163 #define SPELL_ALLCAP (1 << 2) |
| 1164 @@ -26,7 +30,9 @@ |
| 1165 HashMgr* pHMgr[MAXDIC]; |
| 1166 int maxdic; |
| 1167 SuggestMgr* pSMgr; |
| 1168 +#ifndef HUNSPELL_CHROME_CLIENT // We are using BDict instead. |
| 1169 char * affixpath; |
| 1170 +#endif |
| 1171 char * encoding; |
| 1172 struct cs_info * csconv; |
| 1173 int langnum; |
| 1174 @@ -34,17 +40,28 @@ |
| 1175 int complexprefixes; |
| 1176 char** wordbreak; |
| 1177 |
| 1178 +#ifdef HUNSPELL_CHROME_CLIENT |
| 1179 + // Owned by this Hunspell object, which allocates and deletes it. |
| 1180 + hunspell::BDictReader* bdict_reader; |
| 1181 +#endif |
| 1182 + |
| 1183 public: |
| 1184 |
| 1185 /* Hunspell(aff, dic) - constructor of Hunspell class |
208 * input: path of affix file and dictionary file | 1186 * input: path of affix file and dictionary file |
209 */ | 1187 */ |
210 | 1188 |
211 - Hunspell(const char * affpath, const char * dpath); | 1189 +#ifdef HUNSPELL_CHROME_CLIENT |
212 + Hunspell(FILE* aff_handle, FILE* dic_handle); | 1190 + Hunspell(const unsigned char* bdict_data, size_t bdict_length); |
| 1191 +#else |
| 1192 Hunspell(const char * affpath, const char * dpath, const char * key = NULL); |
| 1193 +#endif |
| 1194 ~Hunspell(); |
| 1195 |
| 1196 +#ifndef HUNSPELL_CHROME_CLIENT |
| 1197 /* load extra dictionaries (only dic files) */ |
| 1198 int add_dic(const char * dpath, const char * key = NULL); |
| 1199 +#endif |
| 1200 |
| 1201 /* spell(word) - spellcheck word |
| 1202 * output: 0 = bad word, not 0 = good word |
| 1203 Index: src/hunspell/license.hunspell |
| 1204 =================================================================== |
| 1205 --- src/hunspell/license.hunspell» (revision 48261) |
| 1206 +++ src/hunspell/license.hunspell» (working copy) |
| 1207 @@ -56,4 +56,6 @@ |
| 1208 * |
| 1209 * ***** END LICENSE BLOCK ***** */ |
| 1210 |
| 1211 +#ifndef HUNSPELL_CHROME_CLIENT |
| 1212 #include "config.h" |
| 1213 +#endif |
OLD | NEW |