third_party/hunspell/google.patch - Issue 2239005: Merges our hunspell change to hunspell 1.2.10....

Side by Side Diff: third_party/hunspell/google.patch

Issue 2239005: Merges our hunspell change to hunspell 1.2.10.... (Closed) Base URL: svn://chrome-svn.corp.google.com/chrome/trunk/deps/

Patch Set: '' Created 10 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	1 Index: README.chromium

	2 ===================================================================

	3 --- README.chromium (revision 48261)

	4 +++ README.chromium (working copy)

	5 @@ -1,29 +1,15 @@

	6 -This is a partial copy of Hunspell 1.1.5, with the following changes:

	7 -* '#include "config.h"' removed from src/hunspell/hunspell.hxx

	8 -* '#include "config.h"' removed from src/hunspell/license.hunspell

	9 -* Two unreferenced local variables removed from src/hunspell/suggestmgr.cxx

	10 -* src/hunspell/utf_info.cxx moved to src/hunspell/utf_info.hxx, and #include

	11 - reference in src/hunspell/csutil.cxx changed accordingly

	12 -* Change the input params of the constructors to receive a FILE* instead of

	13 - a file path. This is required to use hunspell in the sandbox.

	14 - The patch is in google.patch.

	15 +This is a partial copy of Hunspell 1.2.10 with the following changes:

	16 +* Remove '#include "config.h"' from src/hunspell/hunspell.hxx

	17 +* Remove '#include "config.h"' from src/hunspell/license.hunspell

	18 +* Change src/hunspell/filemgr.hxx and src/hunspell/filemgr.cxx to use

	19 + LineIterator.

	20 +* Add ScopedHashEntryFactory, which creates temporary hentry objects, to

	21 + src/hunspell/suggestmgr.cxx

	22 +* Change the input params of the constructors to receive a BDICTReader instead

	23 + of a file path.

	24 +The patch is in google.patch.

	25

	26 -The English dictionary distributed by Firefox has been checked in to the

	27 -dictionaries directory. It has several additions over the default

	28 -myspell/hunspell dictionary.

	29 -

	30 -* Workaround for non-ASCII characters

	31 -

	32 -Visual Studio on Japanese Windows assumes the source files to be

	33 -encoded in Shift_JIS. The compiler is unhappy with non-ASCII letters

	34 -in the source files of Hunspell. The same problem happens with other

	35 -CJK Windows as well. Here is the workaround for this problem:

	36 -

	37 -Convert 8-bit bytes to hexadecimal escaped forms by

	38 -

	39 - % perl -i -De 's/([\x80-\xff])/sprintf("\\x%02x", $1)/ge' src/*.cxx

	40 -

	41 -

	42 -Note that Hunspell upstream is going to fix this problem. We'll no

	43 -longer need the workaround if the problem is fixed in the upstream.

	44 -

	45 +All dictionaries used by Chromium have been checked in to the

	46 +'third_party/hunspell_dictionaries' directory. They have several additions over

	47 +the default myspell/hunspell dictionaries.

	48 +(See 'third_party/hunspell_dictionaries/README.chromium' for their details.)

	49 Index: src/hunspell/filemgr.cxx

	50 ===================================================================

	51 --- src/hunspell/filemgr.cxx (revision 48261)

	52 +++ src/hunspell/filemgr.cxx (working copy)

	53 @@ -7,6 +7,32 @@

	54

	55 #include "filemgr.hxx"

	56

	57 +#ifdef HUNSPELL_CHROME_CLIENT

	58 +#include "third_party/hunspell/google/bdict_reader.h"

	59 +

	60 +FileMgr::FileMgr(hunspell::LineIterator* iterator) : iterator_(iterator) {

	61 +}

	62 +

	63 +FileMgr::~FileMgr() {

	64 +}

	65 +

	66 +char * FileMgr::getline() {

	67 + // Read one line from a BDICT file and store the line to our line buffer.

	68 + // To emulate the original FileMgr::getline(), this function returns

	69 + // the pointer to our line buffer if we can read a line without errors.

	70 + // Otherwise, this function returns NULL.

	71 + bool result = iterator_->AdvanceAndCopy(line_, BUFSIZE - 1);

	72 + return result ? line_ : NULL;

	73 +}

	74 +

	75 +int FileMgr::getlinenum() {

	76 + // This function is used only for displaying the number of a line that causes

	77 + // a parser error. For a BDICT file, a line number does little to help

	78 + // identify where a parser error occurred, since it is a binary file.

	79 + // So, we just return 0.

	80 + return 0;

	81 +}

	82 +#else

	83 int FileMgr::fail(const char * err, const char * par) {

	84 fprintf(stderr, err, par);

	85 return -1;

	86 @@ -47,3 +73,4 @@

	87 int FileMgr::getlinenum() {

	88 return linenum;

	89 }

	90 +#endif

	91 Index: src/hunspell/suggestmgr.cxx

	92 ===================================================================

	93 --- src/hunspell/suggestmgr.cxx (revision 48261)

	94 +++ src/hunspell/suggestmgr.cxx (working copy)

	95 @@ -12,6 +12,99 @@

	96

	97 const w_char W_VLINE = { '\0', '|' };

	98

	99 +#ifdef HUNSPELL_CHROME_CLIENT

	100 +namespace {

	101 +// A simple class which creates temporary hentry objects which can be

	102 +// available only in a scope. To conceal memory operations from SuggestMgr

	103 +// functions, this object automatically deletes all hentry objects created

	104 +// through CreateScopedHashEntry() calls in its destructor.

	105 +// So, the following snippet raises a memory error.

	106 +//

	107 +// hentry* bad_copy = NULL;

	108 +// {

	109 +// ScopedHashEntryFactory factory;

	110 +// hentry* scoped_copy = factory.CreateScopedHashEntry(0, source);

	111 +// ...

	112 +// bad_copy = scoped_copy;

	113 +// }

	114 +// if (bad_copy->word[0]) // memory for scoped_copy has been deleted!

	115 +//

	116 +// As listed in the above snippet, it is simple to use this class.

	117 +// 1. Declare an instance of this ScopedHashEntryFactory, and;

	118 +// 2. Call its CreateScopedHashEntry() member instead of using 'new hentry' or

	119 +// 'operator='.

	120 +//

	121 +// TODO(hbono): this implementation is slower than the previous one of brettw.

	122 +// We need to improve it?

	123 +//

	124 +class ScopedHashEntryFactory {

	125 + public:

	126 + ScopedHashEntryFactory();

	127 + ~ScopedHashEntryFactory();

	128 +

	129 + // Creates a temporary copy of the given hentry struct.

	130 + // The returned copy is available only while this object is available.

	131 + // NOTE: this function just calls memcpy() in creating a copy of the given

	132 + // hentry struct, i.e. it does NOT copy objects referred by pointers of the

	133 + // given hentry struct.

	134 + hentry* CreateScopedHashEntry(int index, const hentry* source);

	135 +

	136 + private:

	137 + // A struct which encapsulate the new hentry struct used by hunspell 1.2.8.

	138 + // The hentry struct used by hunspell 1.2.8 becomes a variable-length struct,

	139 + // i.e. it uses its 'word[1]' array member as a variable-length array.

	140 + // C/C++ doesn't check boundaries of a char array. For example, for a char

	141 + // array 'char word[1]', we cannot only access 'word[0]' but also access

	142 + // 'word[1]', 'word[2]', etc.

	143 + // To handle this new hentry struct, we define a struct which combines

	144 + // three values (an hentry struct 'hentry', a char array 'word[kMaxWordLen]',

	145 + // and an unsigned short value 'astr') so that a HashEntryItem 'hash_item'

	146 + // satisfies the following equations:

	147 + // hash_item.entry->word[1] == hash_item->word[0].

	148 + // hash_item.entry->word[2] == hash_item->word[1].

	149 + // ...

	150 + // hash_item.entry->word[n] == hash_item->word[n-1].

	151 + // ...

	152 + // hash_item.entry->word[kMaxWordLen] == hash_item->word[kMaxWordLen-1].

	153 + enum {

	154 + kMaxWordLen = 128,

	155 + };

	156 + struct HashEntryItem {

	157 + hentry entry;

	158 + char word[kMaxWordLen];

	159 + unsigned short astr;

	160 + };

	161 +

	162 + HashEntryItem hash_items_[MAX_ROOTS];

	163 +};

	164 +

	165 +ScopedHashEntryFactory::ScopedHashEntryFactory() {

	166 + memset(&hash_items_[0], 0, sizeof(hash_items_));

	167 +}

	168 +

	169 +ScopedHashEntryFactory::~ScopedHashEntryFactory() {

	170 +}

	171 +

	172 +hentry* ScopedHashEntryFactory::CreateScopedHashEntry(int index,

	173 + const hentry* source) {

	174 + if (index >= MAX_ROOTS || source->blen >= kMaxWordLen)

	175 + return NULL;

	176 +

	177 + // Retrieve a HashEntryItem struct from our spool, initialize it, and

	178 + // returns the address of its 'hentry' member.

	179 + size_t source_size = sizeof(hentry) + source->blen + 1;

	180 + HashEntryItem* hash_item = &hash_items_[index];

	181 + memcpy(&hash_item->entry, source, source_size);

	182 + if (source->astr) {

	183 + hash_item->astr = *source->astr;

	184 + hash_item->entry.astr = &hash_item->astr;

	185 + }

	186 + return &hash_item->entry;

	187 +}

	188 +

	189 +} // namespace

	190 +#endif

	191 +

	192 SuggestMgr::SuggestMgr(const char * tryme, int maxn,

	193 AffixMgr * aptr)

	194 {

	195 @@ -1029,6 +1122,11 @@

	196

	197 struct hentry* hp = NULL;

	198 int col = -1;

	199 +

	200 +#ifdef HUNSPELL_CHROME_CLIENT

	201 + ScopedHashEntryFactory hash_entry_factory;

	202 +#endif

	203 +

	204 phonetable * ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;

	205 char target[MAXSWUTF8L];

	206 char candidate[MAXSWUTF8L];

	207 @@ -1066,7 +1164,11 @@

	208

	209 if (sc > scores[lp]) {

	210 scores[lp] = sc;

	211 +#ifdef HUNSPELL_CHROME_CLIENT

	212 + roots[lp] = hash_entry_factory.CreateScopedHashEntry(lp, hp);

	213 +#else

	214 roots[lp] = hp;

	215 +#endif

	216 lval = sc;

	217 for (j=0; j < MAX_ROOTS; j++)

	218 if (scores[j] < lval) {

	219 Index: src/hunspell/replist.hxx

	220 ===================================================================

	221 --- src/hunspell/replist.hxx (revision 48261)

	222 +++ src/hunspell/replist.hxx (working copy)

	223 @@ -2,6 +2,12 @@

	224 #ifndef _REPLIST_HXX_

	225 #define _REPLIST_HXX_

	226

	227 +#ifdef HUNSPELL_CHROME_CLIENT

	228 +// Compilation issues in spellchecker.cc think near is a macro, therefore

	229 +// removing it here solves that problem.

	230 +#undef near

	231 +#endif

	232 +

	233 #include "hunvisapi.h"

	234

	235 #include "w_char.hxx"

	236 Index: src/hunspell/filemgr.hxx

	237 ===================================================================

	238 --- src/hunspell/filemgr.hxx (revision 48261)

	239 +++ src/hunspell/filemgr.hxx (working copy)

	240 @@ -7,6 +7,30 @@

	241 #include "hunzip.hxx"

	242 #include <stdio.h>

	243

	244 +#ifdef HUNSPELL_CHROME_CLIENT

	245 +namespace hunspell {

	246 +class LineIterator;

	247 +} // namespace hunspell

	248 +

	249 +// A class which encapsulates operations of reading a BDICT file.

	250 +// Chrome uses a BDICT file to compress hunspell dictionaries. A BDICT file is

	251 +// a binary file converted from a DIC file and an AFF file. (See

	252 +// "bdict_reader.h" for its format.)

	253 +// This class encapsulates the operations of reading a BDICT file and emulates

	254 +// the original FileMgr operations for AffixMgr so that it can read a BDICT

	255 +// file without so many changes.

	256 +class FileMgr {

	257 + public:

	258 + FileMgr(hunspell::LineIterator* iterator);

	259 + ~FileMgr();

	260 + char* getline();

	261 + int getlinenum();

	262 +

	263 + protected:

	264 + hunspell::LineIterator* iterator_;

	265 + char line_[BUFSIZE + 50]; // input buffer

	266 +};

	267 +#else

	268 class LIBHUNSPELL_DLL_EXPORTED FileMgr

	269 {

	270 protected:

	271 @@ -23,3 +47,4 @@

	272 int getlinenum();

	273 };

	274 #endif

	275 +#endif

1 Index: src/hunspell/affixmgr.cxx	276 Index: src/hunspell/affixmgr.cxx

2 ===================================================================	277 ===================================================================

3 --- src/hunspell/affixmgr.cxx» (revision 3811)	278 --- src/hunspell/affixmgr.cxx» (revision 48261)

4 +++ src/hunspell/affixmgr.cxx (working copy)	279 +++ src/hunspell/affixmgr.cxx (working copy)

5 @@ -25,7 +27,7 @@	280 @@ -14,8 +14,14 @@

6 #endif	281

7 #endif	282 #include "csutil.hxx"

8	283

9 -AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)	284 +#ifdef HUNSPELL_CHROME_CLIENT

10 +AffixMgr::AffixMgr(FILE* aff_handle, HashMgr* ptr)	285 +AffixMgr::AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md)

11 {	286 +{

	287 + bdict_reader = reader;

	288 +#else

	289 AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key)

	290 {

	291 +#endif

12 // register hash manager and load affix data from aff file	292 // register hash manager and load affix data from aff file

13 pHMgr = ptr;	293 pHMgr = ptr[0];

14 @@ -104,8 +106,8 @@	294 alldic = ptr;

	295 @@ -99,9 +105,17 @@

	296 sFlag[i] = NULL;

	297 }

	298

	299 +#ifdef HUNSPELL_CHROME_CLIENT

	300 + // Define dummy parameters for parse_file() to avoid changing the parameters

	301 + // of parse_file(). This may make it easier to merge the changes of the

	302 + // original hunspell.

	303 + const char* affpath = NULL;

	304 + const char* key = NULL;

	305 +#else

	306 for (int j=0; j < CONTSIZE; j++) {

15 contclasses[j] = 0;	307 contclasses[j] = 0;

16 }	308 }

17	309 +#endif

18 - if (parse_file(affpath)) {	310

19 - HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);	311 if (parse_file(affpath, key)) {

20 + if (parse_file(aff_handle)) {	312 HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);

21 + HUNSPELL_WARNING(stderr, "Failure loading aff file\n");	313 @@ -252,6 +266,43 @@

22 wordchars = mystrdup("qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM ");	314 char * line; // io buffers

23 }	315 char ft; // affix type

24	316

25 @@ -232,7 +234,7 @@	317 +#ifdef HUNSPELL_CHROME_CLIENT

26	318 + // open the affix file

27	319 + // We're always UTF-8

28 // read in aff file and build up prefix and suffix entry objects	320 + utf8 = 1;

29 -int AffixMgr::parse_file(const char * affpath)	321 +

30 +int AffixMgr::parse_file(FILE* aff_handle)	322 + // A BDICT file stores PFX and SFX lines in a special section and it provides

31 {	323 + // a special line iterator for reading PFX and SFX lines.

32	324 + // We create a FileMgr object from this iterator and parse PFX and SFX lines

33 // io buffers	325 + // before parsing other lines.

34 @@ -250,11 +252,12 @@	326 + hunspell::LineIterator affix_iterator = bdict_reader->GetAffixLineIterator();

35	327 + FileMgr* iterator = new FileMgr(&affix_iterator);

36 // open the affix file	328 + if (!iterator) {

37 FILE * afflst;	329 + HUNSPELL_WARNING(stderr,

38 - afflst = fopen(affpath,"r");	330 + "error: could not create a FileMgr from an affix line iterator.\n");

39 + afflst = _fdopen(_dup(_fileno(aff_handle)), "r");	331 + return 1;

40 if (!afflst) {	332 + }

41 - HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n ",affpath);	333 +

42 + HUNSPELL_WARNING(stderr, "error: could not open affix description file\n");	334 + while (line = iterator->getline()) {

	335 + ft = ' ';

	336 + if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';

	337 + if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S';

	338 + if (ft != ' ')

	339 + parse_affix(line, ft, iterator, NULL);

	340 + }

	341 + delete iterator;

	342 +

	343 + // Create a FileMgr object for reading lines except PFX and SFX lines.

	344 + // We don't need to change the loop below since our FileMgr emulates the

	345 + // original one.

	346 + hunspell::LineIterator other_iterator = bdict_reader->GetOtherLineIterator();

	347 + FileMgr * afflst = new FileMgr(&other_iterator);

	348 + if (!afflst) {

	349 + HUNSPELL_WARNING(stderr,

	350 + "error: could not create a FileMgr from an other line iterator.\n");

	351 + return 1;

	352 + }

	353 +#else

	354 // checking flag duplication

	355 char dupflags[CONTSIZE];

	356 char dupflags_ini = 1;

	357 @@ -265,6 +316,7 @@

	358 HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n ",affpath);

43 return 1;	359 return 1;

44 }	360 }

45 + fseek(afflst, 0, SEEK_SET);	361 +#endif

46	362

47 // step one is to parse the affix file building up the internal	363 // step one is to parse the affix file building up the internal

48 // affix data structures	364 // affix data structures

	365 @@ -274,6 +326,7 @@

	366 while ((line = afflst->getline())) {

	367 mychomp(line);

	368

	369 +#ifndef HUNSPELL_CHROME_CLIENT

	370 /* remove byte order mark */

	371 if (firstline) {

	372 firstline = 0;

	373 @@ -282,6 +335,7 @@

	374 memmove(line, line+3, strlen(line+3)+1);

	375 }

	376 }

	377 +#endif

	378

	379 /* parse in the keyboard string */

	380 if (strncmp(line,"KEY",3) == 0) {

	381 @@ -517,6 +571,7 @@

	382 }

	383 }

	384

	385 +#ifndef HUNSPELL_CHROME_CLIENT

	386 /* parse in the typical fault correcting table */

	387 if (strncmp(line,"REP",3) == 0) {

	388 if (parse_reptable(line, afflst)) {

	389 @@ -524,6 +579,7 @@

	390 return 1;

	391 }

	392 }

	393 +#endif

	394

	395 /* parse in the input conversion table */

	396 if (strncmp(line,"ICONV",5) == 0) {

	397 @@ -634,6 +690,7 @@

	398 checksharps=1;

	399 }

	400

	401 +#ifndef HUNSPELL_CHROME_CLIENT // Chrome handled affixes above.

	402 /* parse this affix: P - prefix, S - suffix */

	403 ft = ' ';

	404 if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';

	405 @@ -650,6 +707,7 @@

	406 return 1;

	407 }

	408 }

	409 +#endif

	410

	411 }

	412 delete afflst;

	413 @@ -1247,6 +1305,26 @@

	414 const char * r;

	415 int lenr, lenp;

	416

	417 +#ifdef HUNSPELL_CHROME_CLIENT

	418 + const char *pattern, *pattern2;

	419 + hunspell::ReplacementIterator iterator = bdict_reader->GetReplacementIterator();

	420 + while (iterator.GetNext(&pattern, &pattern2)) {

	421 + r = word;

	422 + lenr = strlen(pattern2);

	423 + lenp = strlen(pattern);

	424 +

	425 + // search every occurrence of the pattern in the word

	426 + while ((r=strstr(r, pattern)) != NULL) {

	427 + strcpy(candidate, word);

	428 + if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break;

	429 + strcpy(candidate+(r-word), pattern2);

	430 + strcpy(candidate+(r-word)+lenr, r+lenp);

	431 + if (candidate_check(candidate,strlen(candidate))) return 1;

	432 + r++; // search for the next letter

	433 + }

	434 + }

	435 +

	436 +#else

	437 if ((wl < 2) || !numrep) return 0;

	438

	439 for (int i=0; i < numrep; i++ ) {

	440 @@ -1263,6 +1341,7 @@

	441 r++; // search for the next letter

	442 }

	443 }

	444 +#endif

	445 return 0;

	446 }

	447

	448 @@ -3332,6 +3411,7 @@

	449 return 0;

	450 }

	451

	452 +#ifndef HUNSPELL_CHROME_CLIENT

	453 /* parse in the typical fault correcting table */

	454 int AffixMgr::parse_reptable(char * line, FileMgr * af)

	455 {

	456 @@ -3407,6 +3487,7 @@

	457 }

	458 return 0;

	459 }

	460 +#endif

	461

	462 /* parse in the typical fault correcting table */

	463 int AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword)

	464 @@ -4010,6 +4091,7 @@

	465 case 1: {

	466 np++;

	467 aflag = pHMgr->decode_flag(piece);

	468 +#ifndef HUNSPELL_CHROME_CLIENT // We don't check for duplicates.

	469 if (((at == 'S') && (dupflags[aflag] & dupSFX)) ||

	470 ((at == 'P') && (dupflags[aflag] & dupPFX))) {

	471 HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix flag\n",

	472 @@ -4017,6 +4099,7 @@

	473 // return 1; XXX permissive mode for bad dictionaries

	474 }

	475 dupflags[aflag] += (char) ((at == 'S') ? dupSFX : dupPFX);

	476 +#endif

	477 break;

	478 }

	479 // piece 3 - is cross product indicator

49 Index: src/hunspell/affixmgr.hxx	480 Index: src/hunspell/affixmgr.hxx

50 ===================================================================	481 ===================================================================

51 --- src/hunspell/affixmgr.hxx» (revision 3811)	482 --- src/hunspell/affixmgr.hxx» (revision 48261)

52 +++ src/hunspell/affixmgr.hxx (working copy)	483 +++ src/hunspell/affixmgr.hxx (working copy)

53 @@ -93,7 +93,7 @@	484 @@ -18,6 +18,40 @@

54	485 class PfxEntry;

	486 class SfxEntry;

	487

	488 +#ifdef HUNSPELL_CHROME_CLIENT

	489 +

	490 +#include <vector>

	491 +

	492 +// This class provides an implementation of the contclasses array in AffixMgr

	493 +// that is normally a large static array. We should almost never need more than

	494 +// 256 elements, so this class only allocates that much to start off with. If

	495 +// elements higher than that are actually used, we'll automatically expand.

	496 +class ContClasses {

	497 + public:

	498 + ContClasses() {

	499 + // Pre-allocate a buffer so that typically, we'll never have to resize.

	500 + EnsureSizeIs(256);

	501 + }

	502 +

	503 + char& operator[](size_t index) {

	504 + EnsureSizeIs(index + 1);

	505 + return data[index];

	506 + }

	507 +

	508 + void EnsureSizeIs(size_t new_size) {

	509 + if (data.size() >= new_size)

	510 + return; // Nothing to do.

	511 +

	512 + size_t old_size = data.size();

	513 + data.resize(new_size);

	514 + memset(&data[old_size], 0, new_size - old_size);

	515 + }

	516 +

	517 + std::vector<char> data;

	518 +};

	519 +

	520 +#endif // HUNSPELL_CHROME_CLIENT

	521 +

	522 class LIBHUNSPELL_DLL_EXPORTED AffixMgr

	523 {

	524

	525 @@ -98,12 +132,20 @@

	526 int fullstrip;

	527

	528 int havecontclass; // boolean variable

	529 +#ifdef HUNSPELL_CHROME_CLIENT

	530 + ContClasses contclasses;

	531 +#else

	532 char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)

	533 +#endif

	534

55 public:	535 public:

56	536

57 - AffixMgr(const char * affpath, HashMgr * ptr);	537 +#ifdef HUNSPELL_CHROME_CLIENT

58 + AffixMgr(FILE* aff_handle, HashMgr * ptr);	538 + AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md);

	539 +#else

	540 AffixMgr(const char * affpath, HashMgr** ptr, int * md,

	541 const char * key = NULL);

	542 +#endif

59 ~AffixMgr();	543 ~AffixMgr();

60 struct hentry * affix_check(const char * word, int len,	544 struct hentry * affix_check(const char * word, int len,

61 const unsigned short needflag = (unsigned short) 0, char in_compoun d = IN_CPD_NOT);	545 const unsigned short needflag = (unsigned short) 0,

62 @@ -179,7 +179,7 @@	546 @@ -202,6 +244,10 @@

63 int get_checksharps(void);	547 int get_fullstrip() const;

64	548

65 private:	549 private:

66 - int parse_file(const char * affpath);	550 +#ifdef HUNSPELL_CHROME_CLIENT

67 + int parse_file(FILE* aff_handle);	551 + // Not owned by us, owned by the Hunspell object.

68 // int parse_string(char * line, char ** out, const char * name);	552 + hunspell::BDictReader* bdict_reader;

69 int parse_flag(char * line, unsigned short * out, const char * name);	553 +#endif

70 int parse_num(char * line, int * out, const char * name);	554 int parse_file(const char * affpath, const char * key);

	555 int parse_flag(char * line, unsigned short * out, FileMgr * af);

	556 int parse_num(char * line, int * out, FileMgr * af);

	557 Index: src/hunspell/htypes.hxx

	558 ===================================================================

	559 --- src/hunspell/htypes.hxx» (revision 48261)

	560 +++ src/hunspell/htypes.hxx» (working copy)

	561 @@ -1,6 +1,16 @@

	562 #ifndef _HTYPES_HXX_

	563 #define _HTYPES_HXX_

	564

	565 +#ifdef HUNSPELL_CHROME_CLIENT

	566 +// This is a workaround for preventing errors in parsing Turkish BDICs, which

	567 +// contain very long AF lines (~ 12,000 chars).

	568 +// TODO(hbono) change the HashMgr::parse_aliasf() function to be able to parse

	569 +// longer lines than MAXDELEN.

	570 +#define MAXDELEN (8192 * 2)

	571 +#else

	572 +#define MAXDELEN 8192

	573 +#endif // HUNSPELL_CHROME_CLIENT

	574 +

	575 #define ROTATE_LEN 5

	576

	577 #define ROTATE(v,q) \

71 Index: src/hunspell/hashmgr.cxx	578 Index: src/hunspell/hashmgr.cxx

72 ===================================================================	579 ===================================================================

73 --- src/hunspell/hashmgr.cxx» (revision 3811)	580 --- src/hunspell/hashmgr.cxx» (revision 48261)

74 +++ src/hunspell/hashmgr.cxx (working copy)	581 +++ src/hunspell/hashmgr.cxx (working copy)

75 @@ -29,7 +31,7 @@	582 @@ -12,8 +12,14 @@

76	583

77 // build a hash table from a munched word list	584 // build a hash table from a munched word list

78	585

79 -HashMgr::HashMgr(const char * tpath, const char * apath)	586 +#ifdef HUNSPELL_CHROME_CLIENT

80 +HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle)	587 +HashMgr::HashMgr(hunspell::BDictReader* reader)

81 {	588 +{

	589 + bdict_reader = reader;

	590 +#else

	591 HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)

	592 {

	593 +#endif

82 tablesize = 0;	594 tablesize = 0;

83 tableptr = NULL;	595 tableptr = NULL;

84 @@ -43,8 +45,8 @@	596 flag_mode = FLAG_CHAR;

85 aliasf = NULL;	597 @@ -31,8 +37,14 @@

86 numaliasm = 0;	598 numaliasm = 0;

87 aliasm = NULL;	599 aliasm = NULL;

88 - load_config(apath);	600 forbiddenword = FORBIDDENWORD; // forbidden word signing flag

89 - int ec = load_tables(tpath);	601 +#ifdef HUNSPELL_CHROME_CLIENT

90 + load_config(aff_handle);	602 + // No tables to load, just the AF lines.

91 + int ec = load_tables(dic_handle);	603 + load_config(NULL, NULL);

	604 + int ec = LoadAFLines();

	605 +#else

	606 load_config(apath, key);

	607 int ec = load_tables(tpath, key);

	608 +#endif

92 if (ec) {	609 if (ec) {

93 /* error condition - what should we do here */	610 /* error condition - what should we do here */

94 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);	611 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);

95 @@ -240,7 +242,7 @@	612 @@ -91,15 +103,59 @@

	613 if (ignorechars) free(ignorechars);

	614 if (ignorechars_utf16) free(ignorechars_utf16);

	615

	616 +#ifdef HUNSPELL_CHROME_CLIENT

	617 + EmptyHentryCache();

	618 + for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin();

	619 + it != pointer_to_strings_.end(); ++it) {

	620 + delete *it;

	621 + }

	622 +#endif

	623 +

	624 #ifdef MOZILLA_CLIENT

	625 delete [] csconv;

	626 #endif

	627 }

	628

	629 +#ifdef HUNSPELL_CHROME_CLIENT

	630 +void HashMgr::EmptyHentryCache() {

	631 + // We need to delete each cache entry, and each additional one in the linked

	632 + // list of homonyms.

	633 + for (HEntryCache::iterator i = hentry_cache.begin();

	634 + i != hentry_cache.end(); ++i) {

	635 + hentry* cur = i->second;

	636 + while (cur) {

	637 + hentry* next = cur->next_homonym;

	638 + DeleteHashEntry(cur);

	639 + cur = next;

	640 + }

	641 + }

	642 + hentry_cache.clear();

	643 +}

	644 +#endif

	645 +

	646 // lookup a root word in the hashtable

	647

	648 struct hentry * HashMgr::lookup(const char *word) const

	649 {

	650 +#ifdef HUNSPELL_CHROME_CLIENT

	651 + int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];

	652 + int affix_count = bdict_reader->FindWord(word, affix_ids);

	653 + if (affix_count == 0) { // look for custom added word

	654 + std::map<base::StringPiece, int>::const_iterator iter =

	655 + custom_word_to_affix_id_map_.find(word);

	656 + if (iter != custom_word_to_affix_id_map_.end()) {

	657 + affix_count = 1;

	658 + affix_ids[0] = iter->second;

	659 + }

	660 + }

	661 +

	662 + static const int kMaxWordLen = 128;

	663 + static char word_buf[kMaxWordLen];

	664 + // To take account of null-termination, we use up to 127.

	665 + strncpy(word_buf, word, kMaxWordLen - 1);

	666 +

	667 + return AffixIDsToHentry(word_buf, affix_ids, affix_count);

	668 +#else

	669 struct hentry * dp;

	670 if (tableptr) {

	671 dp = tableptr[hash(word)];

	672 @@ -109,12 +165,14 @@

	673 }

	674 }

	675 return NULL;

	676 +#endif

	677 }

	678

	679 // add a word to the hash table (private)

	680 int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,

	681 int al, const char * desc, bool onlyupcase)

	682 {

	683 +#ifndef HUNSPELL_CHROME_CLIENT

	684 bool upcasehomonym = false;

	685 int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;

	686 // variable-length hash record with word and optional fields

	687 @@ -206,6 +264,17 @@

	688 » if (hp->astr) free(hp->astr);

	689 » free(hp);

	690 }

	691 +#else

	692 + std::map<base::StringPiece, int>::iterator iter =

	693 + custom_word_to_affix_id_map_.find(word);

	694 + if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added

	695 + std::string* new_string_word = new std::string(word);

	696 + pointer_to_strings_.push_back(new_string_word);

	697 + base::StringPiece sp(*(new_string_word));

	698 + custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words

	699 + return 1;

	700 + }

	701 +#endif

	702 return 0;

	703 }

	704

	705 @@ -339,6 +408,43 @@

	706 // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);

	707 struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const

	708 {

	709 +#ifdef HUNSPELL_CHROME_CLIENT

	710 + // Return NULL if dictionary is not valid.

	711 + if (!bdict_reader->IsValid())

	712 + return NULL;

	713 +

	714 + // This function is only ever called by one place and not nested. We can

	715 + // therefore keep static state between calls and use |col| as a "reset" flag

	716 + // to avoid changing the API. It is set to -1 for the first call.

	717 + static hunspell::WordIterator word_iterator =

	718 + bdict_reader->GetAllWordIterator();

	719 + if (col < 0) {

	720 + col = 1;

	721 + word_iterator = bdict_reader->GetAllWordIterator();

	722 + }

	723 +

	724 + int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];

	725 + static const int kMaxWordLen = 128;

	726 + static char word[kMaxWordLen];

	727 + int affix_count = word_iterator.Advance(word, kMaxWordLen, affix_ids);

	728 + if (affix_count == 0)

	729 + return NULL;

	730 + short word_len = static_cast<short>(strlen(word));

	731 +

	732 + // Since hunspell 1.2.8, an hentry struct becomes a variable-length struct,

	733 + // i.e. a struct which uses its array 'word[1]' as a variable-length array.

	734 + // As noted above, this function is not nested. So, we just use a static

	735 + // struct which consists of an hentry and a char[kMaxWordLen], and initialize

	736 + // the static struct and return it for now.

	737 + // No need to create linked lists for the extra affixes.

	738 + static struct {

	739 + hentry entry;

	740 + char word[kMaxWordLen];

	741 + } hash_entry;

	742 +

	743 + return InitHashEntry(&hash_entry.entry, sizeof(hash_entry),

	744 + &word[0], word_len, affix_ids[0]);

	745 +#else

	746 if (hp && hp->next != NULL) return hp->next;

	747 for (col++; col < tablesize; col++) {

	748 if (tableptr[col]) return tableptr[col];

	749 @@ -346,11 +452,13 @@

	750 // null at end and reset to start

	751 col = -1;

	752 return NULL;

	753 +#endif

96 }	754 }

97	755

98 // load a munched word list and build a hash table on the fly	756 // load a munched word list and build a hash table on the fly

99 -int HashMgr::load_tables(const char * tpath)	757 int HashMgr::load_tables(const char * tpath, const char * key)

100 +int HashMgr::load_tables(FILE* t_handle)	758 {

101 {	759 +#ifndef HUNSPELL_CHROME_CLIENT

102 int wl, al;	760 int al;

103 char * ap;	761 char * ap;

104 @@ -248,8 +250,9 @@	762 char * dp;

105 unsigned short * flags;	763 @@ -470,6 +578,7 @@

106	764 }

107 // raw dictionary - munched file	765

108 - FILE * rawdict = fopen(tpath, "r");	766 delete dict;

109 + FILE * rawdict = _fdopen(_dup(_fileno(t_handle)), "r");	767 +#endif

110 if (rawdict == NULL) return 1;	768 return 0;

111 + fseek(rawdict, 0, SEEK_SET);	769 }

112	770

113 // first read the first line of file to get hash table size */	771 @@ -478,6 +587,9 @@

114 char ts[MAXDELEN];	772

115 @@ -442,7 +445,7 @@	773 int HashMgr::hash(const char * word) const

116 }	774 {

117	775 +#ifdef HUNSPELL_CHROME_CLIENT

118 // read in aff file and set flag mode	776 + return 0;

119 -int HashMgr::load_config(const char * affpath)	777 +#else

120 +int HashMgr::load_config(FILE* aff_handle)	778 long hv = 0;

121 {	779 for (int i=0; i < 4 && *word != 0; i++)

	780 hv = (hv << 8) | (*word++);

	781 @@ -486,6 +598,7 @@

	782 hv ^= (*word++);

	783 }

	784 return (unsigned long) hv % tablesize;

	785 +#endif

	786 }

	787

	788 int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {

	789 @@ -607,7 +720,12 @@

122 int firstline = 1;	790 int firstline = 1;

123

124 @@ -451,11 +454,12 @@

125	791

126 // open the affix file	792 // open the affix file

127 FILE * afflst;	793 +#ifdef HUNSPELL_CHROME_CLIENT

128 - afflst = fopen(affpath,"r");	794 + hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator();

129 + afflst = _fdopen(_dup(_fileno(aff_handle)), "r");	795 + FileMgr * afflst = new FileMgr(&iterator);

	796 +#else

	797 FileMgr * afflst = new FileMgr(affpath, key);

	798 +#endif

130 if (!afflst) {	799 if (!afflst) {

131 - HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);	800 HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);

132 + HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n");

133 return 1;	801 return 1;

134 }	802 @@ -802,6 +920,121 @@

135 + fseek(afflst, 0, SEEK_SET);	803 return 0;

136	804 }

137 // read in each line ignoring any that do not	805

138 // start with a known line type indicator	806 +#ifdef HUNSPELL_CHROME_CLIENT

	807 +int HashMgr::LoadAFLines()

	808 +{

	809 + utf8 = 1; // We always use UTF-8.

	810 +

	811 + // Read in all the AF lines which tell us the rules for each affix group ID.

	812 + hunspell::LineIterator iterator = bdict_reader->GetAfLineIterator();

	813 + FileMgr afflst(&iterator);

	814 + while (char* line = afflst.getline()) {

	815 + int rv = parse_aliasf(line, &afflst);

	816 + if (rv)

	817 + return rv;

	818 + }

	819 +

	820 + return 0;

	821 +}

	822 +

	823 +hentry* HashMgr::InitHashEntry(hentry* entry,

	824 + size_t item_size,

	825 + const char* word,

	826 + int word_length,

	827 + int affix_index) const {

	828 + // Return if the given buffer doesn't have enough space for a hentry struct

	829 + // or the given word is too long.

	830 + // Our BDICT cannot handle words longer than (128 - 1) bytes. So, it is

	831 + // better to return an error if the given word is too long and prevent

	832 + // an unexpected result caused by a long word.

	833 + const int kMaxWordLen = 128;

	834 + if (item_size < sizeof(hentry) + word_length + 1 ||

	835 + word_length >= kMaxWordLen)

	836 + return NULL;

	837 +

	838 + // Initialize a hentry struct with the given parameters, and

	839 + // append the given string at the end of this hentry struct.

	840 + memset(entry, 0, item_size);

	841 + FileMgr af(NULL);

	842 + entry->alen = static_cast<short>(

	843 + const_cast<HashMgr*>(this)->get_aliasf(affix_index, &entry->astr, &af));

	844 + entry->blen = static_cast<unsigned char>(word_length);

	845 + memcpy(&entry->word, word, word_length);

	846 +

	847 + return entry;

	848 +}

	849 +

	850 +hentry* HashMgr::CreateHashEntry(const char* word,

	851 + int word_length,

	852 + int affix_index) const {

	853 + // Return if the given word is too long.

	854 + // (See the comment in HashMgr::InitHashEntry().)

	855 + const int kMaxWordLen = 128;

	856 + if (word_length >= kMaxWordLen)

	857 + return NULL;

	858 +

	859 + const size_t kEntrySize = sizeof(hentry) + word_length + 1;

	860 + struct hentry* entry = reinterpret_cast<hentry*>(malloc(kEntrySize));

	861 + if (entry)

	862 + InitHashEntry(entry, kEntrySize, word, word_length, affix_index);

	863 +

	864 + return entry;

	865 +}

	866 +

	867 +void HashMgr::DeleteHashEntry(hentry* entry) const {

	868 + free(entry);

	869 +}

	870 +

	871 +hentry* HashMgr::AffixIDsToHentry(char* word,

	872 + int* affix_ids,

	873 + int affix_count) const

	874 +{

	875 + if (affix_count == 0)

	876 + return NULL;

	877 +

	878 + HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache;

	879 + std::string std_word(word);

	880 + HEntryCache::iterator found = cache.find(std_word);

	881 + if (found != cache.end()) {

	882 + // We must return an existing hentry for the same word if we've previously

	883 + // handed one out. Hunspell will compare pointers in some cases to see if

	884 + // two words it has found are the same.

	885 + return found->second;

	886 + }

	887 +

	888 + short word_len = static_cast<short>(strlen(word));

	889 +

	890 + // We can get a number of prefixes per word. There will normally be only one,

	891 + // but if not, there will be a linked list of "hentry"s for the "homonym"s

	892 + // for the word.

	893 + struct hentry* first_he = NULL;

	894 + struct hentry* prev_he = NULL; // For making linked list.

	895 + for (int i = 0; i < affix_count; i++) {

	896 + struct hentry* he = CreateHashEntry(word, word_len, affix_ids[i]);

	897 + if (!he)

	898 + break;

	899 + if (i == 0)

	900 + first_he = he;

	901 + if (prev_he)

	902 + prev_he->next_homonym = he;

	903 + prev_he = he;

	904 + }

	905 +

	906 + cache[std_word] = first_he; // Save this word in the cache for later.

	907 + return first_he;

	908 +}

	909 +

	910 +hentry* HashMgr::GetHentryFromHEntryCache(char* word) {

	911 + HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache;

	912 + std::string std_word(word);

	913 + HEntryCache::iterator found = cache.find(std_word);

	914 + if (found != cache.end())

	915 + return found->second;

	916 + else

	917 + return NULL;

	918 +}

	919 +#endif

	920 +

	921 int HashMgr::is_aliasf() {

	922 return (aliasf != NULL);

	923 }

	924 Index: src/hunspell/hunspell.cxx

	925 ===================================================================

	926 --- src/hunspell/hunspell.cxx» (revision 48261)

	927 +++ src/hunspell/hunspell.cxx» (working copy)

	928 @@ -7,18 +7,35 @@

	929

	930 #include "hunspell.hxx"

	931 #include "hunspell.h"

	932 +#ifndef HUNSPELL_CHROME_CLIENT

	933 #include "config.h"

	934 +#endif

	935 #include "csutil.hxx"

	936

	937 +#ifdef HUNSPELL_CHROME_CLIENT

	938 +Hunspell::Hunspell(const unsigned char* bdict_data, size_t bdict_length)

	939 +#else

	940 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)

	941 +#endif

	942 {

	943 encoding = NULL;

	944 csconv = NULL;

	945 utf8 = 0;

	946 complexprefixes = 0;

	947 +#ifndef HUNSPELL_CHROME_CLIENT

	948 affixpath = mystrdup(affpath);

	949 +#endif

	950 maxdic = 0;

	951

	952 +#ifdef HUNSPELL_CHROME_CLIENT

	953 + bdict_reader = new hunspell::BDictReader;

	954 + bdict_reader->Init(bdict_data, bdict_length);

	955 +

	956 + pHMgr[0] = new HashMgr(bdict_reader);

	957 + if (pHMgr[0]) maxdic = 1;

	958 +

	959 + pAMgr = new AffixMgr(bdict_reader, pHMgr, &maxdic);

	960 +#else

	961 /* first set up the hash manager */

	962 pHMgr[0] = new HashMgr(dpath, affpath, key);

	963 if (pHMgr[0]) maxdic = 1;

	964 @@ -26,6 +43,7 @@

	965 /* next set up the affix manager */

	966 /* it needs access to the hash manager lookup methods */

	967 pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);

	968 +#endif

	969

	970 /* get the preferred try string and the dictionary */

	971 /* encoding from the Affix Manager for that dictionary */

	972 @@ -56,10 +74,17 @@

	973 csconv= NULL;

	974 if (encoding) free(encoding);

	975 encoding = NULL;

	976 +

	977 +#ifdef HUNSPELL_CHROME_CLIENT

	978 + if (bdict_reader) delete bdict_reader;

	979 + bdict_reader = NULL;

	980 +#else

	981 if (affixpath) free(affixpath);

	982 affixpath = NULL;

	983 +#endif

	984 }

	985

	986 +#ifndef HUNSPELL_CHROME_CLIENT

	987 // load extra dictionaries

	988 int Hunspell::add_dic(const char * dpath, const char * key) {

	989 if (maxdic == MAXDIC || !affixpath) return 1;

	990 @@ -67,6 +92,7 @@

	991 if (pHMgr[maxdic]) maxdic++; else return 1;

	992 return 0;

	993 }

	994 +#endif

	995

	996 // make a copy of src at destination while removing all leading

	997 // blanks and removing any trailing periods after recording

	998 @@ -319,6 +345,9 @@

	999

	1000 int Hunspell::spell(const char * word, int * info, char ** root)

	1001 {

	1002 +#ifdef HUNSPELL_CHROME_CLIENT

	1003 + if (pHMgr) pHMgr[0]->EmptyHentryCache();

	1004 +#endif

	1005 struct hentry * rv=NULL;

	1006 // need larger vector. For example, Turkish capital letter I converted a

	1007 // 2-byte UTF-8 character (dotless i) by mkallsmall.

	1008 @@ -567,6 +596,13 @@

	1009 word = w2;

	1010 } else word = w;

	1011

	1012 +#ifdef HUNSPELL_CHROME_CLIENT

	1013 + // We need to check the word length if its valid to make coverity (Event

	1014 + // fixed_size_dest: Possible overrun of N byte fixed size buffer) happy.

	1015 + if ((utf8 && strlen(word) >= MAXWORDUTF8LEN) || (!utf8 && strlen(word) >= MAXWORDLEN))

	1016 + return NULL;

	1017 +#endif

	1018 +

	1019 // word reversing wrapper for complex prefixes

	1020 if (complexprefixes) {

	1021 if (word != w2) {

	1022 @@ -657,6 +693,9 @@

	1023

	1024 int Hunspell::suggest(char*** slst, const char * word)

	1025 {

	1026 +#ifdef HUNSPELL_CHROME_CLIENT

	1027 + if (pHMgr) pHMgr[0]->EmptyHentryCache();

	1028 +#endif

	1029 int onlycmpdsug = 0;

	1030 char cw[MAXWORDUTF8LEN];

	1031 char wspace[MAXWORDUTF8LEN];

	1032 @@ -1874,13 +1913,21 @@

	1033

	1034 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)

	1035 {

	1036 +#ifdef HUNSPELL_CHROME_CLIENT

	1037 + return NULL;

	1038 +#else

	1039 return (Hunhandle*)(new Hunspell(affpath, dpath));

	1040 +#endif

	1041 }

	1042

	1043 Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,

	1044 const char * key)

	1045 {

	1046 +#ifdef HUNSPELL_CHROME_CLIENT

	1047 + return NULL;

	1048 +#else

	1049 return (Hunhandle*)(new Hunspell(affpath, dpath, key));

	1050 +#endif

	1051 }

	1052

	1053 void Hunspell_destroy(Hunhandle *pHunspell)

139 Index: src/hunspell/hashmgr.hxx	1054 Index: src/hunspell/hashmgr.hxx

140 ===================================================================	1055 ===================================================================

141 --- src/hunspell/hashmgr.hxx» (revision 3811)	1056 --- src/hunspell/hashmgr.hxx» (revision 48261)

142 +++ src/hunspell/hashmgr.hxx (working copy)	1057 +++ src/hunspell/hashmgr.hxx (working copy)

143 @@ -25,7 +25,7 @@	1058 @@ -8,10 +8,25 @@

	1059 #include "htypes.hxx"

	1060 #include "filemgr.hxx"

	1061

	1062 +#ifdef HUNSPELL_CHROME_CLIENT

	1063 +#include <string>

	1064 +#include <map>

	1065 +

	1066 +#include "base/stl_util-inl.h"

	1067 +#include "base/string_piece.h"

	1068 +#include "third_party/hunspell/google/bdict_reader.h"

	1069 +#endif

	1070 +

	1071 enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };

	1072

	1073 class LIBHUNSPELL_DLL_EXPORTED HashMgr

	1074 {

	1075 +#ifdef HUNSPELL_CHROME_CLIENT

	1076 + // Not owned by this class, owned by the Hunspell object.

	1077 + hunspell::BDictReader* bdict_reader;

	1078 + std::map<base::StringPiece, int> custom_word_to_affix_id_map_;

	1079 + std::vector<std::string*> pointer_to_strings_;

	1080 +#endif

	1081 int tablesize;

	1082 struct hentry ** tableptr;

	1083 int userword;

	1084 @@ -34,7 +49,23 @@

144	1085

145	1086

146 public:	1087 public:

147 - HashMgr(const char * tpath, const char * apath);	1088 +#ifdef HUNSPELL_CHROME_CLIENT

148 + HashMgr(FILE* t_handle, FILE* a_handle);	1089 + HashMgr(hunspell::BDictReader* reader);

	1090 +

	1091 + // Return the hentry corresponding to the given word. Returns NULL if the

	1092 + // word is not there in the cache.

	1093 + hentry* GetHentryFromHEntryCache(char* word);

	1094 +

	1095 + // Called before we do a new operation. This will empty the cache of pointers

	1096 + // to hentries that we have cached. In Chrome, we make these on-demand, but

	1097 + // they must live as long as the single spellcheck operation that they're part

	1098 + // of since Hunspell will save pointers to various ones as it works.

	1099 + //

	1100 + // This function allows that cache to be emptied and not grow infinitely.

	1101 + void EmptyHentryCache();

	1102 +#else

	1103 HashMgr(const char * tpath, const char * apath, const char * key = NULL);

	1104 +#endif

149 ~HashMgr();	1105 ~HashMgr();

150	1106

151 struct hentry * lookup(const char *) const;	1107 struct hentry * lookup(const char *) const;

152 @@ -46,9 +46,9 @@	1108 @@ -59,6 +90,40 @@

153	1109 int al, const char * desc, bool onlyupcase);

154	1110 int load_config(const char * affpath, const char * key);

155 private:	1111 int parse_aliasf(char * line, FileMgr * af);

156 - int load_tables(const char * tpath);	1112 +

157 + int load_tables(FILE* t_handle);	1113 +#ifdef HUNSPELL_CHROME_CLIENT

158 int add_word(const char * word, int wl, unsigned short * ap, int al, const char * desc);	1114 + // Loads the AF lines from a BDICT.

159 - int load_config(const char * affpath);	1115 + // A BDICT file compresses its AF lines to save memory.

160 + int load_config(FILE* aff_handle);	1116 + // This function decompresses each AF line and call parse_aliasf().

161 int parse_aliasf(char * line, FILE * af);	1117 + int LoadAFLines();

162 #ifdef HUNSPELL_EXPERIMENTAL	1118 +

163 int parse_aliasm(char * line, FILE * af);	1119 + // Helper functions that create a new hentry struct, initialize it, and

164 Index: src/hunspell/hunspell.cxx	1120 + // delete it.

165 ===================================================================	1121 + // These functions encapsulate non-trivial operations in creating and

166 --- src/hunspell/hunspell.cxx» (revision 3811)	1122 + // initializing a hentry struct from BDICT data to avoid changing code so much

167 +++ src/hunspell/hunspell.cxx» (working copy)	1123 + // even when a hentry struct is changed.

168 @@ -20,7 +20,7 @@	1124 + hentry* InitHashEntry(hentry* entry,

169 #endif	1125 + size_t item_size,

170 #endif	1126 + const char* word,

171	1127 + int word_length,

172 -Hunspell::Hunspell(const char * affpath, const char * dpath)	1128 + int affix_index) const;

173 +Hunspell::Hunspell(FILE* aff_handle, FILE* dic_handle)	1129 + hentry* CreateHashEntry(const char* word,

174 {	1130 + int word_length,

175 encoding = NULL;	1131 + int affix_index) const;

176 csconv = NULL;	1132 + void DeleteHashEntry(hentry* entry) const;

177 @@ -28,11 +28,11 @@	1133 +

178 complexprefixes = 0;	1134 + // Converts the list of affix IDs to a linked list of hentry structures. The

179	1135 + // hentry structures will point to the given word. The returned pointer will

180 /* first set up the hash manager */	1136 + // be a statically allocated variable that will change for the next call. The

181 - pHMgr = new HashMgr(dpath, affpath);	1137 + // |word| buffer must be the same.

182 + pHMgr = new HashMgr(dic_handle, aff_handle);	1138 + hentry* AffixIDsToHentry(char* word, int* affix_ids, int affix_count) const;

183	1139 +

184 /* next set up the affix manager */	1140 + // See EmptyHentryCache above. Note that each one is actually a linked list

185 /* it needs access to the hash manager lookup methods */	1141 + // followed by the homonym pointer.

186 - pAMgr = new AffixMgr(affpath,pHMgr);	1142 + typedef std::map<std::string, hentry*> HEntryCache;

187 + pAMgr = new AffixMgr(aff_handle, pHMgr);	1143 + HEntryCache hentry_cache;

188	1144 +#endif

189 /* get the preferred try string and the dictionary */	1145 +

190 /* encoding from the Affix Manager for that dictionary */	1146 int add_hidden_capitalized_word(char * word, int wbl, int wcl,

191 @@ -1694,9 +1694,9 @@	1147 unsigned short * flags, int al, char * dp, int captype);

192	1148 int parse_aliasm(char * line, FileMgr * af);

193 #endif // END OF HUNSPELL_EXPERIMENTAL CODE

194

195 -Hunhandle *Hunspell_create(const char * affpath, const char * dpath)

196 +Hunhandle *Hunspell_create(FILE* aff_handle, FILE* dic_handle)

197 {

198 - return (Hunhandle*)(new Hunspell(affpath, dpath));

199 + return (Hunhandle*)(new Hunspell(aff_handle, dic_handle));

200 }

201

202 void Hunspell_destroy(Hunhandle *pHunspell)

203 Index: src/hunspell/hunspell.hxx	1149 Index: src/hunspell/hunspell.hxx

204 ===================================================================	1150 ===================================================================

205 --- src/hunspell/hunspell.hxx» (revision 3811)	1151 --- src/hunspell/hunspell.hxx» (revision 48261)

206 +++ src/hunspell/hunspell.hxx (working copy)	1152 +++ src/hunspell/hunspell.hxx (working copy)

207 @@ -48,7 +48,7 @@	1153 @@ -5,6 +5,10 @@

	1154 #include "suggestmgr.hxx"

	1155 #include "langnum.hxx"

	1156

	1157 +#ifdef HUNSPELL_CHROME_CLIENT

	1158 +#include "third_party/hunspell/google/bdict_reader.h"

	1159 +#endif

	1160 +

	1161 #define SPELL_COMPOUND (1 << 0)

	1162 #define SPELL_FORBIDDEN (1 << 1)

	1163 #define SPELL_ALLCAP (1 << 2)

	1164 @@ -26,7 +30,9 @@

	1165 HashMgr* pHMgr[MAXDIC];

	1166 int maxdic;

	1167 SuggestMgr* pSMgr;

	1168 +#ifndef HUNSPELL_CHROME_CLIENT // We are using BDict instead.

	1169 char * affixpath;

	1170 +#endif

	1171 char * encoding;

	1172 struct cs_info * csconv;

	1173 int langnum;

	1174 @@ -34,17 +40,28 @@

	1175 int complexprefixes;

	1176 char** wordbreak;

	1177

	1178 +#ifdef HUNSPELL_CHROME_CLIENT

	1179 + // Not owned by us, owned by the Hunspell object.

	1180 + hunspell::BDictReader* bdict_reader;

	1181 +#endif

	1182 +

	1183 public:

	1184

	1185 /* Hunspell(aff, dic) - constructor of Hunspell class

208 * input: path of affix file and dictionary file	1186 * input: path of affix file and dictionary file

209 */	1187 */

210	1188

211 - Hunspell(const char * affpath, const char * dpath);	1189 +#ifdef HUNSPELL_CHROME_CLIENT

212 + Hunspell(FILE* aff_handle, FILE* dic_handle);	1190 + Hunspell(const unsigned char* bdict_data, size_t bdict_length);

	1191 +#else

	1192 Hunspell(const char * affpath, const char * dpath, const char * key = NULL);

	1193 +#endif

	1194 ~Hunspell();

	1195

	1196 +#ifndef HUNSPELL_CHROME_CLIENT

	1197 /* load extra dictionaries (only dic files) */

	1198 int add_dic(const char * dpath, const char * key = NULL);

	1199 +#endif

	1200

	1201 /* spell(word) - spellcheck word

	1202 * output: 0 = bad word, not 0 = good word

	1203 Index: src/hunspell/license.hunspell

	1204 ===================================================================

	1205 --- src/hunspell/license.hunspell» (revision 48261)

	1206 +++ src/hunspell/license.hunspell» (working copy)

	1207 @@ -56,4 +56,6 @@

	1208 *

	1209 * *** END LICENSE BLOCK *** */

	1210

	1211 +#ifndef HUNSPELL_CHROME_CLIENT

	1212 #include "config.h"

	1213 +#endif

OLD	NEW

« no previous file with comments | « third_party/hunspell/README.chromium ('k') | third_party/hunspell/hunspell.gyp » ('j') | no next file with comments »