Index: third_party/hunspell/google.patch |
=================================================================== |
--- third_party/hunspell/google.patch (revision 50428) |
+++ third_party/hunspell/google.patch (working copy) |
@@ -1,212 +1,1213 @@ |
+Index: README.chromium |
+=================================================================== |
+--- README.chromium (revision 48261) |
++++ README.chromium (working copy) |
+@@ -1,29 +1,15 @@ |
+-This is a partial copy of Hunspell 1.1.5, with the following changes: |
+-* '#include "config.h"' removed from src/hunspell/hunspell.hxx |
+-* '#include "config.h"' removed from src/hunspell/license.hunspell |
+-* Two unreferenced local variables removed from src/hunspell/suggestmgr.cxx |
+-* src/hunspell/utf_info.cxx moved to src/hunspell/utf_info.hxx, and #include |
+- reference in src/hunspell/csutil.cxx changed accordingly |
+-* Change the input params of the constructors to receive a FILE* instead of |
+- a file path. This is required to use hunspell in the sandbox. |
+- The patch is in google.patch. |
++This is a partial copy of Hunspell 1.2.10 with the following changes: |
++* Remove '#include "config.h"' from src/hunspell/hunspell.hxx |
++* Remove '#include "config.h"' from src/hunspell/license.hunspell |
++* Change src/hunspell/filemgr.hxx and src/hunspell/filemgr.cxx to use |
++ LineIterator. |
++* Add ScopedHashEntryFactory, which creates temporary hentry objects, to |
++ src/hunspell/suggestmgr.cxx |
++* Change the input params of the constructors to receive a BDICTReader instead |
++ of a file path. |
++The patch is in google.patch. |
+ |
+-The English dictionary distributed by Firefox has been checked in to the |
+-dictionaries directory. It has several additions over the default |
+-myspell/hunspell dictionary. |
+- |
+-* Workaround for non-ASCII characters |
+- |
+-Visual Studio on Japanese Windows assumes the source files to be |
+-encoded in Shift_JIS. The compiler is unhappy with non-ASCII letters |
+-in the source files of Hunspell. The same problem happens with other |
+-CJK Windows as well. Here is the workaround for this problem: |
+- |
+-Convert 8-bit bytes to hexadecimal escaped forms by |
+- |
+- % perl -i -De 's/([\x80-\xff])/sprintf("\\x%02x", $1)/ge' src/*.cxx |
+- |
+- |
+-Note that Hunspell upstream is going to fix this problem. We'll no |
+-longer need the workaround if the problem is fixed in the upstream. |
+- |
++All dictionaries used by Chromium have been checked in to the |
++'third_party/hunspell_dictionaries' directory. They have several additions over |
++the default myspell/hunspell dictionaries. |
++(See 'third_party/hunspell_dictionaries/README.chromium' for their details.) |
+Index: src/hunspell/filemgr.cxx |
+=================================================================== |
+--- src/hunspell/filemgr.cxx (revision 48261) |
++++ src/hunspell/filemgr.cxx (working copy) |
+@@ -7,6 +7,32 @@ |
+ |
+ #include "filemgr.hxx" |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++#include "third_party/hunspell/google/bdict_reader.h" |
++ |
++FileMgr::FileMgr(hunspell::LineIterator* iterator) : iterator_(iterator) { |
++} |
++ |
++FileMgr::~FileMgr() { |
++} |
++ |
++char * FileMgr::getline() { |
++ // Read one line from a BDICT file and store the line to our line buffer. |
++ // To emulate the original FileMgr::getline(), this function returns |
++ // the pointer to our line buffer if we can read a line without errors. |
++ // Otherwise, this function returns NULL. |
++ bool result = iterator_->AdvanceAndCopy(line_, BUFSIZE - 1); |
++ return result ? line_ : NULL; |
++} |
++ |
++int FileMgr::getlinenum() { |
++ // This function is used only for displaying the number of a line that |
++ // causes a parser error. A BDICT file is a binary file, so a line number |
++ // does little to help identify the place that causes a parser error. |
++ // So, we just return 0. |
++ return 0; |
++} |
++#else |
+ int FileMgr::fail(const char * err, const char * par) { |
+ fprintf(stderr, err, par); |
+ return -1; |
+@@ -47,3 +73,4 @@ |
+ int FileMgr::getlinenum() { |
+ return linenum; |
+ } |
++#endif |
+Index: src/hunspell/suggestmgr.cxx |
+=================================================================== |
+--- src/hunspell/suggestmgr.cxx (revision 48261) |
++++ src/hunspell/suggestmgr.cxx (working copy) |
+@@ -12,6 +12,99 @@ |
+ |
+ const w_char W_VLINE = { '\0', '|' }; |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++namespace { |
++// A simple class which creates temporary hentry objects which can be |
++// available only in a scope. To conceal memory operations from SuggestMgr |
++// functions, this object automatically deletes all hentry objects created |
++// through CreateScopedHashEntry() calls in its destructor. |
++// So, the following snippet raises a memory error. |
++// |
++// hentry* bad_copy = NULL; |
++// { |
++// ScopedHashEntryFactory factory; |
++// hentry* scoped_copy = factory.CreateScopedHashEntry(0, source); |
++// ... |
++// bad_copy = scoped_copy; |
++// } |
++// if (bad_copy->word[0]) // memory for scoped_copy has been deleted! |
++// |
++// As listed in the above snippet, it is simple to use this class. |
++// 1. Declare an instance of this ScopedHashEntryFactory, and; |
++// 2. Call its CreateScopedHashEntry() member instead of using |
++// 'new hentry' or 'operator='. |
++// |
++// TODO(hbono): this implementation is slower than the previous one of brettw. |
++// We need to improve it? |
++// |
++class ScopedHashEntryFactory { |
++ public: |
++ ScopedHashEntryFactory(); |
++ ~ScopedHashEntryFactory(); |
++ |
++ // Creates a temporary copy of the given hentry struct. |
++ // The returned copy is available only while this object is available. |
++ // NOTE: this function just calls memcpy() in creating a copy of the given |
++ // hentry struct, i.e. it does NOT copy objects referred by pointers of the |
++ // given hentry struct. |
++ hentry* CreateScopedHashEntry(int index, const hentry* source); |
++ |
++ private: |
++ // A struct which encapsulates the new hentry struct used by hunspell 1.2.8. |
++ // The hentry struct used by hunspell 1.2.8 becomes a variable-length struct, |
++ // i.e. it uses its 'word[1]' array member as a variable-length array. |
++ // C/C++ doesn't check boundaries of a char array. For example, for a char |
++ // array 'char word[1]', we can access not only 'word[0]' but also |
++ // 'word[1]', 'word[2]', etc. |
++ // To handle this new hentry struct, we define a struct which combines |
++ // three values (an hentry struct 'entry', a char array 'word[kMaxWordLen]', |
++ // and an unsigned short value 'astr') so that a HashEntryItem 'hash_item' |
++ // satisfies the following equations: |
++ // hash_item.entry.word[1] == hash_item.word[0]. |
++ // hash_item.entry.word[2] == hash_item.word[1]. |
++ // ... |
++ // hash_item.entry.word[n] == hash_item.word[n-1]. |
++ // ... |
++ // hash_item.entry.word[kMaxWordLen] == hash_item.word[kMaxWordLen-1]. |
++ enum { |
++ kMaxWordLen = 128, |
++ }; |
++ struct HashEntryItem { |
++ hentry entry; |
++ char word[kMaxWordLen]; |
++ unsigned short astr; |
++ }; |
++ |
++ HashEntryItem hash_items_[MAX_ROOTS]; |
++}; |
++ |
++ScopedHashEntryFactory::ScopedHashEntryFactory() { |
++ memset(&hash_items_[0], 0, sizeof(hash_items_)); |
++} |
++ |
++ScopedHashEntryFactory::~ScopedHashEntryFactory() { |
++} |
++ |
++hentry* ScopedHashEntryFactory::CreateScopedHashEntry(int index, |
++ const hentry* source) { |
++ if (index >= MAX_ROOTS || source->blen >= kMaxWordLen) |
++ return NULL; |
++ |
++ // Retrieve a HashEntryItem struct from our pool, initialize it, and |
++ // return the address of its 'entry' member. |
++ size_t source_size = sizeof(hentry) + source->blen + 1; |
++ HashEntryItem* hash_item = &hash_items_[index]; |
++ memcpy(&hash_item->entry, source, source_size); |
++ if (source->astr) { |
++ hash_item->astr = *source->astr; |
++ hash_item->entry.astr = &hash_item->astr; |
++ } |
++ return &hash_item->entry; |
++} |
++ |
++} // namespace |
++#endif |
++ |
+ SuggestMgr::SuggestMgr(const char * tryme, int maxn, |
+ AffixMgr * aptr) |
+ { |
+@@ -1029,6 +1122,11 @@ |
+ |
+ struct hentry* hp = NULL; |
+ int col = -1; |
++ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ ScopedHashEntryFactory hash_entry_factory; |
++#endif |
++ |
+ phonetable * ph = (pAMgr) ? pAMgr->get_phonetable() : NULL; |
+ char target[MAXSWUTF8L]; |
+ char candidate[MAXSWUTF8L]; |
+@@ -1066,7 +1164,11 @@ |
+ |
+ if (sc > scores[lp]) { |
+ scores[lp] = sc; |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ roots[lp] = hash_entry_factory.CreateScopedHashEntry(lp, hp); |
++#else |
+ roots[lp] = hp; |
++#endif |
+ lval = sc; |
+ for (j=0; j < MAX_ROOTS; j++) |
+ if (scores[j] < lval) { |
+Index: src/hunspell/replist.hxx |
+=================================================================== |
+--- src/hunspell/replist.hxx (revision 48261) |
++++ src/hunspell/replist.hxx (working copy) |
+@@ -2,6 +2,12 @@ |
+ #ifndef _REPLIST_HXX_ |
+ #define _REPLIST_HXX_ |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++// spellchecker.cc fails to compile when 'near' is defined as a macro, so |
++// undefining it here solves that problem. |
++#undef near |
++#endif |
++ |
+ #include "hunvisapi.h" |
+ |
+ #include "w_char.hxx" |
+Index: src/hunspell/filemgr.hxx |
+=================================================================== |
+--- src/hunspell/filemgr.hxx (revision 48261) |
++++ src/hunspell/filemgr.hxx (working copy) |
+@@ -7,6 +7,30 @@ |
+ #include "hunzip.hxx" |
+ #include <stdio.h> |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++namespace hunspell { |
++class LineIterator; |
++} // namespace hunspell |
++ |
++// A class which encapsulates operations of reading a BDICT file. |
++// Chrome uses a BDICT file to compress hunspell dictionaries. A BDICT file is |
++// a binary file converted from a DIC file and an AFF file. (See |
++// "bdict_reader.h" for its format.) |
++// This class encapsulates the operations of reading a BDICT file and emulates |
++// the original FileMgr operations for AffixMgr so that it can read a BDICT |
++// file without so many changes. |
++class FileMgr { |
++ public: |
++ FileMgr(hunspell::LineIterator* iterator); |
++ ~FileMgr(); |
++ char* getline(); |
++ int getlinenum(); |
++ |
++ protected: |
++ hunspell::LineIterator* iterator_; |
++ char line_[BUFSIZE + 50]; // input buffer |
++}; |
++#else |
+ class LIBHUNSPELL_DLL_EXPORTED FileMgr |
+ { |
+ protected: |
+@@ -23,3 +47,4 @@ |
+ int getlinenum(); |
+ }; |
+ #endif |
++#endif |
Index: src/hunspell/affixmgr.cxx |
=================================================================== |
---- src/hunspell/affixmgr.cxx (revision 3811) |
+--- src/hunspell/affixmgr.cxx (revision 48261) |
+++ src/hunspell/affixmgr.cxx (working copy) |
-@@ -25,7 +27,7 @@ |
- #endif |
- #endif |
+@@ -14,8 +14,14 @@ |
--AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr) |
-+AffixMgr::AffixMgr(FILE* aff_handle, HashMgr* ptr) |
+ #include "csutil.hxx" |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++AffixMgr::AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md) |
++{ |
++ bdict_reader = reader; |
++#else |
+ AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key) |
{ |
++#endif |
// register hash manager and load affix data from aff file |
- pHMgr = ptr; |
-@@ -104,8 +106,8 @@ |
- contclasses[j] = 0; |
+ pHMgr = ptr[0]; |
+ alldic = ptr; |
+@@ -99,9 +105,17 @@ |
+ sFlag[i] = NULL; |
} |
-- if (parse_file(affpath)) { |
-- HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath); |
-+ if (parse_file(aff_handle)) { |
-+ HUNSPELL_WARNING(stderr, "Failure loading aff file\n"); |
- wordchars = mystrdup("qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM"); |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ // Define dummy parameters for parse_file() to avoid changing the parameters |
++ // of parse_file(). This may make it easier to merge the changes of the |
++ // original hunspell. |
++ const char* affpath = NULL; |
++ const char* key = NULL; |
++#else |
+ for (int j=0; j < CONTSIZE; j++) { |
+ contclasses[j] = 0; |
} |
- |
-@@ -232,7 +234,7 @@ |
++#endif |
- |
- // read in aff file and build up prefix and suffix entry objects |
--int AffixMgr::parse_file(const char * affpath) |
-+int AffixMgr::parse_file(FILE* aff_handle) |
- { |
- |
- // io buffers |
-@@ -250,11 +252,12 @@ |
+ if (parse_file(affpath, key)) { |
+ HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath); |
+@@ -252,6 +266,43 @@ |
+ char * line; // io buffers |
+ char ft; // affix type |
- // open the affix file |
- FILE * afflst; |
-- afflst = fopen(affpath,"r"); |
-+ afflst = _fdopen(_dup(_fileno(aff_handle)), "r"); |
- if (!afflst) { |
-- HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath); |
-+ HUNSPELL_WARNING(stderr, "error: could not open affix description file\n"); |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ // open the affix file |
++ // We're always UTF-8 |
++ utf8 = 1; |
++ |
++ // A BDICT file stores PFX and SFX lines in a special section and it provides |
++ // a special line iterator for reading PFX and SFX lines. |
++ // We create a FileMgr object from this iterator and parse PFX and SFX lines |
++ // before parsing other lines. |
++ hunspell::LineIterator affix_iterator = bdict_reader->GetAffixLineIterator(); |
++ FileMgr* iterator = new FileMgr(&affix_iterator); |
++ if (!iterator) { |
++ HUNSPELL_WARNING(stderr, |
++ "error: could not create a FileMgr from an affix line iterator.\n"); |
++ return 1; |
++ } |
++ |
++ while (line = iterator->getline()) { |
++ ft = ' '; |
++ if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P'; |
++ if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S'; |
++ if (ft != ' ') |
++ parse_affix(line, ft, iterator, NULL); |
++ } |
++ delete iterator; |
++ |
++ // Create a FileMgr object for reading lines except PFX and SFX lines. |
++ // We don't need to change the loop below since our FileMgr emulates the |
++ // original one. |
++ hunspell::LineIterator other_iterator = bdict_reader->GetOtherLineIterator(); |
++ FileMgr * afflst = new FileMgr(&other_iterator); |
++ if (!afflst) { |
++ HUNSPELL_WARNING(stderr, |
++ "error: could not create a FileMgr from an other line iterator.\n"); |
++ return 1; |
++ } |
++#else |
+ // checking flag duplication |
+ char dupflags[CONTSIZE]; |
+ char dupflags_ini = 1; |
+@@ -265,6 +316,7 @@ |
+ HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath); |
return 1; |
} |
-+ fseek(afflst, 0, SEEK_SET); |
++#endif |
// step one is to parse the affix file building up the internal |
// affix data structures |
+@@ -274,6 +326,7 @@ |
+ while ((line = afflst->getline())) { |
+ mychomp(line); |
+ |
++#ifndef HUNSPELL_CHROME_CLIENT |
+ /* remove byte order mark */ |
+ if (firstline) { |
+ firstline = 0; |
+@@ -282,6 +335,7 @@ |
+ memmove(line, line+3, strlen(line+3)+1); |
+ } |
+ } |
++#endif |
+ |
+ /* parse in the keyboard string */ |
+ if (strncmp(line,"KEY",3) == 0) { |
+@@ -517,6 +571,7 @@ |
+ } |
+ } |
+ |
++#ifndef HUNSPELL_CHROME_CLIENT |
+ /* parse in the typical fault correcting table */ |
+ if (strncmp(line,"REP",3) == 0) { |
+ if (parse_reptable(line, afflst)) { |
+@@ -524,6 +579,7 @@ |
+ return 1; |
+ } |
+ } |
++#endif |
+ |
+ /* parse in the input conversion table */ |
+ if (strncmp(line,"ICONV",5) == 0) { |
+@@ -634,6 +690,7 @@ |
+ checksharps=1; |
+ } |
+ |
++#ifndef HUNSPELL_CHROME_CLIENT // Chrome handled affixes above. |
+ /* parse this affix: P - prefix, S - suffix */ |
+ ft = ' '; |
+ if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P'; |
+@@ -650,6 +707,7 @@ |
+ return 1; |
+ } |
+ } |
++#endif |
+ |
+ } |
+ delete afflst; |
+@@ -1247,6 +1305,26 @@ |
+ const char * r; |
+ int lenr, lenp; |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ const char *pattern, *pattern2; |
++ hunspell::ReplacementIterator iterator = bdict_reader->GetReplacementIterator(); |
++ while (iterator.GetNext(&pattern, &pattern2)) { |
++ r = word; |
++ lenr = strlen(pattern2); |
++ lenp = strlen(pattern); |
++ |
++ // search every occurrence of the pattern in the word |
++ while ((r=strstr(r, pattern)) != NULL) { |
++ strcpy(candidate, word); |
++ if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break; |
++ strcpy(candidate+(r-word), pattern2); |
++ strcpy(candidate+(r-word)+lenr, r+lenp); |
++ if (candidate_check(candidate,strlen(candidate))) return 1; |
++ r++; // search for the next letter |
++ } |
++ } |
++ |
++#else |
+ if ((wl < 2) || !numrep) return 0; |
+ |
+ for (int i=0; i < numrep; i++ ) { |
+@@ -1263,6 +1341,7 @@ |
+ r++; // search for the next letter |
+ } |
+ } |
++#endif |
+ return 0; |
+ } |
+ |
+@@ -3332,6 +3411,7 @@ |
+ return 0; |
+ } |
+ |
++#ifndef HUNSPELL_CHROME_CLIENT |
+ /* parse in the typical fault correcting table */ |
+ int AffixMgr::parse_reptable(char * line, FileMgr * af) |
+ { |
+@@ -3407,6 +3487,7 @@ |
+ } |
+ return 0; |
+ } |
++#endif |
+ |
+ /* parse in the typical fault correcting table */ |
+ int AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword) |
+@@ -4010,6 +4091,7 @@ |
+ case 1: { |
+ np++; |
+ aflag = pHMgr->decode_flag(piece); |
++#ifndef HUNSPELL_CHROME_CLIENT // We don't check for duplicates. |
+ if (((at == 'S') && (dupflags[aflag] & dupSFX)) || |
+ ((at == 'P') && (dupflags[aflag] & dupPFX))) { |
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix flag\n", |
+@@ -4017,6 +4099,7 @@ |
+ // return 1; XXX permissive mode for bad dictionaries |
+ } |
+ dupflags[aflag] += (char) ((at == 'S') ? dupSFX : dupPFX); |
++#endif |
+ break; |
+ } |
+ // piece 3 - is cross product indicator |
Index: src/hunspell/affixmgr.hxx |
=================================================================== |
---- src/hunspell/affixmgr.hxx (revision 3811) |
+--- src/hunspell/affixmgr.hxx (revision 48261) |
+++ src/hunspell/affixmgr.hxx (working copy) |
-@@ -93,7 +93,7 @@ |
- |
+@@ -18,6 +18,40 @@ |
+ class PfxEntry; |
+ class SfxEntry; |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ |
++#include <vector> |
++ |
++// This class provides an implementation of the contclasses array in AffixMgr |
++// that is normally a large static array. We should almost never need more than |
++// 256 elements, so this class only allocates that much to start off with. If |
++// elements higher than that are actually used, we'll automatically expand. |
++class ContClasses { |
++ public: |
++ ContClasses() { |
++ // Pre-allocate a buffer so that typically, we'll never have to resize. |
++ EnsureSizeIs(256); |
++ } |
++ |
++ char& operator[](size_t index) { |
++ EnsureSizeIs(index + 1); |
++ return data[index]; |
++ } |
++ |
++ void EnsureSizeIs(size_t new_size) { |
++ if (data.size() >= new_size) |
++ return; // Nothing to do. |
++ |
++ size_t old_size = data.size(); |
++ data.resize(new_size); |
++ memset(&data[old_size], 0, new_size - old_size); |
++ } |
++ |
++ std::vector<char> data; |
++}; |
++ |
++#endif // HUNSPELL_CHROME_CLIENT |
++ |
+ class LIBHUNSPELL_DLL_EXPORTED AffixMgr |
+ { |
+ |
+@@ -98,12 +132,20 @@ |
+ int fullstrip; |
+ |
+ int havecontclass; // boolean variable |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ ContClasses contclasses; |
++#else |
+ char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix) |
++#endif |
+ |
public: |
- |
-- AffixMgr(const char * affpath, HashMgr * ptr); |
-+ AffixMgr(FILE* aff_handle, HashMgr * ptr); |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md); |
++#else |
+ AffixMgr(const char * affpath, HashMgr** ptr, int * md, |
+ const char * key = NULL); |
++#endif |
~AffixMgr(); |
struct hentry * affix_check(const char * word, int len, |
- const unsigned short needflag = (unsigned short) 0, char in_compound = IN_CPD_NOT); |
-@@ -179,7 +179,7 @@ |
- int get_checksharps(void); |
+ const unsigned short needflag = (unsigned short) 0, |
+@@ -202,6 +244,10 @@ |
+ int get_fullstrip() const; |
private: |
-- int parse_file(const char * affpath); |
-+ int parse_file(FILE* aff_handle); |
- // int parse_string(char * line, char ** out, const char * name); |
- int parse_flag(char * line, unsigned short * out, const char * name); |
- int parse_num(char * line, int * out, const char * name); |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ // Not owned by us, owned by the Hunspell object. |
++ hunspell::BDictReader* bdict_reader; |
++#endif |
+ int parse_file(const char * affpath, const char * key); |
+ int parse_flag(char * line, unsigned short * out, FileMgr * af); |
+ int parse_num(char * line, int * out, FileMgr * af); |
+Index: src/hunspell/htypes.hxx |
+=================================================================== |
+--- src/hunspell/htypes.hxx (revision 48261) |
++++ src/hunspell/htypes.hxx (working copy) |
+@@ -1,6 +1,16 @@ |
+ #ifndef _HTYPES_HXX_ |
+ #define _HTYPES_HXX_ |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++// This is a workaround for preventing errors in parsing Turkish BDICs, which |
++// contain very long AF lines (~ 12,000 chars). |
++// TODO(hbono) change the HashMgr::parse_aliasf() function to be able to parse |
++// longer lines than MAXDELEN. |
++#define MAXDELEN (8192 * 2) |
++#else |
++#define MAXDELEN 8192 |
++#endif // HUNSPELL_CHROME_CLIENT |
++ |
+ #define ROTATE_LEN 5 |
+ |
+ #define ROTATE(v,q) \ |
Index: src/hunspell/hashmgr.cxx |
=================================================================== |
---- src/hunspell/hashmgr.cxx (revision 3811) |
+--- src/hunspell/hashmgr.cxx (revision 48261) |
+++ src/hunspell/hashmgr.cxx (working copy) |
-@@ -29,7 +31,7 @@ |
+@@ -12,8 +12,14 @@ |
// build a hash table from a munched word list |
--HashMgr::HashMgr(const char * tpath, const char * apath) |
-+HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle) |
++#ifdef HUNSPELL_CHROME_CLIENT |
++HashMgr::HashMgr(hunspell::BDictReader* reader) |
++{ |
++ bdict_reader = reader; |
++#else |
+ HashMgr::HashMgr(const char * tpath, const char * apath, const char * key) |
{ |
++#endif |
tablesize = 0; |
tableptr = NULL; |
-@@ -43,8 +45,8 @@ |
- aliasf = NULL; |
+ flag_mode = FLAG_CHAR; |
+@@ -31,8 +37,14 @@ |
numaliasm = 0; |
aliasm = NULL; |
-- load_config(apath); |
-- int ec = load_tables(tpath); |
-+ load_config(aff_handle); |
-+ int ec = load_tables(dic_handle); |
+ forbiddenword = FORBIDDENWORD; // forbidden word signing flag |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ // No tables to load, just the AF lines. |
++ load_config(NULL, NULL); |
++ int ec = LoadAFLines(); |
++#else |
+ load_config(apath, key); |
+ int ec = load_tables(tpath, key); |
++#endif |
if (ec) { |
/* error condition - what should we do here */ |
HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec); |
-@@ -240,7 +242,7 @@ |
+@@ -91,15 +103,59 @@ |
+ if (ignorechars) free(ignorechars); |
+ if (ignorechars_utf16) free(ignorechars_utf16); |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ EmptyHentryCache(); |
++ for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin(); |
++ it != pointer_to_strings_.end(); ++it) { |
++ delete *it; |
++ } |
++#endif |
++ |
+ #ifdef MOZILLA_CLIENT |
+ delete [] csconv; |
+ #endif |
} |
++#ifdef HUNSPELL_CHROME_CLIENT |
++void HashMgr::EmptyHentryCache() { |
++ // We need to delete each cache entry, and each additional one in the linked |
++ // list of homonyms. |
++ for (HEntryCache::iterator i = hentry_cache.begin(); |
++ i != hentry_cache.end(); ++i) { |
++ hentry* cur = i->second; |
++ while (cur) { |
++ hentry* next = cur->next_homonym; |
++ DeleteHashEntry(cur); |
++ cur = next; |
++ } |
++ } |
++ hentry_cache.clear(); |
++} |
++#endif |
++ |
+ // lookup a root word in the hashtable |
+ |
+ struct hentry * HashMgr::lookup(const char *word) const |
+ { |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; |
++ int affix_count = bdict_reader->FindWord(word, affix_ids); |
++ if (affix_count == 0) { // look for custom added word |
++ std::map<base::StringPiece, int>::const_iterator iter = |
++ custom_word_to_affix_id_map_.find(word); |
++ if (iter != custom_word_to_affix_id_map_.end()) { |
++ affix_count = 1; |
++ affix_ids[0] = iter->second; |
++ } |
++ } |
++ |
++ static const int kMaxWordLen = 128; |
++ static char word_buf[kMaxWordLen]; |
++ // To take account of null-termination, we use up to 127. |
++ strncpy(word_buf, word, kMaxWordLen - 1); |
++ |
++ return AffixIDsToHentry(word_buf, affix_ids, affix_count); |
++#else |
+ struct hentry * dp; |
+ if (tableptr) { |
+ dp = tableptr[hash(word)]; |
+@@ -109,12 +165,14 @@ |
+ } |
+ } |
+ return NULL; |
++#endif |
+ } |
+ |
+ // add a word to the hash table (private) |
+ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, |
+ int al, const char * desc, bool onlyupcase) |
+ { |
++#ifndef HUNSPELL_CHROME_CLIENT |
+ bool upcasehomonym = false; |
+ int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0; |
+ // variable-length hash record with word and optional fields |
+@@ -206,6 +264,17 @@ |
+ if (hp->astr) free(hp->astr); |
+ free(hp); |
+ } |
++#else |
++ std::map<base::StringPiece, int>::iterator iter = |
++ custom_word_to_affix_id_map_.find(word); |
++ if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added |
++ std::string* new_string_word = new std::string(word); |
++ pointer_to_strings_.push_back(new_string_word); |
++ base::StringPiece sp(*(new_string_word)); |
++ custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words |
++ return 1; |
++ } |
++#endif |
+ return 0; |
+ } |
+ |
+@@ -339,6 +408,43 @@ |
+ // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp); |
+ struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const |
+ { |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ // Return NULL if dictionary is not valid. |
++ if (!bdict_reader->IsValid()) |
++ return NULL; |
++ |
++ // This function is only ever called by one place and not nested. We can |
++ // therefore keep static state between calls and use |col| as a "reset" flag |
++ // to avoid changing the API. It is set to -1 for the first call. |
++ static hunspell::WordIterator word_iterator = |
++ bdict_reader->GetAllWordIterator(); |
++ if (col < 0) { |
++ col = 1; |
++ word_iterator = bdict_reader->GetAllWordIterator(); |
++ } |
++ |
++ int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; |
++ static const int kMaxWordLen = 128; |
++ static char word[kMaxWordLen]; |
++ int affix_count = word_iterator.Advance(word, kMaxWordLen, affix_ids); |
++ if (affix_count == 0) |
++ return NULL; |
++ short word_len = static_cast<short>(strlen(word)); |
++ |
++ // Since hunspell 1.2.8, an hentry struct becomes a variable-length struct, |
++ // i.e. a struct which uses its array 'word[1]' as a variable-length array. |
++ // As noted above, this function is not nested. So, we just use a static |
++ // struct which consists of an hentry and a char[kMaxWordLen], and initialize |
++ // the static struct and return it for now. |
++ // No need to create linked lists for the extra affixes. |
++ static struct { |
++ hentry entry; |
++ char word[kMaxWordLen]; |
++ } hash_entry; |
++ |
++ return InitHashEntry(&hash_entry.entry, sizeof(hash_entry), |
++ &word[0], word_len, affix_ids[0]); |
++#else |
+ if (hp && hp->next != NULL) return hp->next; |
+ for (col++; col < tablesize; col++) { |
+ if (tableptr[col]) return tableptr[col]; |
+@@ -346,11 +452,13 @@ |
+ // null at end and reset to start |
+ col = -1; |
+ return NULL; |
++#endif |
+ } |
+ |
// load a munched word list and build a hash table on the fly |
--int HashMgr::load_tables(const char * tpath) |
-+int HashMgr::load_tables(FILE* t_handle) |
+ int HashMgr::load_tables(const char * tpath, const char * key) |
{ |
- int wl, al; |
++#ifndef HUNSPELL_CHROME_CLIENT |
+ int al; |
char * ap; |
-@@ -248,8 +250,9 @@ |
- unsigned short * flags; |
+ char * dp; |
+@@ -470,6 +578,7 @@ |
+ } |
- // raw dictionary - munched file |
-- FILE * rawdict = fopen(tpath, "r"); |
-+ FILE * rawdict = _fdopen(_dup(_fileno(t_handle)), "r"); |
- if (rawdict == NULL) return 1; |
-+ fseek(rawdict, 0, SEEK_SET); |
+ delete dict; |
++#endif |
+ return 0; |
+ } |
- // first read the first line of file to get hash table size */ |
- char ts[MAXDELEN]; |
-@@ -442,7 +445,7 @@ |
+@@ -478,6 +587,9 @@ |
+ |
+ int HashMgr::hash(const char * word) const |
+ { |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ return 0; |
++#else |
+ long hv = 0; |
+ for (int i=0; i < 4 && *word != 0; i++) |
+ hv = (hv << 8) | (*word++); |
+@@ -486,6 +598,7 @@ |
+ hv ^= (*word++); |
+ } |
+ return (unsigned long) hv % tablesize; |
++#endif |
} |
- // read in aff file and set flag mode |
--int HashMgr::load_config(const char * affpath) |
-+int HashMgr::load_config(FILE* aff_handle) |
- { |
+ int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) { |
+@@ -607,7 +720,12 @@ |
int firstline = 1; |
- |
-@@ -451,11 +454,12 @@ |
// open the affix file |
- FILE * afflst; |
-- afflst = fopen(affpath,"r"); |
-+ afflst = _fdopen(_dup(_fileno(aff_handle)), "r"); |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator(); |
++ FileMgr * afflst = new FileMgr(&iterator); |
++#else |
+ FileMgr * afflst = new FileMgr(affpath, key); |
++#endif |
if (!afflst) { |
-- HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath); |
-+ HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n"); |
+ HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath); |
return 1; |
- } |
-+ fseek(afflst, 0, SEEK_SET); |
+@@ -802,6 +920,121 @@ |
+ return 0; |
+ } |
- // read in each line ignoring any that do not |
- // start with a known line type indicator |
-Index: src/hunspell/hashmgr.hxx |
-=================================================================== |
---- src/hunspell/hashmgr.hxx (revision 3811) |
-+++ src/hunspell/hashmgr.hxx (working copy) |
-@@ -25,7 +25,7 @@ |
- |
- |
- public: |
-- HashMgr(const char * tpath, const char * apath); |
-+ HashMgr(FILE* t_handle, FILE* a_handle); |
- ~HashMgr(); |
- |
- struct hentry * lookup(const char *) const; |
-@@ -46,9 +46,9 @@ |
- |
- |
- private: |
-- int load_tables(const char * tpath); |
-+ int load_tables(FILE* t_handle); |
- int add_word(const char * word, int wl, unsigned short * ap, int al, const char * desc); |
-- int load_config(const char * affpath); |
-+ int load_config(FILE* aff_handle); |
- int parse_aliasf(char * line, FILE * af); |
- #ifdef HUNSPELL_EXPERIMENTAL |
- int parse_aliasm(char * line, FILE * af); |
++#ifdef HUNSPELL_CHROME_CLIENT |
++int HashMgr::LoadAFLines() |
++{ |
++ utf8 = 1; // We always use UTF-8. |
++ |
++ // Read in all the AF lines which tell us the rules for each affix group ID. |
++ hunspell::LineIterator iterator = bdict_reader->GetAfLineIterator(); |
++ FileMgr afflst(&iterator); |
++ while (char* line = afflst.getline()) { |
++ int rv = parse_aliasf(line, &afflst); |
++ if (rv) |
++ return rv; |
++ } |
++ |
++ return 0; |
++} |
++ |
++hentry* HashMgr::InitHashEntry(hentry* entry, |
++ size_t item_size, |
++ const char* word, |
++ int word_length, |
++ int affix_index) const { |
++ // Return if the given buffer doesn't have enough space for a hentry struct |
++ // or the given word is too long. |
++ // Our BDICT cannot handle words longer than (128 - 1) bytes. So, it is |
++ // better to return an error if the given word is too long and prevent |
++ // an unexpected result caused by a long word. |
++ const int kMaxWordLen = 128; |
++ if (item_size < sizeof(hentry) + word_length + 1 || |
++ word_length >= kMaxWordLen) |
++ return NULL; |
++ |
++ // Initialize a hentry struct with the given parameters, and |
++ // append the given string at the end of this hentry struct. |
++ memset(entry, 0, item_size); |
++ FileMgr af(NULL); |
++ entry->alen = static_cast<short>( |
++ const_cast<HashMgr*>(this)->get_aliasf(affix_index, &entry->astr, &af)); |
++ entry->blen = static_cast<unsigned char>(word_length); |
++ memcpy(&entry->word, word, word_length); |
++ |
++ return entry; |
++} |
++ |
++hentry* HashMgr::CreateHashEntry(const char* word, |
++ int word_length, |
++ int affix_index) const { |
++ // Return if the given word is too long. |
++ // (See the comment in HashMgr::InitHashEntry().) |
++ const int kMaxWordLen = 128; |
++ if (word_length >= kMaxWordLen) |
++ return NULL; |
++ |
++ const size_t kEntrySize = sizeof(hentry) + word_length + 1; |
++ struct hentry* entry = reinterpret_cast<hentry*>(malloc(kEntrySize)); |
++ if (entry) |
++ InitHashEntry(entry, kEntrySize, word, word_length, affix_index); |
++ |
++ return entry; |
++} |
++ |
++void HashMgr::DeleteHashEntry(hentry* entry) const { |
++ free(entry); |
++} |
++ |
++hentry* HashMgr::AffixIDsToHentry(char* word, |
++ int* affix_ids, |
++ int affix_count) const |
++{ |
++ if (affix_count == 0) |
++ return NULL; |
++ |
++ HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache; |
++ std::string std_word(word); |
++ HEntryCache::iterator found = cache.find(std_word); |
++ if (found != cache.end()) { |
++ // We must return an existing hentry for the same word if we've previously |
++ // handed one out. Hunspell will compare pointers in some cases to see if |
++ // two words it has found are the same. |
++ return found->second; |
++ } |
++ |
++ short word_len = static_cast<short>(strlen(word)); |
++ |
++ // We can get a number of affix IDs per word. There will normally be only one, |
++ // but if not, there will be a linked list of "hentry"s for the "homonym"s |
++ // for the word. |
++ struct hentry* first_he = NULL; |
++ struct hentry* prev_he = NULL; // For making linked list. |
++ for (int i = 0; i < affix_count; i++) { |
++ struct hentry* he = CreateHashEntry(word, word_len, affix_ids[i]); |
++ if (!he) |
++ break; |
++ if (i == 0) |
++ first_he = he; |
++ if (prev_he) |
++ prev_he->next_homonym = he; |
++ prev_he = he; |
++ } |
++ |
++ cache[std_word] = first_he; // Save this word in the cache for later. |
++ return first_he; |
++} |
++ |
++hentry* HashMgr::GetHentryFromHEntryCache(char* word) { |
++ HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache; |
++ std::string std_word(word); |
++ HEntryCache::iterator found = cache.find(std_word); |
++ if (found != cache.end()) |
++ return found->second; |
++ else |
++ return NULL; |
++} |
++#endif |
++ |
+ int HashMgr::is_aliasf() { |
+ return (aliasf != NULL); |
+ } |
Index: src/hunspell/hunspell.cxx |
=================================================================== |
---- src/hunspell/hunspell.cxx (revision 3811) |
+--- src/hunspell/hunspell.cxx (revision 48261) |
+++ src/hunspell/hunspell.cxx (working copy) |
-@@ -20,7 +20,7 @@ |
- #endif |
- #endif |
+@@ -7,18 +7,35 @@ |
--Hunspell::Hunspell(const char * affpath, const char * dpath) |
-+Hunspell::Hunspell(FILE* aff_handle, FILE* dic_handle) |
+ #include "hunspell.hxx" |
+ #include "hunspell.h" |
++#ifndef HUNSPELL_CHROME_CLIENT |
+ #include "config.h" |
++#endif |
+ #include "csutil.hxx" |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++Hunspell::Hunspell(const unsigned char* bdict_data, size_t bdict_length) |
++#else |
+ Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key) |
++#endif |
{ |
encoding = NULL; |
csconv = NULL; |
-@@ -28,11 +28,11 @@ |
+ utf8 = 0; |
complexprefixes = 0; |
++#ifndef HUNSPELL_CHROME_CLIENT |
+ affixpath = mystrdup(affpath); |
++#endif |
+ maxdic = 0; |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ bdict_reader = new hunspell::BDictReader; |
++ bdict_reader->Init(bdict_data, bdict_length); |
++ |
++ pHMgr[0] = new HashMgr(bdict_reader); |
++ if (pHMgr[0]) maxdic = 1; |
++ |
++ pAMgr = new AffixMgr(bdict_reader, pHMgr, &maxdic); |
++#else |
/* first set up the hash manager */ |
-- pHMgr = new HashMgr(dpath, affpath); |
-+ pHMgr = new HashMgr(dic_handle, aff_handle); |
- |
+ pHMgr[0] = new HashMgr(dpath, affpath, key); |
+ if (pHMgr[0]) maxdic = 1; |
+@@ -26,6 +43,7 @@ |
/* next set up the affix manager */ |
/* it needs access to the hash manager lookup methods */ |
-- pAMgr = new AffixMgr(affpath,pHMgr); |
-+ pAMgr = new AffixMgr(aff_handle, pHMgr); |
+ pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key); |
++#endif |
/* get the preferred try string and the dictionary */ |
/* encoding from the Affix Manager for that dictionary */ |
-@@ -1694,9 +1694,9 @@ |
+@@ -56,10 +74,17 @@ |
+ csconv= NULL; |
+ if (encoding) free(encoding); |
+ encoding = NULL; |
++ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ if (bdict_reader) delete bdict_reader; |
++ bdict_reader = NULL; |
++#else |
+ if (affixpath) free(affixpath); |
+ affixpath = NULL; |
++#endif |
+ } |
- #endif // END OF HUNSPELL_EXPERIMENTAL CODE |
++#ifndef HUNSPELL_CHROME_CLIENT |
+ // load extra dictionaries |
+ int Hunspell::add_dic(const char * dpath, const char * key) { |
+ if (maxdic == MAXDIC || !affixpath) return 1; |
+@@ -67,6 +92,7 @@ |
+ if (pHMgr[maxdic]) maxdic++; else return 1; |
+ return 0; |
+ } |
++#endif |
--Hunhandle *Hunspell_create(const char * affpath, const char * dpath) |
-+Hunhandle *Hunspell_create(FILE* aff_handle, FILE* dic_handle) |
+ // make a copy of src at destination while removing all leading |
+ // blanks and removing any trailing periods after recording |
+@@ -319,6 +345,9 @@ |
+ |
+ int Hunspell::spell(const char * word, int * info, char ** root) |
{ |
-- return (Hunhandle*)(new Hunspell(affpath, dpath)); |
-+ return (Hunhandle*)(new Hunspell(aff_handle, dic_handle)); |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ if (pHMgr) pHMgr[0]->EmptyHentryCache(); |
++#endif |
+ struct hentry * rv=NULL; |
+ // need larger vector. For example, Turkish capital letter I converted a |
+ // 2-byte UTF-8 character (dotless i) by mkallsmall. |
+@@ -567,6 +596,13 @@ |
+ word = w2; |
+ } else word = w; |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ // We need to check that the word length is valid to make Coverity (Event |
++ // fixed_size_dest: Possible overrun of N byte fixed size buffer) happy. |
++ if ((utf8 && strlen(word) >= MAXWORDUTF8LEN) || (!utf8 && strlen(word) >= MAXWORDLEN)) |
++ return NULL; |
++#endif |
++ |
+ // word reversing wrapper for complex prefixes |
+ if (complexprefixes) { |
+ if (word != w2) { |
+@@ -657,6 +693,9 @@ |
+ |
+ int Hunspell::suggest(char*** slst, const char * word) |
+ { |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ if (pHMgr) pHMgr[0]->EmptyHentryCache(); |
++#endif |
+ int onlycmpdsug = 0; |
+ char cw[MAXWORDUTF8LEN]; |
+ char wspace[MAXWORDUTF8LEN]; |
+@@ -1874,13 +1913,21 @@ |
+ |
+ Hunhandle *Hunspell_create(const char * affpath, const char * dpath) |
+ { |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ return NULL; |
++#else |
+ return (Hunhandle*)(new Hunspell(affpath, dpath)); |
++#endif |
} |
+ Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath, |
+ const char * key) |
+ { |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ return NULL; |
++#else |
+ return (Hunhandle*)(new Hunspell(affpath, dpath, key)); |
++#endif |
+ } |
+ |
void Hunspell_destroy(Hunhandle *pHunspell) |
+Index: src/hunspell/hashmgr.hxx |
+=================================================================== |
+--- src/hunspell/hashmgr.hxx (revision 48261) |
++++ src/hunspell/hashmgr.hxx (working copy) |
+@@ -8,10 +8,25 @@ |
+ #include "htypes.hxx" |
+ #include "filemgr.hxx" |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++#include <string> |
++#include <map> |
++ |
++#include "base/stl_util-inl.h" |
++#include "base/string_piece.h" |
++#include "third_party/hunspell/google/bdict_reader.h" |
++#endif |
++ |
+ enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; |
+ |
+ class LIBHUNSPELL_DLL_EXPORTED HashMgr |
+ { |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ // Not owned by this class, owned by the Hunspell object. |
++ hunspell::BDictReader* bdict_reader; |
++ std::map<base::StringPiece, int> custom_word_to_affix_id_map_; |
++ std::vector<std::string*> pointer_to_strings_; |
++#endif |
+ int tablesize; |
+ struct hentry ** tableptr; |
+ int userword; |
+@@ -34,7 +49,23 @@ |
+ |
+ |
+ public: |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ HashMgr(hunspell::BDictReader* reader); |
++ |
++ // Return the hentry corresponding to the given word. Returns NULL if the |
++ // word is not there in the cache. |
++ hentry* GetHentryFromHEntryCache(char* word); |
++ |
++ // Called before we do a new operation. This will empty the cache of pointers |
++ // to hentries that we have cached. In Chrome, we make these on-demand, but |
++ // they must live as long as the single spellcheck operation that they're part |
++ // of since Hunspell will save pointers to various ones as it works. |
++ // |
++ // This function allows that cache to be emptied and not grow infinitely. |
++ void EmptyHentryCache(); |
++#else |
+ HashMgr(const char * tpath, const char * apath, const char * key = NULL); |
++#endif |
+ ~HashMgr(); |
+ |
+ struct hentry * lookup(const char *) const; |
+@@ -59,6 +90,40 @@ |
+ int al, const char * desc, bool onlyupcase); |
+ int load_config(const char * affpath, const char * key); |
+ int parse_aliasf(char * line, FileMgr * af); |
++ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ // Loads the AF lines from a BDICT. |
++ // A BDICT file compresses its AF lines to save memory. |
++ // This function decompresses each AF line and calls parse_aliasf(). |
++ int LoadAFLines(); |
++ |
++ // Helper functions that create a new hentry struct, initialize it, and |
++ // delete it. |
++ // These functions encapsulate non-trivial operations in creating and |
++ // initializing a hentry struct from BDICT data to avoid changing code so much |
++ // even when a hentry struct is changed. |
++ hentry* InitHashEntry(hentry* entry, |
++ size_t item_size, |
++ const char* word, |
++ int word_length, |
++ int affix_index) const; |
++ hentry* CreateHashEntry(const char* word, |
++ int word_length, |
++ int affix_index) const; |
++ void DeleteHashEntry(hentry* entry) const; |
++ |
++ // Converts the list of affix IDs to a linked list of hentry structures, one |
++ // per affix ID, each holding its own copy of |word|. The list head is stored |
++ // in hentry_cache, and the same pointer is returned for later calls with the |
++ // same word. Returns NULL if |affix_count| is 0. |
++ hentry* AffixIDsToHentry(char* word, int* affix_ids, int affix_count) const; |
++ |
++ // See EmptyHentryCache above. Note that each value is the head of a linked |
++ // list chained through the next_homonym pointer. |
++ typedef std::map<std::string, hentry*> HEntryCache; |
++ HEntryCache hentry_cache; |
++#endif |
++ |
+ int add_hidden_capitalized_word(char * word, int wbl, int wcl, |
+ unsigned short * flags, int al, char * dp, int captype); |
+ int parse_aliasm(char * line, FileMgr * af); |
Index: src/hunspell/hunspell.hxx |
=================================================================== |
---- src/hunspell/hunspell.hxx (revision 3811) |
+--- src/hunspell/hunspell.hxx (revision 48261) |
+++ src/hunspell/hunspell.hxx (working copy) |
-@@ -48,7 +48,7 @@ |
+@@ -5,6 +5,10 @@ |
+ #include "suggestmgr.hxx" |
+ #include "langnum.hxx" |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++#include "third_party/hunspell/google/bdict_reader.h" |
++#endif |
++ |
+ #define SPELL_COMPOUND (1 << 0) |
+ #define SPELL_FORBIDDEN (1 << 1) |
+ #define SPELL_ALLCAP (1 << 2) |
+@@ -26,7 +30,9 @@ |
+ HashMgr* pHMgr[MAXDIC]; |
+ int maxdic; |
+ SuggestMgr* pSMgr; |
++#ifndef HUNSPELL_CHROME_CLIENT // We are using BDict instead. |
+ char * affixpath; |
++#endif |
+ char * encoding; |
+ struct cs_info * csconv; |
+ int langnum; |
+@@ -34,17 +40,28 @@ |
+ int complexprefixes; |
+ char** wordbreak; |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ // Owned by this Hunspell object, which allocates and deletes it. |
++ hunspell::BDictReader* bdict_reader; |
++#endif |
++ |
+ public: |
+ |
+ /* Hunspell(aff, dic) - constructor of Hunspell class |
* input: path of affix file and dictionary file |
*/ |
- |
-- Hunspell(const char * affpath, const char * dpath); |
-+ Hunspell(FILE* aff_handle, FILE* dic_handle); |
+ |
++#ifdef HUNSPELL_CHROME_CLIENT |
++ Hunspell(const unsigned char* bdict_data, size_t bdict_length); |
++#else |
+ Hunspell(const char * affpath, const char * dpath, const char * key = NULL); |
++#endif |
+ ~Hunspell(); |
+ |
++#ifndef HUNSPELL_CHROME_CLIENT |
+ /* load extra dictionaries (only dic files) */ |
+ int add_dic(const char * dpath, const char * key = NULL); |
++#endif |
+ |
+ /* spell(word) - spellcheck word |
+ * output: 0 = bad word, not 0 = good word |
+Index: src/hunspell/license.hunspell |
+=================================================================== |
+--- src/hunspell/license.hunspell (revision 48261) |
++++ src/hunspell/license.hunspell (working copy) |
+@@ -56,4 +56,6 @@ |
+ * |
+ * ***** END LICENSE BLOCK ***** */ |
+ |
++#ifndef HUNSPELL_CHROME_CLIENT |
+ #include "config.h" |
++#endif |