| Index: third_party/hunspell/google.patch
|
| ===================================================================
|
| --- third_party/hunspell/google.patch (revision 50428)
|
| +++ third_party/hunspell/google.patch (working copy)
|
| @@ -1,212 +1,1213 @@
|
| +Index: README.chromium
|
| +===================================================================
|
| +--- README.chromium (revision 48261)
|
| ++++ README.chromium (working copy)
|
| +@@ -1,29 +1,15 @@
|
| +-This is a partial copy of Hunspell 1.1.5, with the following changes:
|
| +-* '#include "config.h"' removed from src/hunspell/hunspell.hxx
|
| +-* '#include "config.h"' removed from src/hunspell/license.hunspell
|
| +-* Two unreferenced local variables removed from src/hunspell/suggestmgr.cxx
|
| +-* src/hunspell/utf_info.cxx moved to src/hunspell/utf_info.hxx, and #include
|
| +- reference in src/hunspell/csutil.cxx changed accordingly
|
| +-* Change the input params of the constructors to receive a FILE* instead of
|
| +- a file path. This is required to use hunspell in the sandbox.
|
| +- The patch is in google.patch.
|
| ++This is a partial copy of Hunspell 1.2.10 with the following changes:
|
| ++* Remove '#include "config.h"' from src/hunspell/hunspell.hxx
|
| ++* Remove '#include "config.h"' from src/hunspell/license.hunspell
|
| ++* Change src/hunspell/filemgr.hxx and src/hunspell/filemgr.cxx to use
|
| ++ LineIterator.
|
| ++* Add ScopedHashEntryFactory, which creates temporary hentry objects, to
|
| ++ src/hunspell/suggestmgr.cxx
|
| ++* Change the input params of the constructors to receive a BDICTReader instead
|
| ++ of a file path.
|
| ++The patch is in google.patch.
|
| +
|
| +-The English dictionary distributed by Firefox has been checked in to the
|
| +-dictionaries directory. It has several additions over the default
|
| +-myspell/hunspell dictionary.
|
| +-
|
| +-* Workaround for non-ASCII characters
|
| +-
|
| +-Visual Studio on Japanese Windows assumes the source files to be
|
| +-encoded in Shift_JIS. The compiler is unhappy with non-ASCII letters
|
| +-in the source files of Hunspell. The same problem happens with other
|
| +-CJK Windows as well. Here is the workaround for this problem:
|
| +-
|
| +-Convert 8-bit bytes to hexadecimal escaped forms by
|
| +-
|
| +- % perl -i -De 's/([\x80-\xff])/sprintf("\\x%02x", $1)/ge' src/*.cxx
|
| +-
|
| +-
|
| +-Note that Hunspell upstream is going to fix this problem. We'll no
|
| +-longer need the workaround if the problem is fixed in the upstream.
|
| +-
|
| ++All dictionaries used by Chromium have been checked in to the
|
| ++'third_party/hunspell_dictionaries' directory. They have several additions over
|
| ++the default myspell/hunspell dictionaries.
|
| ++(See 'third_party/hunspell_dictionaries/README.chromium' for their details.)
|
| +Index: src/hunspell/filemgr.cxx
|
| +===================================================================
|
| +--- src/hunspell/filemgr.cxx (revision 48261)
|
| ++++ src/hunspell/filemgr.cxx (working copy)
|
| +@@ -7,6 +7,32 @@
|
| +
|
| + #include "filemgr.hxx"
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++#include "third_party/hunspell/google/bdict_reader.h"
|
| ++
|
| ++FileMgr::FileMgr(hunspell::LineIterator* iterator) : iterator_(iterator) {
|
| ++}
|
| ++
|
| ++FileMgr::~FileMgr() {
|
| ++}
|
| ++
|
| ++char * FileMgr::getline() {
|
| ++ // Read one line from a BDICT file and store the line to our line buffer.
|
| ++ // To emulate the original FileMgr::getline(), this function returns
|
| ++ // the pointer to our line buffer if we can read a line without errors.
|
| ++ // Otherwise, this function returns NULL.
|
| ++ bool result = iterator_->AdvanceAndCopy(line_, BUFSIZE - 1);
|
| ++ return result ? line_ : NULL;
|
| ++}
|
| ++
|
| ++int FileMgr::getlinenum() {
|
| ++ // This function is used only for displaying a line number that causes a
|
| ++ // parser error. For a BDICT file, a line number is of little help in
|
| ++ // locating the cause of a parser error since it is a binary file.
|
| ++ // So, we just return 0.
|
| ++ return 0;
|
| ++}
|
| ++#else
|
| + int FileMgr::fail(const char * err, const char * par) {
|
| + fprintf(stderr, err, par);
|
| + return -1;
|
| +@@ -47,3 +73,4 @@
|
| + int FileMgr::getlinenum() {
|
| + return linenum;
|
| + }
|
| ++#endif
|
| +Index: src/hunspell/suggestmgr.cxx
|
| +===================================================================
|
| +--- src/hunspell/suggestmgr.cxx (revision 48261)
|
| ++++ src/hunspell/suggestmgr.cxx (working copy)
|
| +@@ -12,6 +12,99 @@
|
| +
|
| + const w_char W_VLINE = { '\0', '|' };
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++namespace {
|
| ++// A simple class which creates temporary hentry objects which can be
|
| ++// available only in a scope. To conceal memory operations from SuggestMgr
|
| ++// functions, this object automatically deletes all hentry objects created
|
| ++// through CreateScopedHashEntry() calls in its destructor.
|
| ++// So, the following snippet raises a memory error.
|
| ++//
|
| ++// hentry* bad_copy = NULL;
|
| ++// {
|
| ++// ScopedHashEntryFactory factory;
|
| ++// hentry* scoped_copy = factory.CreateScopedHashEntry(0, source);
|
| ++// ...
|
| ++// bad_copy = scoped_copy;
|
| ++// }
|
| ++// if (bad_copy->word[0]) // memory for scoped_copy has been deleted!
|
| ++//
|
| ++// As listed in the above snippet, it is simple to use this class.
|
| ++// 1. Declare an instance of this ScopedHashEntryFactory, and;
|
| ++// 2. Call its CreateScopedHashEntry() member instead of using 'new hentry' or
|
| ++// 'operator='.
|
| ++//
|
| ++// TODO(hbono): this implementation is slower than the previous one of brettw.
|
| ++// We need to improve it?
|
| ++//
|
| ++class ScopedHashEntryFactory {
|
| ++ public:
|
| ++ ScopedHashEntryFactory();
|
| ++ ~ScopedHashEntryFactory();
|
| ++
|
| ++ // Creates a temporary copy of the given hentry struct.
|
| ++ // The returned copy is available only while this object is available.
|
| ++ // NOTE: this function just calls memcpy() in creating a copy of the given
|
| ++ // hentry struct, i.e. it does NOT copy objects referred by pointers of the
|
| ++ // given hentry struct.
|
| ++ hentry* CreateScopedHashEntry(int index, const hentry* source);
|
| ++
|
| ++ private:
|
| ++ // A struct which encapsulates the new hentry struct used by hunspell 1.2.8.
|
| ++ // The hentry struct used by hunspell 1.2.8 becomes a variable-length struct,
|
| ++ // i.e. it uses its 'word[1]' array member as a variable-length array.
|
| ++ // C/C++ doesn't check boundaries of a char array. For example, for a char
|
| ++ // array 'char word[1]', we can access not only 'word[0]' but also
|
| ++ // 'word[1]', 'word[2]', etc.
|
| ++ // To handle this new hentry struct, we define a struct which combines
|
| ++ // three values (an hentry struct 'hentry', a char array 'word[kMaxWordLen]',
|
| ++ // and an unsigned short value 'astr') so that a HashEntryItem 'hash_item'
|
| ++ // satisfies the following equations:
|
| ++ // hash_item.entry->word[1] == hash_item->word[0].
|
| ++ // hash_item.entry->word[2] == hash_item->word[1].
|
| ++ // ...
|
| ++ // hash_item.entry->word[n] == hash_item->word[n-1].
|
| ++ // ...
|
| ++ // hash_item.entry->word[kMaxWordLen] == hash_item->word[kMaxWordLen-1].
|
| ++ enum {
|
| ++ kMaxWordLen = 128,
|
| ++ };
|
| ++ struct HashEntryItem {
|
| ++ hentry entry;
|
| ++ char word[kMaxWordLen];
|
| ++ unsigned short astr;
|
| ++ };
|
| ++
|
| ++ HashEntryItem hash_items_[MAX_ROOTS];
|
| ++};
|
| ++
|
| ++ScopedHashEntryFactory::ScopedHashEntryFactory() {
|
| ++ memset(&hash_items_[0], 0, sizeof(hash_items_));
|
| ++}
|
| ++
|
| ++ScopedHashEntryFactory::~ScopedHashEntryFactory() {
|
| ++}
|
| ++
|
| ++hentry* ScopedHashEntryFactory::CreateScopedHashEntry(int index,
|
| ++ const hentry* source) {
|
| ++ if (index >= MAX_ROOTS || source->blen >= kMaxWordLen)
|
| ++ return NULL;
|
| ++
|
| ++ // Retrieve a HashEntryItem struct from our spool, initialize it, and
|
| ++ // returns the address of its 'hentry' member.
|
| ++ size_t source_size = sizeof(hentry) + source->blen + 1;
|
| ++ HashEntryItem* hash_item = &hash_items_[index];
|
| ++ memcpy(&hash_item->entry, source, source_size);
|
| ++ if (source->astr) {
|
| ++ hash_item->astr = *source->astr;
|
| ++ hash_item->entry.astr = &hash_item->astr;
|
| ++ }
|
| ++ return &hash_item->entry;
|
| ++}
|
| ++
|
| ++} // namespace
|
| ++#endif
|
| ++
|
| + SuggestMgr::SuggestMgr(const char * tryme, int maxn,
|
| + AffixMgr * aptr)
|
| + {
|
| +@@ -1029,6 +1122,11 @@
|
| +
|
| + struct hentry* hp = NULL;
|
| + int col = -1;
|
| ++
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ ScopedHashEntryFactory hash_entry_factory;
|
| ++#endif
|
| ++
|
| + phonetable * ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
|
| + char target[MAXSWUTF8L];
|
| + char candidate[MAXSWUTF8L];
|
| +@@ -1066,7 +1164,11 @@
|
| +
|
| + if (sc > scores[lp]) {
|
| + scores[lp] = sc;
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ roots[lp] = hash_entry_factory.CreateScopedHashEntry(lp, hp);
|
| ++#else
|
| + roots[lp] = hp;
|
| ++#endif
|
| + lval = sc;
|
| + for (j=0; j < MAX_ROOTS; j++)
|
| + if (scores[j] < lval) {
|
| +Index: src/hunspell/replist.hxx
|
| +===================================================================
|
| +--- src/hunspell/replist.hxx (revision 48261)
|
| ++++ src/hunspell/replist.hxx (working copy)
|
| +@@ -2,6 +2,12 @@
|
| + #ifndef _REPLIST_HXX_
|
| + #define _REPLIST_HXX_
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++// 'near' is treated as a macro when compiling spellchecker.cc, which causes
|
| ++// compilation issues; undefining it here solves that problem.
|
| ++#undef near
|
| ++#endif
|
| ++
|
| + #include "hunvisapi.h"
|
| +
|
| + #include "w_char.hxx"
|
| +Index: src/hunspell/filemgr.hxx
|
| +===================================================================
|
| +--- src/hunspell/filemgr.hxx (revision 48261)
|
| ++++ src/hunspell/filemgr.hxx (working copy)
|
| +@@ -7,6 +7,30 @@
|
| + #include "hunzip.hxx"
|
| + #include <stdio.h>
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++namespace hunspell {
|
| ++class LineIterator;
|
| ++} // namespace hunspell
|
| ++
|
| ++// A class which encapsulates operations of reading a BDICT file.
|
| ++// Chrome uses a BDICT file to compress hunspell dictionaries. A BDICT file is
|
| ++// a binary file converted from a DIC file and an AFF file. (See
|
| ++// "bdict_reader.h" for its format.)
|
| ++// This class encapsulates the operations of reading a BDICT file and emulates
|
| ++// the original FileMgr operations for AffixMgr so that it can read a BDICT
|
| ++// file without so many changes.
|
| ++class FileMgr {
|
| ++ public:
|
| ++ FileMgr(hunspell::LineIterator* iterator);
|
| ++ ~FileMgr();
|
| ++ char* getline();
|
| ++ int getlinenum();
|
| ++
|
| ++ protected:
|
| ++ hunspell::LineIterator* iterator_;
|
| ++ char line_[BUFSIZE + 50]; // input buffer
|
| ++};
|
| ++#else
|
| + class LIBHUNSPELL_DLL_EXPORTED FileMgr
|
| + {
|
| + protected:
|
| +@@ -23,3 +47,4 @@
|
| + int getlinenum();
|
| + };
|
| + #endif
|
| ++#endif
|
| Index: src/hunspell/affixmgr.cxx
|
| ===================================================================
|
| ---- src/hunspell/affixmgr.cxx (revision 3811)
|
| +--- src/hunspell/affixmgr.cxx (revision 48261)
|
| +++ src/hunspell/affixmgr.cxx (working copy)
|
| -@@ -25,7 +27,7 @@
|
| - #endif
|
| - #endif
|
| +@@ -14,8 +14,14 @@
|
|
|
| --AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
|
| -+AffixMgr::AffixMgr(FILE* aff_handle, HashMgr* ptr)
|
| + #include "csutil.hxx"
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++AffixMgr::AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md)
|
| ++{
|
| ++ bdict_reader = reader;
|
| ++#else
|
| + AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key)
|
| {
|
| ++#endif
|
| // register hash manager and load affix data from aff file
|
| - pHMgr = ptr;
|
| -@@ -104,8 +106,8 @@
|
| - contclasses[j] = 0;
|
| + pHMgr = ptr[0];
|
| + alldic = ptr;
|
| +@@ -99,9 +105,17 @@
|
| + sFlag[i] = NULL;
|
| }
|
|
|
| -- if (parse_file(affpath)) {
|
| -- HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);
|
| -+ if (parse_file(aff_handle)) {
|
| -+ HUNSPELL_WARNING(stderr, "Failure loading aff file\n");
|
| - wordchars = mystrdup("qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM");
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ // Define dummy parameters for parse_file() to avoid changing the parameters
|
| ++ // of parse_file(). This may make it easier to merge the changes of the
|
| ++ // original hunspell.
|
| ++ const char* affpath = NULL;
|
| ++ const char* key = NULL;
|
| ++#else
|
| + for (int j=0; j < CONTSIZE; j++) {
|
| + contclasses[j] = 0;
|
| }
|
| -
|
| -@@ -232,7 +234,7 @@
|
| ++#endif
|
|
|
| -
|
| - // read in aff file and build up prefix and suffix entry objects
|
| --int AffixMgr::parse_file(const char * affpath)
|
| -+int AffixMgr::parse_file(FILE* aff_handle)
|
| - {
|
| -
|
| - // io buffers
|
| -@@ -250,11 +252,12 @@
|
| + if (parse_file(affpath, key)) {
|
| + HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);
|
| +@@ -252,6 +266,43 @@
|
| + char * line; // io buffers
|
| + char ft; // affix type
|
|
|
| - // open the affix file
|
| - FILE * afflst;
|
| -- afflst = fopen(affpath,"r");
|
| -+ afflst = _fdopen(_dup(_fileno(aff_handle)), "r");
|
| - if (!afflst) {
|
| -- HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath);
|
| -+ HUNSPELL_WARNING(stderr, "error: could not open affix description file\n");
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ // open the affix file
|
| ++ // We're always UTF-8
|
| ++ utf8 = 1;
|
| ++
|
| ++ // A BDICT file stores PFX and SFX lines in a special section and it provides
|
| ++ // a special line iterator for reading PFX and SFX lines.
|
| ++ // We create a FileMgr object from this iterator and parse PFX and SFX lines
|
| ++ // before parsing other lines.
|
| ++ hunspell::LineIterator affix_iterator = bdict_reader->GetAffixLineIterator();
|
| ++ FileMgr* iterator = new FileMgr(&affix_iterator);
|
| ++ if (!iterator) {
|
| ++ HUNSPELL_WARNING(stderr,
|
| ++ "error: could not create a FileMgr from an affix line iterator.\n");
|
| ++ return 1;
|
| ++ }
|
| ++
|
| ++ while (line = iterator->getline()) {
|
| ++ ft = ' ';
|
| ++ if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
|
| ++ if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S';
|
| ++ if (ft != ' ')
|
| ++ parse_affix(line, ft, iterator, NULL);
|
| ++ }
|
| ++ delete iterator;
|
| ++
|
| ++ // Create a FileMgr object for reading lines except PFX and SFX lines.
|
| ++ // We don't need to change the loop below since our FileMgr emulates the
|
| ++ // original one.
|
| ++ hunspell::LineIterator other_iterator = bdict_reader->GetOtherLineIterator();
|
| ++ FileMgr * afflst = new FileMgr(&other_iterator);
|
| ++ if (!afflst) {
|
| ++ HUNSPELL_WARNING(stderr,
|
| ++ "error: could not create a FileMgr from an other line iterator.\n");
|
| ++ return 1;
|
| ++ }
|
| ++#else
|
| + // checking flag duplication
|
| + char dupflags[CONTSIZE];
|
| + char dupflags_ini = 1;
|
| +@@ -265,6 +316,7 @@
|
| + HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath);
|
| return 1;
|
| }
|
| -+ fseek(afflst, 0, SEEK_SET);
|
| ++#endif
|
|
|
| // step one is to parse the affix file building up the internal
|
| // affix data structures
|
| +@@ -274,6 +326,7 @@
|
| + while ((line = afflst->getline())) {
|
| + mychomp(line);
|
| +
|
| ++#ifndef HUNSPELL_CHROME_CLIENT
|
| + /* remove byte order mark */
|
| + if (firstline) {
|
| + firstline = 0;
|
| +@@ -282,6 +335,7 @@
|
| + memmove(line, line+3, strlen(line+3)+1);
|
| + }
|
| + }
|
| ++#endif
|
| +
|
| + /* parse in the keyboard string */
|
| + if (strncmp(line,"KEY",3) == 0) {
|
| +@@ -517,6 +571,7 @@
|
| + }
|
| + }
|
| +
|
| ++#ifndef HUNSPELL_CHROME_CLIENT
|
| + /* parse in the typical fault correcting table */
|
| + if (strncmp(line,"REP",3) == 0) {
|
| + if (parse_reptable(line, afflst)) {
|
| +@@ -524,6 +579,7 @@
|
| + return 1;
|
| + }
|
| + }
|
| ++#endif
|
| +
|
| + /* parse in the input conversion table */
|
| + if (strncmp(line,"ICONV",5) == 0) {
|
| +@@ -634,6 +690,7 @@
|
| + checksharps=1;
|
| + }
|
| +
|
| ++#ifndef HUNSPELL_CHROME_CLIENT // Chrome handled affixes above.
|
| + /* parse this affix: P - prefix, S - suffix */
|
| + ft = ' ';
|
| + if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
|
| +@@ -650,6 +707,7 @@
|
| + return 1;
|
| + }
|
| + }
|
| ++#endif
|
| +
|
| + }
|
| + delete afflst;
|
| +@@ -1247,6 +1305,26 @@
|
| + const char * r;
|
| + int lenr, lenp;
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ const char *pattern, *pattern2;
|
| ++ hunspell::ReplacementIterator iterator = bdict_reader->GetReplacementIterator();
|
| ++ while (iterator.GetNext(&pattern, &pattern2)) {
|
| ++ r = word;
|
| ++ lenr = strlen(pattern2);
|
| ++ lenp = strlen(pattern);
|
| ++
|
| ++ // search every occurrence of the pattern in the word
|
| ++ while ((r=strstr(r, pattern)) != NULL) {
|
| ++ strcpy(candidate, word);
|
| ++ if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break;
|
| ++ strcpy(candidate+(r-word), pattern2);
|
| ++ strcpy(candidate+(r-word)+lenr, r+lenp);
|
| ++ if (candidate_check(candidate,strlen(candidate))) return 1;
|
| ++ r++; // search for the next letter
|
| ++ }
|
| ++ }
|
| ++
|
| ++#else
|
| + if ((wl < 2) || !numrep) return 0;
|
| +
|
| + for (int i=0; i < numrep; i++ ) {
|
| +@@ -1263,6 +1341,7 @@
|
| + r++; // search for the next letter
|
| + }
|
| + }
|
| ++#endif
|
| + return 0;
|
| + }
|
| +
|
| +@@ -3332,6 +3411,7 @@
|
| + return 0;
|
| + }
|
| +
|
| ++#ifndef HUNSPELL_CHROME_CLIENT
|
| + /* parse in the typical fault correcting table */
|
| + int AffixMgr::parse_reptable(char * line, FileMgr * af)
|
| + {
|
| +@@ -3407,6 +3487,7 @@
|
| + }
|
| + return 0;
|
| + }
|
| ++#endif
|
| +
|
| + /* parse in the typical fault correcting table */
|
| + int AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword)
|
| +@@ -4010,6 +4091,7 @@
|
| + case 1: {
|
| + np++;
|
| + aflag = pHMgr->decode_flag(piece);
|
| ++#ifndef HUNSPELL_CHROME_CLIENT // We don't check for duplicates.
|
| + if (((at == 'S') && (dupflags[aflag] & dupSFX)) ||
|
| + ((at == 'P') && (dupflags[aflag] & dupPFX))) {
|
| + HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix flag\n",
|
| +@@ -4017,6 +4099,7 @@
|
| + // return 1; XXX permissive mode for bad dictionaries
|
| + }
|
| + dupflags[aflag] += (char) ((at == 'S') ? dupSFX : dupPFX);
|
| ++#endif
|
| + break;
|
| + }
|
| + // piece 3 - is cross product indicator
|
| Index: src/hunspell/affixmgr.hxx
|
| ===================================================================
|
| ---- src/hunspell/affixmgr.hxx (revision 3811)
|
| +--- src/hunspell/affixmgr.hxx (revision 48261)
|
| +++ src/hunspell/affixmgr.hxx (working copy)
|
| -@@ -93,7 +93,7 @@
|
| -
|
| +@@ -18,6 +18,40 @@
|
| + class PfxEntry;
|
| + class SfxEntry;
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++
|
| ++#include <vector>
|
| ++
|
| ++// This class provides an implementation of the contclasses array in AffixMgr
|
| ++// that is normally a large static array. We should almost never need more than
|
| ++// 256 elements, so this class only allocates that much to start off with. If
|
| ++// elements higher than that are actually used, we'll automatically expand.
|
| ++class ContClasses {
|
| ++ public:
|
| ++ ContClasses() {
|
| ++ // Pre-allocate a buffer so that typically, we'll never have to resize.
|
| ++ EnsureSizeIs(256);
|
| ++ }
|
| ++
|
| ++ char& operator[](size_t index) {
|
| ++ EnsureSizeIs(index + 1);
|
| ++ return data[index];
|
| ++ }
|
| ++
|
| ++ void EnsureSizeIs(size_t new_size) {
|
| ++ if (data.size() >= new_size)
|
| ++ return; // Nothing to do.
|
| ++
|
| ++ size_t old_size = data.size();
|
| ++ data.resize(new_size);
|
| ++ memset(&data[old_size], 0, new_size - old_size);
|
| ++ }
|
| ++
|
| ++ std::vector<char> data;
|
| ++};
|
| ++
|
| ++#endif // HUNSPELL_CHROME_CLIENT
|
| ++
|
| + class LIBHUNSPELL_DLL_EXPORTED AffixMgr
|
| + {
|
| +
|
| +@@ -98,12 +132,20 @@
|
| + int fullstrip;
|
| +
|
| + int havecontclass; // boolean variable
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ ContClasses contclasses;
|
| ++#else
|
| + char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
|
| ++#endif
|
| +
|
| public:
|
| -
|
| -- AffixMgr(const char * affpath, HashMgr * ptr);
|
| -+ AffixMgr(FILE* aff_handle, HashMgr * ptr);
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md);
|
| ++#else
|
| + AffixMgr(const char * affpath, HashMgr** ptr, int * md,
|
| + const char * key = NULL);
|
| ++#endif
|
| ~AffixMgr();
|
| struct hentry * affix_check(const char * word, int len,
|
| - const unsigned short needflag = (unsigned short) 0, char in_compound = IN_CPD_NOT);
|
| -@@ -179,7 +179,7 @@
|
| - int get_checksharps(void);
|
| + const unsigned short needflag = (unsigned short) 0,
|
| +@@ -202,6 +244,10 @@
|
| + int get_fullstrip() const;
|
|
|
| private:
|
| -- int parse_file(const char * affpath);
|
| -+ int parse_file(FILE* aff_handle);
|
| - // int parse_string(char * line, char ** out, const char * name);
|
| - int parse_flag(char * line, unsigned short * out, const char * name);
|
| - int parse_num(char * line, int * out, const char * name);
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ // Not owned by us, owned by the Hunspell object.
|
| ++ hunspell::BDictReader* bdict_reader;
|
| ++#endif
|
| + int parse_file(const char * affpath, const char * key);
|
| + int parse_flag(char * line, unsigned short * out, FileMgr * af);
|
| + int parse_num(char * line, int * out, FileMgr * af);
|
| +Index: src/hunspell/htypes.hxx
|
| +===================================================================
|
| +--- src/hunspell/htypes.hxx (revision 48261)
|
| ++++ src/hunspell/htypes.hxx (working copy)
|
| +@@ -1,6 +1,16 @@
|
| + #ifndef _HTYPES_HXX_
|
| + #define _HTYPES_HXX_
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++// This is a workaround for preventing errors in parsing Turkish BDICs, which
|
| ++// contain very long AF lines (~ 12,000 chars).
|
| ++// TODO(hbono) change the HashMgr::parse_aliasf() function to be able to parse
|
| ++// longer lines than MAXDELEN.
|
| ++#define MAXDELEN (8192 * 2)
|
| ++#else
|
| ++#define MAXDELEN 8192
|
| ++#endif // HUNSPELL_CHROME_CLIENT
|
| ++
|
| + #define ROTATE_LEN 5
|
| +
|
| + #define ROTATE(v,q) \
|
| Index: src/hunspell/hashmgr.cxx
|
| ===================================================================
|
| ---- src/hunspell/hashmgr.cxx (revision 3811)
|
| +--- src/hunspell/hashmgr.cxx (revision 48261)
|
| +++ src/hunspell/hashmgr.cxx (working copy)
|
| -@@ -29,7 +31,7 @@
|
| +@@ -12,8 +12,14 @@
|
|
|
| // build a hash table from a munched word list
|
|
|
| --HashMgr::HashMgr(const char * tpath, const char * apath)
|
| -+HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle)
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++HashMgr::HashMgr(hunspell::BDictReader* reader)
|
| ++{
|
| ++ bdict_reader = reader;
|
| ++#else
|
| + HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
|
| {
|
| ++#endif
|
| tablesize = 0;
|
| tableptr = NULL;
|
| -@@ -43,8 +45,8 @@
|
| - aliasf = NULL;
|
| + flag_mode = FLAG_CHAR;
|
| +@@ -31,8 +37,14 @@
|
| numaliasm = 0;
|
| aliasm = NULL;
|
| -- load_config(apath);
|
| -- int ec = load_tables(tpath);
|
| -+ load_config(aff_handle);
|
| -+ int ec = load_tables(dic_handle);
|
| + forbiddenword = FORBIDDENWORD; // forbidden word signing flag
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ // No tables to load, just the AF lines.
|
| ++ load_config(NULL, NULL);
|
| ++ int ec = LoadAFLines();
|
| ++#else
|
| + load_config(apath, key);
|
| + int ec = load_tables(tpath, key);
|
| ++#endif
|
| if (ec) {
|
| /* error condition - what should we do here */
|
| HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
|
| -@@ -240,7 +242,7 @@
|
| +@@ -91,15 +103,59 @@
|
| + if (ignorechars) free(ignorechars);
|
| + if (ignorechars_utf16) free(ignorechars_utf16);
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ EmptyHentryCache();
|
| ++ for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin();
|
| ++ it != pointer_to_strings_.end(); ++it) {
|
| ++ delete *it;
|
| ++ }
|
| ++#endif
|
| ++
|
| + #ifdef MOZILLA_CLIENT
|
| + delete [] csconv;
|
| + #endif
|
| }
|
|
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++void HashMgr::EmptyHentryCache() {
|
| ++ // We need to delete each cache entry, and each additional one in the linked
|
| ++ // list of homonyms.
|
| ++ for (HEntryCache::iterator i = hentry_cache.begin();
|
| ++ i != hentry_cache.end(); ++i) {
|
| ++ hentry* cur = i->second;
|
| ++ while (cur) {
|
| ++ hentry* next = cur->next_homonym;
|
| ++ DeleteHashEntry(cur);
|
| ++ cur = next;
|
| ++ }
|
| ++ }
|
| ++ hentry_cache.clear();
|
| ++}
|
| ++#endif
|
| ++
|
| + // lookup a root word in the hashtable
|
| +
|
| + struct hentry * HashMgr::lookup(const char *word) const
|
| + {
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
|
| ++ int affix_count = bdict_reader->FindWord(word, affix_ids);
|
| ++ if (affix_count == 0) { // look for custom added word
|
| ++ std::map<base::StringPiece, int>::const_iterator iter =
|
| ++ custom_word_to_affix_id_map_.find(word);
|
| ++ if (iter != custom_word_to_affix_id_map_.end()) {
|
| ++ affix_count = 1;
|
| ++ affix_ids[0] = iter->second;
|
| ++ }
|
| ++ }
|
| ++
|
| ++ static const int kMaxWordLen = 128;
|
| ++ static char word_buf[kMaxWordLen];
|
| ++ // To take account of null-termination, we use up to 127.
|
| ++ strncpy(word_buf, word, kMaxWordLen - 1);
|
| ++
|
| ++ return AffixIDsToHentry(word_buf, affix_ids, affix_count);
|
| ++#else
|
| + struct hentry * dp;
|
| + if (tableptr) {
|
| + dp = tableptr[hash(word)];
|
| +@@ -109,12 +165,14 @@
|
| + }
|
| + }
|
| + return NULL;
|
| ++#endif
|
| + }
|
| +
|
| + // add a word to the hash table (private)
|
| + int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
|
| + int al, const char * desc, bool onlyupcase)
|
| + {
|
| ++#ifndef HUNSPELL_CHROME_CLIENT
|
| + bool upcasehomonym = false;
|
| + int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
|
| + // variable-length hash record with word and optional fields
|
| +@@ -206,6 +264,17 @@
|
| + if (hp->astr) free(hp->astr);
|
| + free(hp);
|
| + }
|
| ++#else
|
| ++ std::map<base::StringPiece, int>::iterator iter =
|
| ++ custom_word_to_affix_id_map_.find(word);
|
| ++ if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added
|
| ++ std::string* new_string_word = new std::string(word);
|
| ++ pointer_to_strings_.push_back(new_string_word);
|
| ++ base::StringPiece sp(*(new_string_word));
|
| ++ custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words
|
| ++ return 1;
|
| ++ }
|
| ++#endif
|
| + return 0;
|
| + }
|
| +
|
| +@@ -339,6 +408,43 @@
|
| + // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
|
| + struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
|
| + {
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ // Return NULL if dictionary is not valid.
|
| ++ if (!bdict_reader->IsValid())
|
| ++ return NULL;
|
| ++
|
| ++ // This function is only ever called by one place and not nested. We can
|
| ++ // therefore keep static state between calls and use |col| as a "reset" flag
|
| ++ // to avoid changing the API. It is set to -1 for the first call.
|
| ++ static hunspell::WordIterator word_iterator =
|
| ++ bdict_reader->GetAllWordIterator();
|
| ++ if (col < 0) {
|
| ++ col = 1;
|
| ++ word_iterator = bdict_reader->GetAllWordIterator();
|
| ++ }
|
| ++
|
| ++ int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
|
| ++ static const int kMaxWordLen = 128;
|
| ++ static char word[kMaxWordLen];
|
| ++ int affix_count = word_iterator.Advance(word, kMaxWordLen, affix_ids);
|
| ++ if (affix_count == 0)
|
| ++ return NULL;
|
| ++ short word_len = static_cast<short>(strlen(word));
|
| ++
|
| ++ // Since hunspell 1.2.8, an hentry struct becomes a variable-length struct,
|
| ++ // i.e. a struct which uses its array 'word[1]' as a variable-length array.
|
| ++ // As noted above, this function is not nested. So, we just use a static
|
| ++ // struct which consists of an hentry and a char[kMaxWordLen], and initialize
|
| ++ // the static struct and return it for now.
|
| ++ // No need to create linked lists for the extra affixes.
|
| ++ static struct {
|
| ++ hentry entry;
|
| ++ char word[kMaxWordLen];
|
| ++ } hash_entry;
|
| ++
|
| ++ return InitHashEntry(&hash_entry.entry, sizeof(hash_entry),
|
| ++ &word[0], word_len, affix_ids[0]);
|
| ++#else
|
| + if (hp && hp->next != NULL) return hp->next;
|
| + for (col++; col < tablesize; col++) {
|
| + if (tableptr[col]) return tableptr[col];
|
| +@@ -346,11 +452,13 @@
|
| + // null at end and reset to start
|
| + col = -1;
|
| + return NULL;
|
| ++#endif
|
| + }
|
| +
|
| // load a munched word list and build a hash table on the fly
|
| --int HashMgr::load_tables(const char * tpath)
|
| -+int HashMgr::load_tables(FILE* t_handle)
|
| + int HashMgr::load_tables(const char * tpath, const char * key)
|
| {
|
| - int wl, al;
|
| ++#ifndef HUNSPELL_CHROME_CLIENT
|
| + int al;
|
| char * ap;
|
| -@@ -248,8 +250,9 @@
|
| - unsigned short * flags;
|
| + char * dp;
|
| +@@ -470,6 +578,7 @@
|
| + }
|
|
|
| - // raw dictionary - munched file
|
| -- FILE * rawdict = fopen(tpath, "r");
|
| -+ FILE * rawdict = _fdopen(_dup(_fileno(t_handle)), "r");
|
| - if (rawdict == NULL) return 1;
|
| -+ fseek(rawdict, 0, SEEK_SET);
|
| + delete dict;
|
| ++#endif
|
| + return 0;
|
| + }
|
|
|
| - // first read the first line of file to get hash table size */
|
| - char ts[MAXDELEN];
|
| -@@ -442,7 +445,7 @@
|
| +@@ -478,6 +587,9 @@
|
| +
|
| + int HashMgr::hash(const char * word) const
|
| + {
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ return 0;
|
| ++#else
|
| + long hv = 0;
|
| + for (int i=0; i < 4 && *word != 0; i++)
|
| + hv = (hv << 8) | (*word++);
|
| +@@ -486,6 +598,7 @@
|
| + hv ^= (*word++);
|
| + }
|
| + return (unsigned long) hv % tablesize;
|
| ++#endif
|
| }
|
|
|
| - // read in aff file and set flag mode
|
| --int HashMgr::load_config(const char * affpath)
|
| -+int HashMgr::load_config(FILE* aff_handle)
|
| - {
|
| + int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {
|
| +@@ -607,7 +720,12 @@
|
| int firstline = 1;
|
| -
|
| -@@ -451,11 +454,12 @@
|
|
|
| // open the affix file
|
| - FILE * afflst;
|
| -- afflst = fopen(affpath,"r");
|
| -+ afflst = _fdopen(_dup(_fileno(aff_handle)), "r");
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator();
|
| ++ FileMgr * afflst = new FileMgr(&iterator);
|
| ++#else
|
| + FileMgr * afflst = new FileMgr(affpath, key);
|
| ++#endif
|
| if (!afflst) {
|
| -- HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);
|
| -+ HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n");
|
| + HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);
|
| return 1;
|
| - }
|
| -+ fseek(afflst, 0, SEEK_SET);
|
| +@@ -802,6 +920,121 @@
|
| + return 0;
|
| + }
|
|
|
| - // read in each line ignoring any that do not
|
| - // start with a known line type indicator
|
| -Index: src/hunspell/hashmgr.hxx
|
| -===================================================================
|
| ---- src/hunspell/hashmgr.hxx (revision 3811)
|
| -+++ src/hunspell/hashmgr.hxx (working copy)
|
| -@@ -25,7 +25,7 @@
|
| -
|
| -
|
| - public:
|
| -- HashMgr(const char * tpath, const char * apath);
|
| -+ HashMgr(FILE* t_handle, FILE* a_handle);
|
| - ~HashMgr();
|
| -
|
| - struct hentry * lookup(const char *) const;
|
| -@@ -46,9 +46,9 @@
|
| -
|
| -
|
| - private:
|
| -- int load_tables(const char * tpath);
|
| -+ int load_tables(FILE* t_handle);
|
| - int add_word(const char * word, int wl, unsigned short * ap, int al, const char * desc);
|
| -- int load_config(const char * affpath);
|
| -+ int load_config(FILE* aff_handle);
|
| - int parse_aliasf(char * line, FILE * af);
|
| - #ifdef HUNSPELL_EXPERIMENTAL
|
| - int parse_aliasm(char * line, FILE * af);
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++int HashMgr::LoadAFLines()
|
| ++{
|
| ++ utf8 = 1; // We always use UTF-8.
|
| ++
|
| ++ // Read in all the AF lines which tell us the rules for each affix group ID.
|
| ++ hunspell::LineIterator iterator = bdict_reader->GetAfLineIterator();
|
| ++ FileMgr afflst(&iterator);
|
| ++ while (char* line = afflst.getline()) {
|
| ++ int rv = parse_aliasf(line, &afflst);
|
| ++ if (rv)
|
| ++ return rv;
|
| ++ }
|
| ++
|
| ++ return 0;
|
| ++}
|
| ++
|
| ++hentry* HashMgr::InitHashEntry(hentry* entry,
|
| ++ size_t item_size,
|
| ++ const char* word,
|
| ++ int word_length,
|
| ++ int affix_index) const {
|
| ++ // Return if the given buffer doesn't have enough space for a hentry struct
|
| ++ // or the given word is too long.
|
| ++ // Our BDICT cannot handle words longer than (128 - 1) bytes. So, it is
|
| ++ // better to return an error if the given word is too long and prevent
|
| ++ // an unexpected result caused by a long word.
|
| ++ const int kMaxWordLen = 128;
|
| ++ if (item_size < sizeof(hentry) + word_length + 1 ||
|
| ++ word_length >= kMaxWordLen)
|
| ++ return NULL;
|
| ++
|
| ++ // Initialize a hentry struct with the given parameters, and
|
| ++ // append the given string at the end of this hentry struct.
|
| ++ memset(entry, 0, item_size);
|
| ++ FileMgr af(NULL);
|
| ++ entry->alen = static_cast<short>(
|
| ++ const_cast<HashMgr*>(this)->get_aliasf(affix_index, &entry->astr, &af));
|
| ++ entry->blen = static_cast<unsigned char>(word_length);
|
| ++ memcpy(&entry->word, word, word_length);
|
| ++
|
| ++ return entry;
|
| ++}
|
| ++
|
| ++hentry* HashMgr::CreateHashEntry(const char* word,
|
| ++ int word_length,
|
| ++ int affix_index) const {
|
| ++ // Return if the given word is too long.
|
| ++ // (See the comment in HashMgr::InitHashEntry().)
|
| ++ const int kMaxWordLen = 128;
|
| ++ if (word_length >= kMaxWordLen)
|
| ++ return NULL;
|
| ++
|
| ++ const size_t kEntrySize = sizeof(hentry) + word_length + 1;
|
| ++ struct hentry* entry = reinterpret_cast<hentry*>(malloc(kEntrySize));
|
| ++ if (entry)
|
| ++ InitHashEntry(entry, kEntrySize, word, word_length, affix_index);
|
| ++
|
| ++ return entry;
|
| ++}
|
| ++
|
| ++void HashMgr::DeleteHashEntry(hentry* entry) const {
|
| ++ free(entry);
|
| ++}
|
| ++
|
| ++hentry* HashMgr::AffixIDsToHentry(char* word,
|
| ++ int* affix_ids,
|
| ++ int affix_count) const
|
| ++{
|
| ++ if (affix_count == 0)
|
| ++ return NULL;
|
| ++
|
| ++ HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache;
|
| ++ std::string std_word(word);
|
| ++ HEntryCache::iterator found = cache.find(std_word);
|
| ++ if (found != cache.end()) {
|
| ++ // We must return an existing hentry for the same word if we've previously
|
| ++ // handed one out. Hunspell will compare pointers in some cases to see if
|
| ++ // two words it has found are the same.
|
| ++ return found->second;
|
| ++ }
|
| ++
|
| ++ short word_len = static_cast<short>(strlen(word));
|
| ++
|
| ++ // We can get a number of prefixes per word. There will normally be only one,
|
| ++ // but if not, there will be a linked list of "hentry"s for the "homonym"s
|
| ++ // for the word.
|
| ++ struct hentry* first_he = NULL;
|
| ++ struct hentry* prev_he = NULL; // For making linked list.
|
| ++ for (int i = 0; i < affix_count; i++) {
|
| ++ struct hentry* he = CreateHashEntry(word, word_len, affix_ids[i]);
|
| ++ if (!he)
|
| ++ break;
|
| ++ if (i == 0)
|
| ++ first_he = he;
|
| ++ if (prev_he)
|
| ++ prev_he->next_homonym = he;
|
| ++ prev_he = he;
|
| ++ }
|
| ++
|
| ++ cache[std_word] = first_he; // Save this word in the cache for later.
|
| ++ return first_he;
|
| ++}
|
| ++
|
| ++hentry* HashMgr::GetHentryFromHEntryCache(char* word) {
|
| ++ HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache;
|
| ++ std::string std_word(word);
|
| ++ HEntryCache::iterator found = cache.find(std_word);
|
| ++ if (found != cache.end())
|
| ++ return found->second;
|
| ++ else
|
| ++ return NULL;
|
| ++}
|
| ++#endif
|
| ++
|
| + int HashMgr::is_aliasf() {
|
| + return (aliasf != NULL);
|
| + }
|
| Index: src/hunspell/hunspell.cxx
|
| ===================================================================
|
| ---- src/hunspell/hunspell.cxx (revision 3811)
|
| +--- src/hunspell/hunspell.cxx (revision 48261)
|
| +++ src/hunspell/hunspell.cxx (working copy)
|
| -@@ -20,7 +20,7 @@
|
| - #endif
|
| - #endif
|
| +@@ -7,18 +7,35 @@
|
|
|
| --Hunspell::Hunspell(const char * affpath, const char * dpath)
|
| -+Hunspell::Hunspell(FILE* aff_handle, FILE* dic_handle)
|
| + #include "hunspell.hxx"
|
| + #include "hunspell.h"
|
| ++#ifndef HUNSPELL_CHROME_CLIENT
|
| + #include "config.h"
|
| ++#endif
|
| + #include "csutil.hxx"
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++Hunspell::Hunspell(const unsigned char* bdict_data, size_t bdict_length)
|
| ++#else
|
| + Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
|
| ++#endif
|
| {
|
| encoding = NULL;
|
| csconv = NULL;
|
| -@@ -28,11 +28,11 @@
|
| + utf8 = 0;
|
| complexprefixes = 0;
|
| ++#ifndef HUNSPELL_CHROME_CLIENT
|
| + affixpath = mystrdup(affpath);
|
| ++#endif
|
| + maxdic = 0;
|
|
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ bdict_reader = new hunspell::BDictReader;
|
| ++ bdict_reader->Init(bdict_data, bdict_length);
|
| ++
|
| ++ pHMgr[0] = new HashMgr(bdict_reader);
|
| ++ if (pHMgr[0]) maxdic = 1;
|
| ++
|
| ++ pAMgr = new AffixMgr(bdict_reader, pHMgr, &maxdic);
|
| ++#else
|
| /* first set up the hash manager */
|
| -- pHMgr = new HashMgr(dpath, affpath);
|
| -+ pHMgr = new HashMgr(dic_handle, aff_handle);
|
| -
|
| + pHMgr[0] = new HashMgr(dpath, affpath, key);
|
| + if (pHMgr[0]) maxdic = 1;
|
| +@@ -26,6 +43,7 @@
|
| /* next set up the affix manager */
|
| /* it needs access to the hash manager lookup methods */
|
| -- pAMgr = new AffixMgr(affpath,pHMgr);
|
| -+ pAMgr = new AffixMgr(aff_handle, pHMgr);
|
| + pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
|
| ++#endif
|
|
|
| /* get the preferred try string and the dictionary */
|
| /* encoding from the Affix Manager for that dictionary */
|
| -@@ -1694,9 +1694,9 @@
|
| +@@ -56,10 +74,17 @@
|
| + csconv= NULL;
|
| + if (encoding) free(encoding);
|
| + encoding = NULL;
|
| ++
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ if (bdict_reader) delete bdict_reader;
|
| ++ bdict_reader = NULL;
|
| ++#else
|
| + if (affixpath) free(affixpath);
|
| + affixpath = NULL;
|
| ++#endif
|
| + }
|
|
|
| - #endif // END OF HUNSPELL_EXPERIMENTAL CODE
|
| ++#ifndef HUNSPELL_CHROME_CLIENT
|
| + // load extra dictionaries
|
| + int Hunspell::add_dic(const char * dpath, const char * key) {
|
| + if (maxdic == MAXDIC || !affixpath) return 1;
|
| +@@ -67,6 +92,7 @@
|
| + if (pHMgr[maxdic]) maxdic++; else return 1;
|
| + return 0;
|
| + }
|
| ++#endif
|
|
|
| --Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
|
| -+Hunhandle *Hunspell_create(FILE* aff_handle, FILE* dic_handle)
|
| + // make a copy of src at destination while removing all leading
|
| + // blanks and removing any trailing periods after recording
|
| +@@ -319,6 +345,9 @@
|
| +
|
| + int Hunspell::spell(const char * word, int * info, char ** root)
|
| {
|
| -- return (Hunhandle*)(new Hunspell(affpath, dpath));
|
| -+ return (Hunhandle*)(new Hunspell(aff_handle, dic_handle));
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ if (pHMgr) pHMgr[0]->EmptyHentryCache();
|
| ++#endif
|
| + struct hentry * rv=NULL;
|
| + // need larger vector. For example, Turkish capital letter I converted a
|
| + // 2-byte UTF-8 character (dotless i) by mkallsmall.
|
| +@@ -567,6 +596,13 @@
|
| + word = w2;
|
| + } else word = w;
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ // We need to check that the word length is valid to make Coverity (Event
|
| ++ // fixed_size_dest: Possible overrun of N byte fixed size buffer) happy.
|
| ++ if ((utf8 && strlen(word) >= MAXWORDUTF8LEN) || (!utf8 && strlen(word) >= MAXWORDLEN))
|
| ++ return NULL;
|
| ++#endif
|
| ++
|
| + // word reversing wrapper for complex prefixes
|
| + if (complexprefixes) {
|
| + if (word != w2) {
|
| +@@ -657,6 +693,9 @@
|
| +
|
| + int Hunspell::suggest(char*** slst, const char * word)
|
| + {
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ if (pHMgr) pHMgr[0]->EmptyHentryCache();
|
| ++#endif
|
| + int onlycmpdsug = 0;
|
| + char cw[MAXWORDUTF8LEN];
|
| + char wspace[MAXWORDUTF8LEN];
|
| +@@ -1874,13 +1913,21 @@
|
| +
|
| + Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
|
| + {
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ return NULL;
|
| ++#else
|
| + return (Hunhandle*)(new Hunspell(affpath, dpath));
|
| ++#endif
|
| }
|
|
|
| + Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
|
| + const char * key)
|
| + {
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ return NULL;
|
| ++#else
|
| + return (Hunhandle*)(new Hunspell(affpath, dpath, key));
|
| ++#endif
|
| + }
|
| +
|
| void Hunspell_destroy(Hunhandle *pHunspell)
|
| +Index: src/hunspell/hashmgr.hxx
|
| +===================================================================
|
| +--- src/hunspell/hashmgr.hxx (revision 48261)
|
| ++++ src/hunspell/hashmgr.hxx (working copy)
|
| +@@ -8,10 +8,25 @@
|
| + #include "htypes.hxx"
|
| + #include "filemgr.hxx"
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++#include <string>
|
| ++#include <map>
|
| ++
|
| ++#include "base/stl_util-inl.h"
|
| ++#include "base/string_piece.h"
|
| ++#include "third_party/hunspell/google/bdict_reader.h"
|
| ++#endif
|
| ++
|
| + enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
|
| +
|
| + class LIBHUNSPELL_DLL_EXPORTED HashMgr
|
| + {
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ // Not owned by this class, owned by the Hunspell object.
|
| ++ hunspell::BDictReader* bdict_reader;
|
| ++ std::map<base::StringPiece, int> custom_word_to_affix_id_map_;
|
| ++ std::vector<std::string*> pointer_to_strings_;
|
| ++#endif
|
| + int tablesize;
|
| + struct hentry ** tableptr;
|
| + int userword;
|
| +@@ -34,7 +49,23 @@
|
| +
|
| +
|
| + public:
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ HashMgr(hunspell::BDictReader* reader);
|
| ++
|
| ++ // Return the hentry corresponding to the given word. Returns NULL if the
|
| ++ // word is not there in the cache.
|
| ++ hentry* GetHentryFromHEntryCache(char* word);
|
| ++
|
| ++ // Called before we do a new operation. This will empty the cache of pointers
|
| ++ // to hentries that we have cached. In Chrome, we make these on-demand, but
|
| ++ // they must live as long as the single spellcheck operation that they're part
|
| ++ // of since Hunspell will save pointers to various ones as it works.
|
| ++ //
|
| ++ // This function allows that cache to be emptied and not grow infinitely.
|
| ++ void EmptyHentryCache();
|
| ++#else
|
| + HashMgr(const char * tpath, const char * apath, const char * key = NULL);
|
| ++#endif
|
| + ~HashMgr();
|
| +
|
| + struct hentry * lookup(const char *) const;
|
| +@@ -59,6 +90,40 @@
|
| + int al, const char * desc, bool onlyupcase);
|
| + int load_config(const char * affpath, const char * key);
|
| + int parse_aliasf(char * line, FileMgr * af);
|
| ++
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ // Loads the AF lines from a BDICT.
|
| ++ // A BDICT file compresses its AF lines to save memory.
|
| ++ // This function decompresses each AF line and calls parse_aliasf().
|
| ++ int LoadAFLines();
|
| ++
|
| ++ // Helper functions that create a new hentry struct, initialize it, and
|
| ++ // delete it.
|
| ++ // These functions encapsulate non-trivial operations in creating and
|
| ++ // initializing a hentry struct from BDICT data to avoid changing code so much
|
| ++ // even when a hentry struct is changed.
|
| ++ hentry* InitHashEntry(hentry* entry,
|
| ++ size_t item_size,
|
| ++ const char* word,
|
| ++ int word_length,
|
| ++ int affix_index) const;
|
| ++ hentry* CreateHashEntry(const char* word,
|
| ++ int word_length,
|
| ++ int affix_index) const;
|
| ++ void DeleteHashEntry(hentry* entry) const;
|
| ++
|
| ++ // Converts the list of affix IDs to a linked list of hentry structures. Each
|
| ++ // hentry holds its own copy of the given word. The returned list is allocated
|
| ++ // on the heap and stored in hentry_cache, so a later call with the same |word|
|
| ++ // returns the same pointer. Returns NULL when |affix_count| is 0.
|
| ++ hentry* AffixIDsToHentry(char* word, int* affix_ids, int affix_count) const;
|
| ++
|
| ++ // See EmptyHentryCache above. Note that each one is actually a linked list
|
| ++ // chained through the next_homonym pointer.
|
| ++ typedef std::map<std::string, hentry*> HEntryCache;
|
| ++ HEntryCache hentry_cache;
|
| ++#endif
|
| ++
|
| + int add_hidden_capitalized_word(char * word, int wbl, int wcl,
|
| + unsigned short * flags, int al, char * dp, int captype);
|
| + int parse_aliasm(char * line, FileMgr * af);
|
| Index: src/hunspell/hunspell.hxx
|
| ===================================================================
|
| ---- src/hunspell/hunspell.hxx (revision 3811)
|
| +--- src/hunspell/hunspell.hxx (revision 48261)
|
| +++ src/hunspell/hunspell.hxx (working copy)
|
| -@@ -48,7 +48,7 @@
|
| +@@ -5,6 +5,10 @@
|
| + #include "suggestmgr.hxx"
|
| + #include "langnum.hxx"
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++#include "third_party/hunspell/google/bdict_reader.h"
|
| ++#endif
|
| ++
|
| + #define SPELL_COMPOUND (1 << 0)
|
| + #define SPELL_FORBIDDEN (1 << 1)
|
| + #define SPELL_ALLCAP (1 << 2)
|
| +@@ -26,7 +30,9 @@
|
| + HashMgr* pHMgr[MAXDIC];
|
| + int maxdic;
|
| + SuggestMgr* pSMgr;
|
| ++#ifndef HUNSPELL_CHROME_CLIENT // We are using BDict instead.
|
| + char * affixpath;
|
| ++#endif
|
| + char * encoding;
|
| + struct cs_info * csconv;
|
| + int langnum;
|
| +@@ -34,17 +40,28 @@
|
| + int complexprefixes;
|
| + char** wordbreak;
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ // Owned by this Hunspell object, which allocates it in its constructor.
|
| ++ hunspell::BDictReader* bdict_reader;
|
| ++#endif
|
| ++
|
| + public:
|
| +
|
| + /* Hunspell(aff, dic) - constructor of Hunspell class
|
| * input: path of affix file and dictionary file
|
| */
|
| -
|
| -- Hunspell(const char * affpath, const char * dpath);
|
| -+ Hunspell(FILE* aff_handle, FILE* dic_handle);
|
| +
|
| ++#ifdef HUNSPELL_CHROME_CLIENT
|
| ++ Hunspell(const unsigned char* bdict_data, size_t bdict_length);
|
| ++#else
|
| + Hunspell(const char * affpath, const char * dpath, const char * key = NULL);
|
| ++#endif
|
| + ~Hunspell();
|
| +
|
| ++#ifndef HUNSPELL_CHROME_CLIENT
|
| + /* load extra dictionaries (only dic files) */
|
| + int add_dic(const char * dpath, const char * key = NULL);
|
| ++#endif
|
| +
|
| + /* spell(word) - spellcheck word
|
| + * output: 0 = bad word, not 0 = good word
|
| +Index: src/hunspell/license.hunspell
|
| +===================================================================
|
| +--- src/hunspell/license.hunspell (revision 48261)
|
| ++++ src/hunspell/license.hunspell (working copy)
|
| +@@ -56,4 +56,6 @@
|
| + *
|
| + * ***** END LICENSE BLOCK ***** */
|
| +
|
| ++#ifndef HUNSPELL_CHROME_CLIENT
|
| + #include "config.h"
|
| ++#endif
|
|
|