| Index: third_party/hunspell/src/hunspell/hashmgr.cxx
|
| ===================================================================
|
| --- third_party/hunspell/src/hunspell/hashmgr.cxx (revision 50428)
|
| +++ third_party/hunspell/src/hunspell/hashmgr.cxx (working copy)
|
| @@ -12,8 +12,14 @@
|
|
|
| // build a hash table from a munched word list
|
|
|
| +#ifdef HUNSPELL_CHROME_CLIENT
|
| +HashMgr::HashMgr(hunspell::BDictReader* reader)
|
| +{
|
| + bdict_reader = reader;
|
| +#else
|
| HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
|
| {
|
| +#endif
|
| tablesize = 0;
|
| tableptr = NULL;
|
| flag_mode = FLAG_CHAR;
|
| @@ -31,8 +37,14 @@
|
| numaliasm = 0;
|
| aliasm = NULL;
|
| forbiddenword = FORBIDDENWORD; // forbidden word signing flag
|
| +#ifdef HUNSPELL_CHROME_CLIENT
|
| + // No tables to load, just the AF lines.
|
| + load_config(NULL, NULL);
|
| + int ec = LoadAFLines();
|
| +#else
|
| load_config(apath, key);
|
| int ec = load_tables(tpath, key);
|
| +#endif
|
| if (ec) {
|
| /* error condition - what should we do here */
|
| HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
|
| @@ -91,15 +103,59 @@
|
| if (ignorechars) free(ignorechars);
|
| if (ignorechars_utf16) free(ignorechars_utf16);
|
|
|
| +#ifdef HUNSPELL_CHROME_CLIENT
|
| + EmptyHentryCache();
|
| + for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin();
|
| + it != pointer_to_strings_.end(); ++it) {
|
| + delete *it;
|
| + }
|
| +#endif
|
| +
|
| #ifdef MOZILLA_CLIENT
|
| delete [] csconv;
|
| #endif
|
| }
|
|
|
| +#ifdef HUNSPELL_CHROME_CLIENT
|
| +void HashMgr::EmptyHentryCache() {
|
| + // We need to delete each cache entry, and each additional one in the linked
|
| + // list of homonyms.
|
| + for (HEntryCache::iterator i = hentry_cache.begin();
|
| + i != hentry_cache.end(); ++i) {
|
| + hentry* cur = i->second;
|
| + while (cur) {
|
| + hentry* next = cur->next_homonym;
|
| + DeleteHashEntry(cur);
|
| + cur = next;
|
| + }
|
| + }
|
| + hentry_cache.clear();
|
| +}
|
| +#endif
|
| +
|
| // lookup a root word in the hashtable
|
|
|
| struct hentry * HashMgr::lookup(const char *word) const
|
| {
|
| +#ifdef HUNSPELL_CHROME_CLIENT
|
| + int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
|
| + int affix_count = bdict_reader->FindWord(word, affix_ids);
|
| + if (affix_count == 0) { // look for custom added word
|
| + std::map<base::StringPiece, int>::const_iterator iter =
|
| + custom_word_to_affix_id_map_.find(word);
|
| + if (iter != custom_word_to_affix_id_map_.end()) {
|
| + affix_count = 1;
|
| + affix_ids[0] = iter->second;
|
| + }
|
| + }
|
| +
|
| + static const int kMaxWordLen = 128;
|
| + static char word_buf[kMaxWordLen];
|
| +  // To leave room for the null terminator, we copy at most 127 bytes.
|
| + strncpy(word_buf, word, kMaxWordLen - 1);
|
| +
|
| + return AffixIDsToHentry(word_buf, affix_ids, affix_count);
|
| +#else
|
| struct hentry * dp;
|
| if (tableptr) {
|
| dp = tableptr[hash(word)];
|
| @@ -109,12 +165,14 @@
|
| }
|
| }
|
| return NULL;
|
| +#endif
|
| }
|
|
|
| // add a word to the hash table (private)
|
| int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
|
| int al, const char * desc, bool onlyupcase)
|
| {
|
| +#ifndef HUNSPELL_CHROME_CLIENT
|
| bool upcasehomonym = false;
|
| int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
|
| // variable-length hash record with word and optional fields
|
| @@ -206,6 +264,17 @@
|
| if (hp->astr) free(hp->astr);
|
| free(hp);
|
| }
|
| +#else
|
| + std::map<base::StringPiece, int>::iterator iter =
|
| + custom_word_to_affix_id_map_.find(word);
|
| + if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added
|
| + std::string* new_string_word = new std::string(word);
|
| + pointer_to_strings_.push_back(new_string_word);
|
| + base::StringPiece sp(*(new_string_word));
|
| + custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words
|
| + return 1;
|
| + }
|
| +#endif
|
| return 0;
|
| }
|
|
|
| @@ -339,6 +408,43 @@
|
| // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
|
| struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
|
| {
|
| +#ifdef HUNSPELL_CHROME_CLIENT
|
| + // Return NULL if dictionary is not valid.
|
| + if (!bdict_reader->IsValid())
|
| + return NULL;
|
| +
|
| + // This function is only ever called by one place and not nested. We can
|
| + // therefore keep static state between calls and use |col| as a "reset" flag
|
| + // to avoid changing the API. It is set to -1 for the first call.
|
| + static hunspell::WordIterator word_iterator =
|
| + bdict_reader->GetAllWordIterator();
|
| + if (col < 0) {
|
| + col = 1;
|
| + word_iterator = bdict_reader->GetAllWordIterator();
|
| + }
|
| +
|
| + int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
|
| + static const int kMaxWordLen = 128;
|
| + static char word[kMaxWordLen];
|
| + int affix_count = word_iterator.Advance(word, kMaxWordLen, affix_ids);
|
| + if (affix_count == 0)
|
| + return NULL;
|
| + short word_len = static_cast<short>(strlen(word));
|
| +
|
| + // Since hunspell 1.2.8, an hentry struct becomes a variable-length struct,
|
| + // i.e. a struct which uses its array 'word[1]' as a variable-length array.
|
| + // As noted above, this function is not nested. So, we just use a static
|
| + // struct which consists of an hentry and a char[kMaxWordLen], and initialize
|
| + // the static struct and return it for now.
|
| + // No need to create linked lists for the extra affixes.
|
| + static struct {
|
| + hentry entry;
|
| + char word[kMaxWordLen];
|
| + } hash_entry;
|
| +
|
| + return InitHashEntry(&hash_entry.entry, sizeof(hash_entry),
|
| + &word[0], word_len, affix_ids[0]);
|
| +#else
|
| if (hp && hp->next != NULL) return hp->next;
|
| for (col++; col < tablesize; col++) {
|
| if (tableptr[col]) return tableptr[col];
|
| @@ -346,11 +452,13 @@
|
| // null at end and reset to start
|
| col = -1;
|
| return NULL;
|
| +#endif
|
| }
|
|
|
| // load a munched word list and build a hash table on the fly
|
| int HashMgr::load_tables(const char * tpath, const char * key)
|
| {
|
| +#ifndef HUNSPELL_CHROME_CLIENT
|
| int al;
|
| char * ap;
|
| char * dp;
|
| @@ -470,6 +578,7 @@
|
| }
|
|
|
| delete dict;
|
| +#endif
|
| return 0;
|
| }
|
|
|
| @@ -478,6 +587,9 @@
|
|
|
| int HashMgr::hash(const char * word) const
|
| {
|
| +#ifdef HUNSPELL_CHROME_CLIENT
|
| + return 0;
|
| +#else
|
| long hv = 0;
|
| for (int i=0; i < 4 && *word != 0; i++)
|
| hv = (hv << 8) | (*word++);
|
| @@ -486,6 +598,7 @@
|
| hv ^= (*word++);
|
| }
|
| return (unsigned long) hv % tablesize;
|
| +#endif
|
| }
|
|
|
| int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {
|
| @@ -607,7 +720,12 @@
|
| int firstline = 1;
|
|
|
| // open the affix file
|
| +#ifdef HUNSPELL_CHROME_CLIENT
|
| + hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator();
|
| + FileMgr * afflst = new FileMgr(&iterator);
|
| +#else
|
| FileMgr * afflst = new FileMgr(affpath, key);
|
| +#endif
|
| if (!afflst) {
|
| HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);
|
| return 1;
|
| @@ -802,6 +920,121 @@
|
| return 0;
|
| }
|
|
|
| +#ifdef HUNSPELL_CHROME_CLIENT
|
| +int HashMgr::LoadAFLines()
|
| +{
|
| + utf8 = 1; // We always use UTF-8.
|
| +
|
| + // Read in all the AF lines which tell us the rules for each affix group ID.
|
| + hunspell::LineIterator iterator = bdict_reader->GetAfLineIterator();
|
| + FileMgr afflst(&iterator);
|
| + while (char* line = afflst.getline()) {
|
| + int rv = parse_aliasf(line, &afflst);
|
| + if (rv)
|
| + return rv;
|
| + }
|
| +
|
| + return 0;
|
| +}
|
| +
|
| +hentry* HashMgr::InitHashEntry(hentry* entry,
|
| + size_t item_size,
|
| + const char* word,
|
| + int word_length,
|
| + int affix_index) const {
|
| + // Return if the given buffer doesn't have enough space for a hentry struct
|
| + // or the given word is too long.
|
| + // Our BDICT cannot handle words longer than (128 - 1) bytes. So, it is
|
| + // better to return an error if the given word is too long and prevent
|
| + // an unexpected result caused by a long word.
|
| + const int kMaxWordLen = 128;
|
| + if (item_size < sizeof(hentry) + word_length + 1 ||
|
| + word_length >= kMaxWordLen)
|
| + return NULL;
|
| +
|
| + // Initialize a hentry struct with the given parameters, and
|
| + // append the given string at the end of this hentry struct.
|
| + memset(entry, 0, item_size);
|
| + FileMgr af(NULL);
|
| + entry->alen = static_cast<short>(
|
| + const_cast<HashMgr*>(this)->get_aliasf(affix_index, &entry->astr, &af));
|
| + entry->blen = static_cast<unsigned char>(word_length);
|
| + memcpy(&entry->word, word, word_length);
|
| +
|
| + return entry;
|
| +}
|
| +
|
| +hentry* HashMgr::CreateHashEntry(const char* word,
|
| + int word_length,
|
| + int affix_index) const {
|
| + // Return if the given word is too long.
|
| + // (See the comment in HashMgr::InitHashEntry().)
|
| + const int kMaxWordLen = 128;
|
| + if (word_length >= kMaxWordLen)
|
| + return NULL;
|
| +
|
| + const size_t kEntrySize = sizeof(hentry) + word_length + 1;
|
| + struct hentry* entry = reinterpret_cast<hentry*>(malloc(kEntrySize));
|
| + if (entry)
|
| + InitHashEntry(entry, kEntrySize, word, word_length, affix_index);
|
| +
|
| + return entry;
|
| +}
|
| +
|
| +void HashMgr::DeleteHashEntry(hentry* entry) const {
|
| + free(entry);
|
| +}
|
| +
|
| +hentry* HashMgr::AffixIDsToHentry(char* word,
|
| + int* affix_ids,
|
| + int affix_count) const
|
| +{
|
| + if (affix_count == 0)
|
| + return NULL;
|
| +
|
| + HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache;
|
| + std::string std_word(word);
|
| + HEntryCache::iterator found = cache.find(std_word);
|
| + if (found != cache.end()) {
|
| + // We must return an existing hentry for the same word if we've previously
|
| + // handed one out. Hunspell will compare pointers in some cases to see if
|
| + // two words it has found are the same.
|
| + return found->second;
|
| + }
|
| +
|
| + short word_len = static_cast<short>(strlen(word));
|
| +
|
| + // We can get a number of prefixes per word. There will normally be only one,
|
| + // but if not, there will be a linked list of "hentry"s for the "homonym"s
|
| + // for the word.
|
| + struct hentry* first_he = NULL;
|
| + struct hentry* prev_he = NULL; // For making linked list.
|
| + for (int i = 0; i < affix_count; i++) {
|
| + struct hentry* he = CreateHashEntry(word, word_len, affix_ids[i]);
|
| + if (!he)
|
| + break;
|
| + if (i == 0)
|
| + first_he = he;
|
| + if (prev_he)
|
| + prev_he->next_homonym = he;
|
| + prev_he = he;
|
| + }
|
| +
|
| + cache[std_word] = first_he; // Save this word in the cache for later.
|
| + return first_he;
|
| +}
|
| +
|
| +hentry* HashMgr::GetHentryFromHEntryCache(char* word) {
|
| + HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache;
|
| + std::string std_word(word);
|
| + HEntryCache::iterator found = cache.find(std_word);
|
| + if (found != cache.end())
|
| + return found->second;
|
| + else
|
| + return NULL;
|
| +}
|
| +#endif
|
| +
|
| int HashMgr::is_aliasf() {
|
| return (aliasf != NULL);
|
| }
|
|
|