Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(619)

Unified Diff: third_party/hunspell/src/hunspell/hashmgr.cxx

Issue 2239005: Merges our hunspell change to hunspell 1.2.10.... (Closed) Base URL: svn://chrome-svn.corp.google.com/chrome/trunk/deps/
Patch Set: '' Created 10 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/hunspell/src/hunspell/hashmgr.hxx ('k') | third_party/hunspell/src/hunspell/htypes.hxx » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/hunspell/src/hunspell/hashmgr.cxx
===================================================================
--- third_party/hunspell/src/hunspell/hashmgr.cxx (revision 50428)
+++ third_party/hunspell/src/hunspell/hashmgr.cxx (working copy)
@@ -12,8 +12,14 @@
// build a hash table from a munched word list
+#ifdef HUNSPELL_CHROME_CLIENT
+HashMgr::HashMgr(hunspell::BDictReader* reader)
+{
+ bdict_reader = reader;
+#else
HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
{
+#endif
tablesize = 0;
tableptr = NULL;
flag_mode = FLAG_CHAR;
@@ -31,8 +37,14 @@
numaliasm = 0;
aliasm = NULL;
forbiddenword = FORBIDDENWORD; // forbidden word signing flag
+#ifdef HUNSPELL_CHROME_CLIENT
+ // No tables to load, just the AF lines.
+ load_config(NULL, NULL);
+ int ec = LoadAFLines();
+#else
load_config(apath, key);
int ec = load_tables(tpath, key);
+#endif
if (ec) {
/* error condition - what should we do here */
HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
@@ -91,15 +103,59 @@
if (ignorechars) free(ignorechars);
if (ignorechars_utf16) free(ignorechars_utf16);
+#ifdef HUNSPELL_CHROME_CLIENT
+ EmptyHentryCache();
+ for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin();
+ it != pointer_to_strings_.end(); ++it) {
+ delete *it;
+ }
+#endif
+
#ifdef MOZILLA_CLIENT
delete [] csconv;
#endif
}
+#ifdef HUNSPELL_CHROME_CLIENT
+void HashMgr::EmptyHentryCache() {
+  // Each cached value is the head of a chain linked through |next_homonym|.
+  // Release every node of every chain, then drop the map contents.
+  HEntryCache::iterator it = hentry_cache.begin();
+  const HEntryCache::iterator last = hentry_cache.end();
+  for (; it != last; ++it) {
+    for (hentry* node = it->second; node != NULL;) {
+      // Read the successor before the node is released.
+      hentry* const following = node->next_homonym;
+      DeleteHashEntry(node);
+      node = following;
+    }
+  }
+  hentry_cache.clear();
+}
+#endif
+
// lookup a root word in the hashtable
struct hentry * HashMgr::lookup(const char *word) const
{
+#ifdef HUNSPELL_CHROME_CLIENT
+ int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
+ int affix_count = bdict_reader->FindWord(word, affix_ids);
+ if (affix_count == 0) { // look for custom added word
+ std::map<base::StringPiece, int>::const_iterator iter =
+ custom_word_to_affix_id_map_.find(word);
+ if (iter != custom_word_to_affix_id_map_.end()) {
+ affix_count = 1;
+ affix_ids[0] = iter->second;
+ }
+ }
+
+ static const int kMaxWordLen = 128;
+ static char word_buf[kMaxWordLen];
+ // Copy at most 127 bytes so that the last byte of the zero-initialized static buffer always remains a NUL terminator.
+ strncpy(word_buf, word, kMaxWordLen - 1);
+
+ return AffixIDsToHentry(word_buf, affix_ids, affix_count);
+#else
struct hentry * dp;
if (tableptr) {
dp = tableptr[hash(word)];
@@ -109,12 +165,14 @@
}
}
return NULL;
+#endif
}
// add a word to the hash table (private)
int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
int al, const char * desc, bool onlyupcase)
{
+#ifndef HUNSPELL_CHROME_CLIENT
bool upcasehomonym = false;
int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
// variable-length hash record with word and optional fields
@@ -206,6 +264,17 @@
if (hp->astr) free(hp->astr);
free(hp);
}
+#else
+ std::map<base::StringPiece, int>::iterator iter =
+ custom_word_to_affix_id_map_.find(word);
+ if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added
+ std::string* new_string_word = new std::string(word);
+ pointer_to_strings_.push_back(new_string_word);
+ base::StringPiece sp(*(new_string_word));
+ custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words
+ return 1;
+ }
+#endif
return 0;
}
@@ -339,6 +408,43 @@
// initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
{
+#ifdef HUNSPELL_CHROME_CLIENT
+ // Return NULL if dictionary is not valid.
+ if (!bdict_reader->IsValid())
+ return NULL;
+
+ // This function is only ever called by one place and not nested. We can
+ // therefore keep static state between calls and use |col| as a "reset" flag
+ // to avoid changing the API. It is set to -1 for the first call.
+ static hunspell::WordIterator word_iterator =
+ bdict_reader->GetAllWordIterator();
+ if (col < 0) {
+ col = 1;
+ word_iterator = bdict_reader->GetAllWordIterator();
+ }
+
+ int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
+ static const int kMaxWordLen = 128;
+ static char word[kMaxWordLen];
+ int affix_count = word_iterator.Advance(word, kMaxWordLen, affix_ids);
+ if (affix_count == 0)
+ return NULL;
+ short word_len = static_cast<short>(strlen(word));
+
+ // Since hunspell 1.2.8, an hentry struct becomes a variable-length struct,
+ // i.e. a struct which uses its array 'word[1]' as a variable-length array.
+ // As noted above, this function is not nested. So, we just use a static
+ // struct which consists of an hentry and a char[kMaxWordLen], and initialize
+ // the static struct and return it for now.
+ // No need to create linked lists for the extra affixes.
+ static struct {
+ hentry entry;
+ char word[kMaxWordLen];
+ } hash_entry;
+
+ return InitHashEntry(&hash_entry.entry, sizeof(hash_entry),
+ &word[0], word_len, affix_ids[0]);
+#else
if (hp && hp->next != NULL) return hp->next;
for (col++; col < tablesize; col++) {
if (tableptr[col]) return tableptr[col];
@@ -346,11 +452,13 @@
// null at end and reset to start
col = -1;
return NULL;
+#endif
}
// load a munched word list and build a hash table on the fly
int HashMgr::load_tables(const char * tpath, const char * key)
{
+#ifndef HUNSPELL_CHROME_CLIENT
int al;
char * ap;
char * dp;
@@ -470,6 +578,7 @@
}
delete dict;
+#endif
return 0;
}
@@ -478,6 +587,9 @@
int HashMgr::hash(const char * word) const
{
+#ifdef HUNSPELL_CHROME_CLIENT
+ return 0;
+#else
long hv = 0;
for (int i=0; i < 4 && *word != 0; i++)
hv = (hv << 8) | (*word++);
@@ -486,6 +598,7 @@
hv ^= (*word++);
}
return (unsigned long) hv % tablesize;
+#endif
}
int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {
@@ -607,7 +720,12 @@
int firstline = 1;
// open the affix file
+#ifdef HUNSPELL_CHROME_CLIENT
+ hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator();
+ FileMgr * afflst = new FileMgr(&iterator);
+#else
FileMgr * afflst = new FileMgr(affpath, key);
+#endif
if (!afflst) {
HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);
return 1;
@@ -802,6 +920,121 @@
return 0;
}
+#ifdef HUNSPELL_CHROME_CLIENT
+int HashMgr::LoadAFLines()
+{
+  // We always use UTF-8.
+  utf8 = 1;
+
+  // Walk the AF lines (the rules for each affix group ID) supplied by the
+  // BDICT reader and hand each one to parse_aliasf(). The first non-zero
+  // result aborts the scan and is returned to the caller.
+  hunspell::LineIterator af_lines = bdict_reader->GetAfLineIterator();
+  FileMgr af_file(&af_lines);
+  for (char* text = af_file.getline(); text; text = af_file.getline()) {
+    const int status = parse_aliasf(text, &af_file);
+    if (status)
+      return status;
+  }
+
+  // Every line was parsed successfully.
+  return 0;
+}
+
+// Fills a caller-provided buffer with an hentry followed by |word|.
+//
+//   entry        buffer that receives the hentry; must not be NULL.
+//   item_size    total size of that buffer in bytes.
+//   word         bytes of the word to store; must not be NULL.
+//   word_length  number of bytes of |word| to copy; must be in [0, 127].
+//   affix_index  index passed to get_aliasf() to obtain the flag list.
+//
+// Returns |entry| on success, or NULL (leaving the buffer untouched) when an
+// argument is invalid or the buffer is too small.
+hentry* HashMgr::InitHashEntry(hentry* entry,
+                               size_t item_size,
+                               const char* word,
+                               int word_length,
+                               int affix_index) const {
+  // Our BDICT cannot handle words longer than (128 - 1) bytes, so it is
+  // better to return an error for such a word than to risk an unexpected
+  // result. A negative length is rejected as well: converted to size_t it
+  // would wrap around, slip past the size check below and reach memcpy() as
+  // an enormous byte count.
+  const int kMaxWordLen = 128;
+  if (!entry || !word || word_length < 0 || word_length >= kMaxWordLen)
+    return NULL;
+
+  // The buffer must hold the struct, the word and a terminating NUL.
+  const size_t length = static_cast<size_t>(word_length);
+  if (item_size < sizeof(hentry) + length + 1)
+    return NULL;
+
+  // Zero the whole buffer first; this also supplies the NUL terminator that
+  // follows the copied word.
+  memset(entry, 0, item_size);
+  FileMgr af(NULL);
+  entry->alen = static_cast<short>(
+      const_cast<HashMgr*>(this)->get_aliasf(affix_index, &entry->astr, &af));
+  entry->blen = static_cast<unsigned char>(word_length);
+  memcpy(&entry->word, word, length);
+
+  return entry;
+}
+
+// Allocates (with malloc) and initializes an hentry holding |word|.
+//
+//   word         bytes of the word to store; must not be NULL.
+//   word_length  number of bytes of |word| to copy; must be in [0, 127].
+//   affix_index  index forwarded to InitHashEntry().
+//
+// Returns the new entry, which must be released with DeleteHashEntry(), or
+// NULL when an argument is invalid, the allocation fails, or the entry cannot
+// be initialized.
+hentry* HashMgr::CreateHashEntry(const char* word,
+                                 int word_length,
+                                 int affix_index) const {
+  // Reject words the BDICT format cannot represent (128 bytes or more) and
+  // negative lengths, which would otherwise shrink the allocation below
+  // sizeof(hentry).
+  const int kMaxWordLen = 128;
+  if (!word || word_length < 0 || word_length >= kMaxWordLen)
+    return NULL;
+
+  // Room for the struct, the word and a terminating NUL.
+  const size_t kEntrySize =
+      sizeof(hentry) + static_cast<size_t>(word_length) + 1;
+  hentry* entry = static_cast<hentry*>(malloc(kEntrySize));
+  if (!entry)
+    return NULL;
+
+  // Never hand out a buffer that InitHashEntry() refused to fill in.
+  if (!InitHashEntry(entry, kEntrySize, word, word_length, affix_index)) {
+    DeleteHashEntry(entry);
+    return NULL;
+  }
+
+  return entry;
+}
+
+// Releases an entry obtained from CreateHashEntry(), which allocates it with
+// malloc(); hence the matching free() here.
+void HashMgr::DeleteHashEntry(hentry* entry) const {
+  free(static_cast<void*>(entry));
+}
+
+// Returns the hentry chain for |word|, creating and caching it on first use.
+//
+//   word         NUL-terminated word; must not be NULL.
+//   affix_ids    array of |affix_count| affix IDs; must not be NULL.
+//   affix_count  number of IDs in |affix_ids|.
+//
+// One hentry is created per affix ID and the entries are chained through
+// |next_homonym|. Returns the head of the chain, or NULL when there is
+// nothing to build or the first entry cannot be created.
+hentry* HashMgr::AffixIDsToHentry(char* word,
+                                  int* affix_ids,
+                                  int affix_count) const
+{
+  if (!word || !affix_ids || affix_count <= 0)
+    return NULL;
+
+  HEntryCache& cache = const_cast<HashMgr*>(this)->hentry_cache;
+  const std::string std_word(word);
+  HEntryCache::iterator found = cache.find(std_word);
+  if (found != cache.end()) {
+    // We must return an existing hentry for the same word if we've previously
+    // handed one out. Hunspell will compare pointers in some cases to see if
+    // two words it has found are the same.
+    return found->second;
+  }
+
+  // The key was built from |word|, so its length equals strlen(word).
+  const short word_len = static_cast<short>(std_word.length());
+
+  // We can get a number of affix IDs per word. There will normally be only
+  // one, but if not, there will be a linked list of "hentry"s for the
+  // "homonym"s of the word.
+  struct hentry* first_he = NULL;
+  struct hentry* prev_he = NULL;  // Tail of the list built so far.
+  for (int i = 0; i < affix_count; i++) {
+    struct hentry* he = CreateHashEntry(word, word_len, affix_ids[i]);
+    if (!he)
+      break;
+    if (!first_he)
+      first_he = he;
+    else
+      prev_he->next_homonym = he;
+    prev_he = he;
+  }
+
+  // Only remember words for which at least one entry exists. Caching NULL
+  // would make every later lookup of this word fail, even after the
+  // condition that prevented the allocation has gone away.
+  if (first_he)
+    cache[std_word] = first_he;
+  return first_he;
+}
+
+// Looks |word| up in the hentry cache without creating anything. Returns the
+// cached entry, or NULL when the word has no cache record.
+hentry* HashMgr::GetHentryFromHEntryCache(char* word) {
+  const std::string key(word);
+  const HEntryCache::iterator hit = hentry_cache.find(key);
+  if (hit == hentry_cache.end())
+    return NULL;
+  return hit->second;
+}
+#endif
+
int HashMgr::is_aliasf() {
  // 1 when the |aliasf| pointer is non-NULL, 0 otherwise.
  return aliasf ? 1 : 0;
}
« no previous file with comments | « third_party/hunspell/src/hunspell/hashmgr.hxx ('k') | third_party/hunspell/src/hunspell/htypes.hxx » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698