third_party/hunspell/src/hunspell/hashmgr.cxx - Issue 2544793003: [spellcheck] Updated Hunspell to 1.5.4

Unified Diff: third_party/hunspell/src/hunspell/hashmgr.cxx

Issue 2544793003: [spellcheck] Updated Hunspell to 1.5.4 (Closed)

Patch Set: Updated patch with encoding change (created 4 years ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: third_party/hunspell/src/hunspell/hashmgr.cxx

diff --git a/third_party/hunspell/src/hunspell/hashmgr.cxx b/third_party/hunspell/src/hunspell/hashmgr.cxx

index 5b7c269a3a34249f15724b03a1f172d8e973f883..770fac1dfae5502ee4485fcfa075a1bdf1f406eb 100644

--- a/third_party/hunspell/src/hunspell/hashmgr.cxx

+++ b/third_party/hunspell/src/hunspell/hashmgr.cxx

@@ -1,10 +1,82 @@

-#include "license.hunspell"

-#include "license.myspell"

-#include <stdlib.h>

+/* ***** BEGIN LICENSE BLOCK *****

+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1

+ *

+ * The contents of this file are subject to the Mozilla Public License Version

+ * 1.1 (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ * http://www.mozilla.org/MPL/

+ *

+ * Software distributed under the License is distributed on an "AS IS" basis,

+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License

+ * for the specific language governing rights and limitations under the

+ * License.

+ *

+ * The Original Code is Hunspell, based on MySpell.

+ *

+ * The Initial Developers of the Original Code are

+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).

+ *

+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,

+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,

+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,

+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,

+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen

+ *

+ * Alternatively, the contents of this file may be used under the terms of

+ * either the GNU General Public License Version 2 or later (the "GPL"), or

+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),

+ * in which case the provisions of the GPL or the LGPL are applicable instead

+ * of those above. If you wish to allow use of your version of this file only

+ * under the terms of either the GPL or the LGPL, and not to allow others to

+ * use your version of this file under the terms of the MPL, indicate your

+ * decision by deleting the provisions above and replace them with the notice

+ * and other provisions required by the GPL or the LGPL. If you do not delete

+ * the provisions above, a recipient may use your version of this file under

+ * the terms of any one of the MPL, the GPL or the LGPL.

+ *

+ * ***** END LICENSE BLOCK ***** */

+/*

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions

+ * are met:

+ *

+ * 1. Redistributions of source code must retain the above copyright

+ * notice, this list of conditions and the following disclaimer.

+ *

+ * 2. Redistributions in binary form must reproduce the above copyright

+ * notice, this list of conditions and the following disclaimer in the

+ * documentation and/or other materials provided with the distribution.

+ *

+ * 3. All modifications to the source code must be clearly marked as

+ * such. Binary redistributions based on modified source code

+ * must be clearly marked as modified versions in the documentation

+ * and/or other materials provided with the distribution.

+ *

+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS

+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS

+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL

+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,

+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

+ * SUCH DAMAGE.

+ */

+#include <stdlib.h>

#include <string.h>

-#include <stdio.h>

+#include <stdio.h>

#include <ctype.h>

+#include <limits>

+#include <sstream>

#include "hashmgr.hxx"

#include "csutil.hxx"

@@ -14,29 +86,25 @@

#ifdef HUNSPELL_CHROME_CLIENT

HashMgr::HashMgr(hunspell::BDictReader* reader)

- bdict_reader = reader;

+ : bdict_reader(reader),

#else

-HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)

+HashMgr::HashMgr(const char* tpath, const char* apath, const char* key)

+ :

#endif

- tablesize = 0;

- tableptr = NULL;

- flag_mode = FLAG_CHAR;

- complexprefixes = 0;

- utf8 = 0;

+ tablesize(0),

+ tableptr(NULL),

+ flag_mode(FLAG_CHAR),

+ complexprefixes(0),

+ utf8(0),

+ forbiddenword(FORBIDDENWORD) // forbidden word signing flag

+ ,

+ numaliasf(0),

+ aliasf(NULL),

+ aliasflen(0),

+ numaliasm(0),

+ aliasm(NULL) {

langnum = 0;

- lang = NULL;

- enc = NULL;

csconv = 0;

- ignorechars = NULL;

- ignorechars_utf16 = NULL;

- ignorechars_utf16_len = 0;

- numaliasf = 0;

- aliasf = NULL;

- numaliasm = 0;

- aliasm = NULL;

- forbiddenword = FORBIDDENWORD; // forbidden word signing flag

#ifdef HUNSPELL_CHROME_CLIENT

// No tables to load, just the AF lines.

load_config(NULL, NULL);

@@ -47,27 +115,29 @@ HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)

#endif

if (ec) {

/* error condition - what should we do here */

- HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);

- if (tableptr) {

- free(tableptr);

- tableptr = NULL;

+ HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n", ec);

+ free(tableptr);

+ // keep tablesize at 1 to avoid a possible division by zero

+ tablesize = 1;

+ tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*));

+ if (!tableptr) {

+ tablesize = 0;

}

- tablesize = 0;

}

-HashMgr::~HashMgr()

+HashMgr::~HashMgr() {

if (tableptr) {

// now pass through hash table freeing up everything

// go through column by column of the table

- for (int i=0; i < tablesize; i++) {

- struct hentry * pt = tableptr[i];

- struct hentry * nt = NULL;

- while(pt) {

+ for (int i = 0; i < tablesize; i++) {

+ struct hentry* pt = tableptr[i];

+ struct hentry* nt = NULL;

+ while (pt) {

nt = pt->next;

- if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) free(pt->astr);

+ if (pt->astr &&

+ (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen)))

+ free(pt->astr);

free(pt);

pt = nt;

}

@@ -77,7 +147,8 @@ HashMgr::~HashMgr()

tablesize = 0;

if (aliasf) {

- for (int j = 0; j < (numaliasf); j++) free(aliasf[j]);

+ for (int j = 0; j < (numaliasf); j++)

+ free(aliasf[j]);

free(aliasf);

aliasf = NULL;

if (aliasflen) {

@@ -86,23 +157,19 @@ HashMgr::~HashMgr()

}

if (aliasm) {

- for (int j = 0; j < (numaliasm); j++) free(aliasm[j]);

+ for (int j = 0; j < (numaliasm); j++)

+ free(aliasm[j]);

free(aliasm);

aliasm = NULL;

- }

+ }

#ifndef OPENOFFICEORG

#ifndef MOZILLA_CLIENT

- if (utf8) free_utf_tbl();

+ if (utf8)

+ free_utf_tbl();

#endif

- if (enc) free(enc);

- if (lang) free(lang);

- if (ignorechars) free(ignorechars);

- if (ignorechars_utf16) free(ignorechars_utf16);

#ifdef HUNSPELL_CHROME_CLIENT

EmptyHentryCache();

for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin();

@@ -111,7 +178,7 @@ HashMgr::~HashMgr()

}

#endif

#ifdef MOZILLA_CLIENT

- delete [] csconv;

+ delete[] csconv;

#endif

}

@@ -134,8 +201,7 @@ void HashMgr::EmptyHentryCache() {

// lookup a root word in the hashtable

-struct hentry * HashMgr::lookup(const char *word) const

+struct hentry* HashMgr::lookup(const char* word) const {

#ifdef HUNSPELL_CHROME_CLIENT

int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];

int affix_count = bdict_reader->FindWord(word, affix_ids);

@@ -155,265 +221,330 @@ struct hentry * HashMgr::lookup(const char *word) const

return AffixIDsToHentry(word_buf, affix_ids, affix_count);

#else

- struct hentry * dp;

- if (tableptr) {

- dp = tableptr[hash(word)];

- if (!dp) return NULL;

- for ( ; dp != NULL; dp = dp->next) {

- if (strcmp(word, dp->word) == 0) return dp;

- }

+ struct hentry* dp;

+ if (tableptr) {

+ dp = tableptr[hash(word)];

+ if (!dp)

+ return NULL;

+ for (; dp != NULL; dp = dp->next) {

+ if (strcmp(word, dp->word) == 0)

+ return dp;

}

- return NULL;

+ }

+ return NULL;

#endif

}

// add a word to the hash table (private)

-int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,

- int al, const char * desc, bool onlyupcase)

+int HashMgr::add_word(const std::string& in_word,

+ int wcl,

+ unsigned short* aff,

+ int al,

+ const std::string* in_desc,

+ bool onlyupcase) {

+// TODO: The following 40 lines or so are actually new. Should they be included?

#ifndef HUNSPELL_CHROME_CLIENT

- bool upcasehomonym = false;

- int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;

- // variable-length hash record with word and optional fields

- struct hentry* hp =

- (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);

- if (!hp) return 1;

- char * hpw = hp->word;

- strcpy(hpw, word);

- if (ignorechars != NULL) {

+ const std::string* word = &in_word;

+ const std::string* desc = in_desc;

+ std::string *word_copy = NULL;

+ std::string *desc_copy = NULL;

+ if (!ignorechars.empty() || complexprefixes) {

+ word_copy = new std::string(in_word);

+ if (!ignorechars.empty()) {

if (utf8) {

- remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len);

+ wcl = remove_ignored_chars_utf(*word_copy, ignorechars_utf16);

} else {

- remove_ignored_chars(hpw, ignorechars);

+ remove_ignored_chars(*word_copy, ignorechars);

}

if (complexprefixes) {

- if (utf8) reverseword_utf(hpw); else reverseword(hpw);

+ if (utf8)

+ wcl = reverseword_utf(*word_copy);

+ else

+ reverseword(*word_copy);

+ if (in_desc && !aliasm) {

+ desc_copy = new std::string(*in_desc);

+ if (complexprefixes) {

+ if (utf8)

+ reverseword_utf(*desc_copy);

+ else

+ reverseword(*desc_copy);

+ }

+ desc = desc_copy;

+ }

}

- int i = hash(hpw);

- hp->blen = (unsigned char) wbl;

- hp->clen = (unsigned char) wcl;

- hp->alen = (short) al;

- hp->astr = aff;

- hp->next = NULL;

- hp->next_homonym = NULL;

- // store the description string or its pointer

- if (desc) {

- hp->var = H_OPT;

- if (aliasm) {

- hp->var += H_OPT_ALIASM;

- store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));

+ word = word_copy;

+ }

+ bool upcasehomonym = false;

+ int descl = desc ? (aliasm ? sizeof(char*) : desc->size() + 1) : 0;

+ // variable-length hash record with word and optional fields

+ struct hentry* hp =

+ (struct hentry*)malloc(sizeof(struct hentry) + word->size() + descl);

+ if (!hp) {

+ delete desc_copy;

+ delete word_copy;

+ return 1;

+ }

+ char* hpw = hp->word;

+ strcpy(hpw, word->c_str());

+ int i = hash(hpw);

+ hp->blen = (unsigned char)word->size();

+ hp->clen = (unsigned char)wcl;

+ hp->alen = (short)al;

+ hp->astr = aff;

+ hp->next = NULL;

+ hp->next_homonym = NULL;

+ // store the description string or its pointer

+ if (desc) {

+ hp->var = H_OPT;

+ if (aliasm) {

+ hp->var += H_OPT_ALIASM;

+ store_pointer(hpw + word->size() + 1, get_aliasm(atoi(desc->c_str())));

+ } else {

+ strcpy(hpw + word->size() + 1, desc->c_str());

+ }

+ if (strstr(HENTRY_DATA(hp), MORPH_PHON))

+ hp->var += H_OPT_PHON;

+ } else

+ hp->var = 0;

+ struct hentry* dp = tableptr[i];

+ if (!dp) {

+ tableptr[i] = hp;

+ delete desc_copy;

+ delete word_copy;

+ return 0;

+ }

+ while (dp->next != NULL) {

+ if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {

+ // remove hidden onlyupcase homonym

+ if (!onlyupcase) {

+ if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {

+ free(dp->astr);

+ dp->astr = hp->astr;

+ dp->alen = hp->alen;

+ free(hp);

+ delete desc_copy;

+ delete word_copy;

+ return 0;

} else {

- strcpy(hpw + wbl + 1, desc);

- if (complexprefixes) {

- if (utf8) reverseword_utf(HENTRY_DATA(hp));

- else reverseword(HENTRY_DATA(hp));

- }

+ dp->next_homonym = hp;

}

- if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON;

- } else hp->var = 0;

- struct hentry * dp = tableptr[i];

- if (!dp) {

- tableptr[i] = hp;

- return 0;

- }

- while (dp->next != NULL) {

- if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {

- // remove hidden onlyupcase homonym

- if (!onlyupcase) {

- if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {

- free(dp->astr);

- dp->astr = hp->astr;

- dp->alen = hp->alen;

- free(hp);

- return 0;

- } else {

- dp->next_homonym = hp;

- }

- } else {

- upcasehomonym = true;

- }

- dp=dp->next;

- }

- if (strcmp(hp->word, dp->word) == 0) {

- // remove hidden onlyupcase homonym

- if (!onlyupcase) {

- if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {

- free(dp->astr);

- dp->astr = hp->astr;

- dp->alen = hp->alen;

- free(hp);

- return 0;

- } else {

- dp->next_homonym = hp;

- }

- } else {

- upcasehomonym = true;

- }

- if (!upcasehomonym) {

- dp->next = hp;

- } else {

- // remove hidden onlyupcase homonym

- if (hp->astr) free(hp->astr);

- free(hp);

- }

-#else

- std::map<base::StringPiece, int>::iterator iter =

- custom_word_to_affix_id_map_.find(word);

- if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added

- std::string* new_string_word = new std::string(word);

- pointer_to_strings_.push_back(new_string_word);

- base::StringPiece sp(*(new_string_word));

- custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words

- return 1;

+ } else {

+ upcasehomonym = true;

+ }

}

-#endif

- return 0;

-int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,

- unsigned short * flags, int al, char * dp, int captype)

- // add inner capitalized forms to handle the following allcap forms:

- // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG

- // Allcaps with suffixes: CIA's -> CIA'S

- if (((captype == HUHCAP) || (captype == HUHINITCAP) ||

- ((captype == ALLCAP) && (flags != NULL))) &&

- !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) {

- unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1));

- if (!flags2) return 1;

- if (al) memcpy(flags2, flags, al * sizeof(unsigned short));

- flags2[al] = ONLYUPCASEFLAG;

- if (utf8) {

- char st[BUFSIZE];

- w_char w[BUFSIZE];

- int wlen = u8_u16(w, BUFSIZE, word);

- mkallsmall_utf(w, wlen, langnum);

- mkallcap_utf(w, 1, langnum);

- u16_u8(st, BUFSIZE, w, wlen);

- return add_word(st,wbl,wcl,flags2,al+1,dp, true);

- } else {

- mkallsmall(word, csconv);

- mkinitcap(word, csconv);

- return add_word(word,wbl,wcl,flags2,al+1,dp, true);

- }

+ dp = dp->next;

+ }

+ if (strcmp(hp->word, dp->word) == 0) {

+ // remove hidden onlyupcase homonym

+ if (!onlyupcase) {

+ if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {

+ free(dp->astr);

+ dp->astr = hp->astr;

+ dp->alen = hp->alen;

+ free(hp);

+ delete desc_copy;

+ delete word_copy;

+ return 0;

+ } else {

+ dp->next_homonym = hp;

+ }

+ } else {

+ upcasehomonym = true;

}

- return 0;

+ }

+ if (!upcasehomonym) {

+ dp->next = hp;

+ } else {

+ // remove hidden onlyupcase homonym

+ if (hp->astr)

+ free(hp->astr);

+ free(hp);

+ }

+ delete desc_copy;

+ delete word_copy;

+#else

+ std::map<base::StringPiece, int>::iterator iter =

+ custom_word_to_affix_id_map_.find(in_word);

+ if (iter == custom_word_to_affix_id_map_.end()) { // word needs to be added

+ std::string* new_string_word = new std::string(in_word);

+ pointer_to_strings_.push_back(new_string_word);

+ base::StringPiece sp(*(new_string_word));

+ custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words

+ return 1;

+ }

+#endif

+ return 0;

}

-// detect captype and modify word length for UTF-8 encoding

-int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {

- int len;

+int HashMgr::add_hidden_capitalized_word(const std::string& word,

+ int wcl,

+ unsigned short* flags,

+ int flagslen,

+ const std::string* dp,

+ int captype) {

+ if (flags == NULL)

+ flagslen = 0;

+ // add inner capitalized forms to handle the following allcap forms:

+ // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG

+ // Allcaps with suffixes: CIA's -> CIA'S

+ if (((captype == HUHCAP) || (captype == HUHINITCAP) ||

+ ((captype == ALLCAP) && (flagslen != 0))) &&

+ !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) {

+ unsigned short* flags2 =

+ (unsigned short*)malloc(sizeof(unsigned short) * (flagslen + 1));

+ if (!flags2)

+ return 1;

+ if (flagslen)

+ memcpy(flags2, flags, flagslen * sizeof(unsigned short));

+ flags2[flagslen] = ONLYUPCASEFLAG;

if (utf8) {

- w_char dest_utf[BUFSIZE];

- len = u8_u16(dest_utf, BUFSIZE, word);

- *captype = get_captype_utf8(dest_utf, len, langnum);

+ std::string st;

+ std::vector<w_char> w;

+ u8_u16(w, word);

+ mkallsmall_utf(w, langnum);

+ mkinitcap_utf(w, langnum);

+ u16_u8(st, w);

+ return add_word(st, wcl, flags2, flagslen + 1, dp, true);

} else {

- len = wbl;

- *captype = get_captype((char *) word, len, csconv);

+ std::string new_word(word);

+ mkallsmall(new_word, csconv);

+ mkinitcap(new_word, csconv);

+ int ret = add_word(new_word, wcl, flags2, flagslen + 1, dp, true);

+ return ret;

}

- return len;

+ }

+ return 0;

+// detect captype and modify word length for UTF-8 encoding

+int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {

+ int len;

+ if (utf8) {

+ std::vector<w_char> dest_utf;

+ len = u8_u16(dest_utf, word);

+ *captype = get_captype_utf8(dest_utf, langnum);

+ } else {

+ len = word.size();

+ *captype = get_captype(word, csconv);

+ }

+ return len;

}

// remove word (personal dictionary function for standalone applications)

-int HashMgr::remove(const char * word)

+int HashMgr::remove(const std::string& word) {

#ifdef HUNSPELL_CHROME_CLIENT

- std::map<base::StringPiece, int>::iterator iter =

- custom_word_to_affix_id_map_.find(word);

- if (iter != custom_word_to_affix_id_map_.end())

- custom_word_to_affix_id_map_.erase(iter);

+ std::map<base::StringPiece, int>::iterator iter =

+ custom_word_to_affix_id_map_.find(word);

+ if (iter != custom_word_to_affix_id_map_.end())

+ custom_word_to_affix_id_map_.erase(iter);

#else

- struct hentry * dp = lookup(word);

- while (dp) {

- if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {

- unsigned short * flags =

- (unsigned short *) malloc(sizeof(short) * (dp->alen + 1));

- if (!flags) return 1;

- for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i];

- flags[dp->alen] = forbiddenword;

- dp->astr = flags;

- dp->alen++;

- flag_qsort(flags, 0, dp->alen);

- }

- dp = dp->next_homonym;

+ struct hentry* dp = lookup(word.c_str());

+ while (dp) {

+ if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {

+ unsigned short* flags =

+ (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen + 1));

+ if (!flags)

+ return 1;

+ for (int i = 0; i < dp->alen; i++)

+ flags[i] = dp->astr[i];

+ flags[dp->alen] = forbiddenword;

+ free(dp->astr);

+ dp->astr = flags;

+ dp->alen++;

+ std::sort(flags, flags + dp->alen);

}

+ dp = dp->next_homonym;

+ }

#endif

- return 0;

+ return 0;

}

/* remove forbidden flag to add a personal word to the hash */

-int HashMgr::remove_forbidden_flag(const char * word) {

- struct hentry * dp = lookup(word);

- if (!dp) return 1;

- while (dp) {

- if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) {

- if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic.

- else {

- unsigned short * flags2 =

- (unsigned short *) malloc(sizeof(short) * (dp->alen - 1));

- if (!flags2) return 1;

- int i, j = 0;

- for (i = 0; i < dp->alen; i++) {

- if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i];

- }

- dp->alen--;

- dp->astr = flags2; // XXX allowed forbidden words

- }

- dp = dp->next_homonym;

- }

- return 0;

+int HashMgr::remove_forbidden_flag(const std::string& word) {

+ struct hentry* dp = lookup(word.c_str());

+ if (!dp)

+ return 1;

+ while (dp) {

+ if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) {

+ if (dp->alen == 1)

+ dp->alen = 0; // XXX forbidden words of personal dic.

+ else {

+ unsigned short* flags2 =

+ (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen - 1));

+ if (!flags2)

+ return 1;

+ int i, j = 0;

+ for (i = 0; i < dp->alen; i++) {

+ if (dp->astr[i] != forbiddenword)

+ flags2[j++] = dp->astr[i];

+ }

+ dp->alen--;

+ free(dp->astr);

+ dp->astr = flags2; // XXX allowed forbidden words

+ }

+ dp = dp->next_homonym;

+ }

+ return 0;

}

// add a custom dic. word to the hash table (public)

-int HashMgr::add(const char * word)

- unsigned short * flags = NULL;

+int HashMgr::add(const std::string& word) {

+ if (remove_forbidden_flag(word)) {

+ int captype;

int al = 0;

- if (remove_forbidden_flag(word)) {

- int captype;

- int wbl = strlen(word);

- int wcl = get_clen_and_captype(word, wbl, &captype);

- add_word(word, wbl, wcl, flags, al, NULL, false);

- return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype);

- }

- return 0;

+ unsigned short* flags = NULL;

+ int wcl = get_clen_and_captype(word, &captype);

+ add_word(word, wcl, flags, al, NULL, false);

+ return add_hidden_capitalized_word(word, wcl, flags, al, NULL,

+ captype);

+ }

+ return 0;

}

-int HashMgr::add_with_affix(const char * word, const char * example)

- // detect captype and modify word length for UTF-8 encoding

- struct hentry * dp = lookup(example);

- remove_forbidden_flag(word);

- if (dp && dp->astr) {

- int captype;

- int wbl = strlen(word);

- int wcl = get_clen_and_captype(word, wbl, &captype);

- if (aliasf) {

- add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false);

- } else {

- unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short));

- if (flags) {

- memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));

- add_word(word, wbl, wcl, flags, dp->alen, NULL, false);

- } else return 1;

- }

- return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp->alen, NULL, captype);

+int HashMgr::add_with_affix(const std::string& word, const std::string& example) {

+ // detect captype and modify word length for UTF-8 encoding

+ struct hentry* dp = lookup(example.c_str());

+ remove_forbidden_flag(word);

+ if (dp && dp->astr) {

+ int captype;

+ int wcl = get_clen_and_captype(word, &captype);

+ if (aliasf) {

+ add_word(word, wcl, dp->astr, dp->alen, NULL, false);

+ } else {

+ unsigned short* flags =

+ (unsigned short*)malloc(dp->alen * sizeof(unsigned short));

+ if (flags) {

+ memcpy((void*)flags, (void*)dp->astr,

+ dp->alen * sizeof(unsigned short));

+ add_word(word, wcl, flags, dp->alen, NULL, false);

+ } else

+ return 1;

}

- return 1;

+ return add_hidden_capitalized_word(word, wcl, dp->astr,

+ dp->alen, NULL, captype);

+ }

+ return 1;

}

// walk the hash table entry by entry - null at end

// initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);

-struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const

+struct hentry* HashMgr::walk_hashtable(int& col, struct hentry* hp) const {

#ifdef HUNSPELL_CHROME_CLIENT

// Return NULL if dictionary is not valid.

if (!bdict_reader->IsValid())

@@ -452,9 +583,11 @@ struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const

return InitHashEntry(&hash_entry.entry, sizeof(hash_entry),

&word[0], word_len, affix_ids[0]);

#else

- if (hp && hp->next != NULL) return hp->next;

+ if (hp && hp->next != NULL)

+ return hp->next;

for (col++; col < tablesize; col++) {

- if (tableptr[col]) return tableptr[col];

+ if (tableptr[col])

+ return tableptr[col];

}

// null at end and reset to start

col = -1;

@@ -463,22 +596,16 @@ struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const

}

// load a munched word list and build a hash table on the fly

-int HashMgr::load_tables(const char * tpath, const char * key)

+int HashMgr::load_tables(const char* tpath, const char* key) {

#ifndef HUNSPELL_CHROME_CLIENT

- int al;

- char * ap;

- char * dp;

- char * dp2;

- unsigned short * flags;

- char * ts;

// open dictionary file

- FileMgr * dict = new FileMgr(tpath, key);

- if (dict == NULL) return 1;

+ FileMgr* dict = new FileMgr(tpath, key);

+ if (dict == NULL)

+ return 1;

// first read the first line of file to get hash table size */

- if ((ts = dict->getline()) == NULL) {

+ std::string ts;

+ if (!dict->getline(ts)) {

HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath);

delete dict;

return 2;

@@ -486,102 +613,115 @@ int HashMgr::load_tables(const char * tpath, const char * key)

mychomp(ts);

/* remove byte order mark */

- if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) {

- memmove(ts, ts+3, strlen(ts+3)+1);

- // warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions

+ if (ts.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {

+ ts.erase(0, 3);

}

- tablesize = atoi(ts);

- if (tablesize == 0) {

- HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n");

+ tablesize = atoi(ts.c_str());

+ int nExtra = 5 + USERWORD;

+ if (tablesize <= 0 ||

+ (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) /

+ int(sizeof(struct hentry*)))) {

+ HUNSPELL_WARNING(

+ stderr, "error: line 1: missing or bad word count in the dic file\n");

delete dict;

return 4;

}

- tablesize = tablesize + 5 + USERWORD;

- if ((tablesize %2) == 0) tablesize++;

+ tablesize += nExtra;

+ if ((tablesize % 2) == 0)

+ tablesize++;

// allocate the hash table

- tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *));

- if (! tableptr) {

+ tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*));

+ if (!tableptr) {

delete dict;

return 3;

}

- for (int i=0; i<tablesize; i++) tableptr[i] = NULL;

// loop through all words on much list and add to hash

// table and create word and affix strings

- while ((ts = dict->getline()) != NULL) {

+ while (dict->getline(ts)) {

mychomp(ts);

// split each line into word and morphological description

- dp = ts;

- while ((dp = strchr(dp, ':')) != NULL) {

- if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) {

- for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--);

- if (dp < ts) { // missing word

- dp = NULL;

- } else {

- *(dp + 1) = '\0';

- dp = dp + 2;

- }

- break;

- }

- dp++;

+ size_t dp_pos = 0;

+ while ((dp_pos = ts.find(':', dp_pos)) != std::string::npos) {

+ if ((dp_pos > 3) && (ts[dp_pos - 3] == ' ' || ts[dp_pos - 3] == '\t')) {

+ for (dp_pos -= 3; dp_pos > 0 && (ts[dp_pos-1] == ' ' || ts[dp_pos-1] == '\t'); --dp_pos)

+ ;

+ if (dp_pos == 0) { // missing word

+ dp_pos = std::string::npos;

+ } else {

+ ++dp_pos;

+ }

+ break;

+ }

+ ++dp_pos;

}

// tabulator is the old morphological field separator

- dp2 = strchr(ts, '\t');

- if (dp2 && (!dp || dp2 < dp)) {

- *dp2 = '\0';

- dp = dp2 + 1;

+ size_t dp2_pos = ts.find('\t');

+ if (dp2_pos != std::string::npos && (dp_pos == std::string::npos || dp2_pos < dp_pos)) {

+ dp_pos = dp2_pos + 1;

+ }

+ std::string dp;

+ if (dp_pos != std::string::npos) {

+ dp.assign(ts.substr(dp_pos));

+ ts.resize(dp_pos - 1);

}

// split each line into word and affix char strings

// "\/" signs slash in words (not affix separator)

// "/" at beginning of the line is word character (not affix separator)

- ap = strchr(ts,'/');

- while (ap) {

- if (ap == ts) {

- ap++;

- continue;

- } else if (*(ap - 1) != '\\') break;

- // replace "\/" with "/"

- for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++);

- ap = strchr(ap,'/');

+ size_t ap_pos = ts.find('/');

+ while (ap_pos != std::string::npos) {

+ if (ap_pos == 0) {

+ ++ap_pos;

+ continue;

+ } else if (ts[ap_pos - 1] != '\\')

+ break;

+ // replace "\/" with "/"

+ ts.erase(ap_pos - 1, 1);

+ ap_pos = ts.find('/', ap_pos);

}

- if (ap) {

- *ap = '\0';

+ unsigned short* flags;

+ int al;

+ if (ap_pos != std::string::npos && ap_pos != ts.size()) {

+ std::string ap(ts.substr(ap_pos + 1));

+ ts.resize(ap_pos);

if (aliasf) {

- int index = atoi(ap + 1);

+ int index = atoi(ap.c_str());

al = get_aliasf(index, &flags, dict);

if (!al) {

- HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", dict->getlinenum());

- *ap = '\0';

+ HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n",

+ dict->getlinenum());

}

} else {

- al = decode_flags(&flags, ap + 1, dict);

+ al = decode_flags(&flags, ap.c_str(), dict);

if (al == -1) {

- HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");

- delete dict;

- return 6;

+ HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");

+ delete dict;

+ return 6;

}

- flag_qsort(flags, 0, al);

+ std::sort(flags, flags + al);

}

} else {

al = 0;

- ap = NULL;

flags = NULL;

}

int captype;

- int wbl = strlen(ts);

- int wcl = get_clen_and_captype(ts, wbl, &captype);

+ int wcl = get_clen_and_captype(ts, &captype);

+ const std::string *dp_str = dp.empty() ? NULL : &dp;

// add the word and its index plus its capitalized form optionally

- if (add_word(ts,wbl,wcl,flags,al,dp, false) ||

- add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) {

- delete dict;

- return 5;

+ if (add_word(ts, wcl, flags, al, dp_str, false) ||

+ add_hidden_capitalized_word(ts, wcl, flags, al, dp_str, captype)) {

+ delete dict;

+ return 5;

}

@@ -592,339 +732,460 @@ int HashMgr::load_tables(const char * tpath, const char * key)

// the hash function is a simple load and rotate

// algorithm borrowed

-int HashMgr::hash(const char * word) const

+int HashMgr::hash(const char* word) const {

#ifdef HUNSPELL_CHROME_CLIENT

return 0;

#else

- long hv = 0;

- for (int i=0; i < 4 && *word != 0; i++)

- hv = (hv << 8) | (*word++);

- while (*word != 0) {

- ROTATE(hv,ROTATE_LEN);

- hv ^= (*word++);

- }

- return (unsigned long) hv % tablesize;

+ unsigned long hv = 0;

+ for (int i = 0; i < 4 && *word != 0; i++)

+ hv = (hv << 8) | (*word++);

+ while (*word != 0) {

+ ROTATE(hv, ROTATE_LEN);

+ hv ^= (*word++);

+ }

+ return (unsigned long)hv % tablesize;

#endif

}

-int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {

- int len;

- if (*flags == '\0') {

- *result = NULL;

- return 0;

- }

- switch (flag_mode) {

- case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)

- len = strlen(flags);

- if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum());

- len /= 2;

- *result = (unsigned short *) malloc(len * sizeof(short));

- if (!*result) return -1;

- for (int i = 0; i < len; i++) {

- (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1];

- }

- break;

+int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const {

+ int len;

+ if (flags.empty()) {

+ *result = NULL;

+ return 0;

+ }

+ switch (flag_mode) {

+ case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)

+ len = flags.size();

+ if (len % 2 == 1)

+ HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",

+ af->getlinenum());

+ len /= 2;

+ *result = (unsigned short*)malloc(len * sizeof(unsigned short));

+ if (!*result)

+ return -1;

+ for (int i = 0; i < len; i++) {

+ (*result)[i] = ((unsigned short)((unsigned char)flags[i * 2]) << 8) +

+ (unsigned char)flags[i * 2 + 1];

}

- case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)

- int i;

- len = 1;

- char * src = flags;

- unsigned short * dest;

- char * p;

- for (p = flags; *p; p++) {

- if (*p == ',') len++;

- }

- *result = (unsigned short *) malloc(len * sizeof(short));

- if (!*result) return -1;

- dest = *result;

- for (p = flags; *p; p++) {

- if (*p == ',') {

- i = atoi(src);

- if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",

- af->getlinenum(), i, DEFAULTFLAGS - 1);

- *dest = (unsigned short) i;

- if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum());

- src = p + 1;

- dest++;

- }

- i = atoi(src);

- if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",

- af->getlinenum(), i, DEFAULTFLAGS - 1);

- *dest = (unsigned short) i;

- if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum());

- break;

- }

- case FLAG_UNI: { // UTF-8 characters

- w_char w[BUFSIZE/2];

- len = u8_u16(w, BUFSIZE/2, flags);

- *result = (unsigned short *) malloc(len * sizeof(short));

- if (!*result) return -1;

- memcpy(*result, w, len * sizeof(short));

- break;

+ break;

+ }

+ case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521

+ // 23 233)

+ len = 1;

+ unsigned short* dest;

+ for (size_t i = 0; i < flags.size(); ++i) {

+ if (flags[i] == ',')

+ len++;

}

- default: { // Ispell's one-character flags (erfg -> e r f g)

- unsigned short * dest;

- len = strlen(flags);

- *result = (unsigned short *) malloc(len * sizeof(short));

- if (!*result) return -1;

- dest = *result;

- for (unsigned char * p = (unsigned char *) flags; *p; p++) {

- *dest = (unsigned short) *p;

+ *result = (unsigned short*)malloc(len * sizeof(unsigned short));

+ if (!*result)

+ return -1;

+ dest = *result;

+ const char* src = flags.c_str();

+ for (const char* p = src; *p; p++) {

+ if (*p == ',') {

+ int i = atoi(src);

+ if (i >= DEFAULTFLAGS)

+ HUNSPELL_WARNING(

+ stderr, "error: line %d: flag id %d is too large (max: %d)\n",

+ af->getlinenum(), i, DEFAULTFLAGS - 1);

+ *dest = (unsigned short)i;

+ if (*dest == 0)

+ HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",

+ af->getlinenum());

+ src = p + 1;

dest++;

}

+ int i = atoi(src);

+ if (i >= DEFAULTFLAGS)

+ HUNSPELL_WARNING(stderr,

+ "error: line %d: flag id %d is too large (max: %d)\n",

+ af->getlinenum(), i, DEFAULTFLAGS - 1);

+ *dest = (unsigned short)i;

+ if (*dest == 0)

+ HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",

+ af->getlinenum());

+ break;

+ }

+ case FLAG_UNI: { // UTF-8 characters

+ std::vector<w_char> w;

+ u8_u16(w, flags);

+ len = w.size();

+ *result = (unsigned short*)malloc(len * sizeof(unsigned short));

+ if (!*result)

+ return -1;

+ memcpy(*result, &w[0], len * sizeof(short));

+ break;

}

- return len;

+ default: { // Ispell's one-character flags (erfg -> e r f g)

+ unsigned short* dest;

+ len = flags.size();

+ *result = (unsigned short*)malloc(len * sizeof(unsigned short));

+ if (!*result)

+ return -1;

+ dest = *result;

+ for (size_t i = 0; i < flags.size(); ++i) {

+ *dest = (unsigned char)flags[i];

+ dest++;

+ }

+ return len;

}

-unsigned short HashMgr::decode_flag(const char * f) {

- unsigned short s = 0;

- int i;

- switch (flag_mode) {

- case FLAG_LONG:

- s = ((unsigned short) f[0] << 8) + (unsigned short) f[1];

- break;

- case FLAG_NUM:

- i = atoi(f);

- if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);

- s = (unsigned short) i;

- break;

- case FLAG_UNI:

- u8_u16((w_char *) &s, 1, f);

- break;

- default:

- s = (unsigned short) *((unsigned char *)f);

+bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const {

+ if (flags.empty()) {

+ return false;

+ }

+ switch (flag_mode) {

+ case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)

+ size_t len = flags.size();

+ if (len % 2 == 1)

+ HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",

+ af->getlinenum());

+ len /= 2;

+ result.reserve(result.size() + len);

+ for (size_t i = 0; i < len; ++i) {

+ result.push_back(((unsigned short)((unsigned char)flags[i * 2]) << 8) +

+ (unsigned char)flags[i * 2 + 1]);

+ }

+ break;

+ }

+ case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521

+ // 23 233)

+ const char* src = flags.c_str();

+ for (const char* p = src; *p; p++) {

+ if (*p == ',') {

+ int i = atoi(src);

+ if (i >= DEFAULTFLAGS)

+ HUNSPELL_WARNING(

+ stderr, "error: line %d: flag id %d is too large (max: %d)\n",

+ af->getlinenum(), i, DEFAULTFLAGS - 1);

+ result.push_back((unsigned short)i);

+ if (result.back() == 0)

+ HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",

+ af->getlinenum());

+ src = p + 1;

+ }

+ int i = atoi(src);

+ if (i >= DEFAULTFLAGS)

+ HUNSPELL_WARNING(stderr,

+ "error: line %d: flag id %d is too large (max: %d)\n",

+ af->getlinenum(), i, DEFAULTFLAGS - 1);

+ result.push_back((unsigned short)i);

+ if (result.back() == 0)

+ HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",

+ af->getlinenum());

+ break;

+ }

+ case FLAG_UNI: { // UTF-8 characters

+ std::vector<w_char> w;

+ u8_u16(w, flags);

+ size_t len = w.size();

+ size_t origsize = result.size();

+ result.resize(origsize + len);

+ memcpy(&result[origsize], &w[0], len * sizeof(short));

+ break;

}

- if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");

- return s;

+ default: { // Ispell's one-character flags (erfg -> e r f g)

+ result.reserve(flags.size());

+ for (size_t i = 0; i < flags.size(); ++i) {

+ result.push_back((unsigned char)flags[i]);

+ }

+ return true;

}

-char * HashMgr::encode_flag(unsigned short f) {

- unsigned char ch[10];

- if (f==0) return mystrdup("(NULL)");

- if (flag_mode == FLAG_LONG) {

- ch[0] = (unsigned char) (f >> 8);

- ch[1] = (unsigned char) (f - ((f >> 8) << 8));

- ch[2] = '\0';

- } else if (flag_mode == FLAG_NUM) {

- sprintf((char *) ch, "%d", f);

- } else if (flag_mode == FLAG_UNI) {

- u16_u8((char *) &ch, 10, (w_char *) &f, 1);

- } else {

- ch[0] = (unsigned char) (f);

- ch[1] = '\0';

+unsigned short HashMgr::decode_flag(const char* f) const {

+ unsigned short s = 0;

+ int i;

+ switch (flag_mode) {

+ case FLAG_LONG:

+ s = ((unsigned short)((unsigned char)f[0]) << 8) + (unsigned char)f[1];

+ break;

+ case FLAG_NUM:

+ i = atoi(f);

+ if (i >= DEFAULTFLAGS)

+ HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n",

+ i, DEFAULTFLAGS - 1);

+ s = (unsigned short)i;

+ break;

+ case FLAG_UNI: {

+ std::vector<w_char> w;

+ u8_u16(w, f);

+ if (!w.empty())

+ memcpy(&s, &w[0], 1 * sizeof(short));

+ break;

}

- return mystrdup((char *) ch);

+ default:

+ s = *(unsigned char*)f;

+ }

+ if (s == 0)

+ HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");

+ return s;

+char* HashMgr::encode_flag(unsigned short f) const {

+ if (f == 0)

+ return mystrdup("(NULL)");

+ std::string ch;

+ if (flag_mode == FLAG_LONG) {

+ ch.push_back((unsigned char)(f >> 8));

+ ch.push_back((unsigned char)(f - ((f >> 8) << 8)));

+ } else if (flag_mode == FLAG_NUM) {

+ std::ostringstream stream;

+ stream << f;

+ ch = stream.str();

+ } else if (flag_mode == FLAG_UNI) {

+ const w_char* w_c = (const w_char*)&f;

+ std::vector<w_char> w(w_c, w_c + 1);

+ u16_u8(ch, w);

+ } else {

+ ch.push_back((unsigned char)(f));

+ }

+ return mystrdup(ch.c_str());

}

// read in aff file and set flag mode

-int HashMgr::load_config(const char * affpath, const char * key)

- char * line; // io buffers

+int HashMgr::load_config(const char* affpath, const char* key) {

int firstline = 1;

// open the affix file

#ifdef HUNSPELL_CHROME_CLIENT

hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator();

FileMgr * afflst = new FileMgr(&iterator);

#else

- FileMgr * afflst = new FileMgr(affpath, key);

+ FileMgr* afflst = new FileMgr(affpath, key);

#endif

if (!afflst) {

- HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);

+ HUNSPELL_WARNING(

+ stderr, "Error - could not open affix description file %s\n", affpath);

return 1;

}

- // read in each line ignoring any that do not

- // start with a known line type indicator

- while ((line = afflst->getline()) != NULL) {

- mychomp(line);

- /* remove byte order mark */

- if (firstline) {

- firstline = 0;

- if (strncmp(line,"\xEF\xBB\xBF",3) == 0) memmove(line, line+3, strlen(line+3)+1);

- }

- /* parse in the try string */

- if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {

- if (flag_mode != FLAG_CHAR) {

- HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of the FLAG affix file parameter\n", afflst->getlinenum());

- }

- if (strstr(line, "long")) flag_mode = FLAG_LONG;

- if (strstr(line, "num")) flag_mode = FLAG_NUM;

- if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;

- if (flag_mode == FLAG_CHAR) {

- HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", afflst->getlinenum());

- }

- if (strncmp(line,"FORBIDDENWORD",13) == 0) {

- char * st = NULL;

- if (parse_string(line, &st, afflst->getlinenum())) {

- delete afflst;

- return 1;

- }

- forbiddenword = decode_flag(st);

- free(st);

- }

- if (strncmp(line, "SET", 3) == 0) {

- if (parse_string(line, &enc, afflst->getlinenum())) {

- delete afflst;

- return 1;

- }

- if (strcmp(enc, "UTF-8") == 0) {

- utf8 = 1;

+ // read in each line ignoring any that do not

+ // start with a known line type indicator

+ std::string line;

+ while (afflst->getline(line)) {

+ mychomp(line);

+ /* remove byte order mark */

+ if (firstline) {

+ firstline = 0;

+ if (line.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {

+ line.erase(0, 3);

+ }

+ /* parse in the try string */

+ if ((line.compare(0, 4, "FLAG", 4) == 0) && line.size() > 4 && isspace(line[4])) {

+ if (flag_mode != FLAG_CHAR) {

+ HUNSPELL_WARNING(stderr,

+ "error: line %d: multiple definitions of the FLAG "

+ "affix file parameter\n",

+ afflst->getlinenum());

+ }

+ if (line.find("long") != std::string::npos)

+ flag_mode = FLAG_LONG;

+ if (line.find("num") != std::string::npos)

+ flag_mode = FLAG_NUM;

+ if (line.find("UTF-8") != std::string::npos)

+ flag_mode = FLAG_UNI;

+ if (flag_mode == FLAG_CHAR) {

+ HUNSPELL_WARNING(

+ stderr,

+ "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n",

+ afflst->getlinenum());

+ }

+ if (line.compare(0, 13, "FORBIDDENWORD", 13) == 0) {

+ std::string st;

+ if (!parse_string(line, st, afflst->getlinenum())) {

+ delete afflst;

+ return 1;

+ }

+ forbiddenword = decode_flag(st.c_str());

+ }

+ if (line.compare(0, 3, "SET", 3) == 0) {

+ if (!parse_string(line, enc, afflst->getlinenum())) {

+ delete afflst;

+ return 1;

+ }

+ if (enc == "UTF-8") {

+ utf8 = 1;

#ifndef OPENOFFICEORG

#ifndef MOZILLA_CLIENT

- initialize_utf_tbl();

+ initialize_utf_tbl();

#endif

- } else csconv = get_current_cs(enc);

- }

- if (strncmp(line, "LANG", 4) == 0) {

- if (parse_string(line, &lang, afflst->getlinenum())) {

- delete afflst;

- return 1;

- }

- langnum = get_lang_num(lang);

- }

- /* parse in the ignored characters (for example, Arabic optional diacritics characters */

- if (strncmp(line,"IGNORE",6) == 0) {

- if (parse_array(line, &ignorechars, &ignorechars_utf16,

- &ignorechars_utf16_len, utf8, afflst->getlinenum())) {

- delete afflst;

- return 1;

- }

+ } else

+ csconv = get_current_cs(enc);

+ }

- if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) {

- if (parse_aliasf(line, afflst)) {

- delete afflst;

- return 1;

- }

+ if (line.compare(0, 4, "LANG", 4) == 0) {

+ if (!parse_string(line, lang, afflst->getlinenum())) {

+ delete afflst;

+ return 1;

+ }

+ langnum = get_lang_num(lang);

+ }

- if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) {

- if (parse_aliasm(line, afflst)) {

- delete afflst;

- return 1;

- }

+ /* parse in the ignored characters (for example, Arabic optional diacritics

+ * characters */

+ if (line.compare(0, 6, "IGNORE", 6) == 0) {

+ if (!parse_array(line, ignorechars, ignorechars_utf16,

+ utf8, afflst->getlinenum())) {

+ delete afflst;

+ return 1;

+ }

+ if ((line.compare(0, 2, "AF", 2) == 0) && line.size() > 2 && isspace(line[2])) {

+ if (!parse_aliasf(line, afflst)) {

+ delete afflst;

+ return 1;

+ }

- if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;

- if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;

+ if ((line.compare(0, 2, "AM", 2) == 0) && line.size() > 2 && isspace(line[2])) {

+ if (!parse_aliasm(line, afflst)) {

+ delete afflst;

+ return 1;

+ }

}

- if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING);

- delete afflst;

- return 0;

+ if (line.compare(0, 15, "COMPLEXPREFIXES", 15) == 0)

+ complexprefixes = 1;

+ if (((line.compare(0, 3, "SFX", 3) == 0) ||

+ (line.compare(0, 3, "PFX", 3) == 0)) && line.size() > 3 && isspace(line[3]))

+ break;

+ }

+ if (csconv == NULL)

+ csconv = get_current_cs(SPELL_ENCODING);

+ delete afflst;

+ return 0;

}

/* parse the AF (flag vector alias) table */

-int HashMgr::parse_aliasf(char * line, FileMgr * af)

- if (numaliasf != 0) {

- HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());

- return 1;

- }

- char * tp = line;

- char * piece;

- int i = 0;

- int np = 0;

- piece = mystrsep(&tp, 0);

- while (piece) {

- if (*piece != '\0') {

- switch(i) {

- case 0: { np++; break; }

- case 1: {

- numaliasf = atoi(piece);

- if (numaliasf < 1) {

- numaliasf = 0;

- aliasf = NULL;

- aliasflen = NULL;

- HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());

- return 1;

- }

- aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *));

- aliasflen = (unsigned short *) malloc(numaliasf * sizeof(short));

- if (!aliasf || !aliasflen) {

- numaliasf = 0;

- if (aliasf) free(aliasf);

- if (aliasflen) free(aliasflen);

- aliasf = NULL;

- aliasflen = NULL;

- return 1;

- }

- np++;

- break;

- }

- default: break;

+bool HashMgr::parse_aliasf(const std::string& line, FileMgr* af) {

+ if (numaliasf != 0) {

+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",

+ af->getlinenum());

+ return false;

+ }

+ int i = 0;

+ int np = 0;

+ std::string::const_iterator iter = line.begin();

+ std::string::const_iterator start_piece = mystrsep(line, iter);

+ while (start_piece != line.end()) {

+ switch (i) {

+ case 0: {

+ np++;

+ break;

+ }

+ case 1: {

+ numaliasf = atoi(std::string(start_piece, iter).c_str());

+ if (numaliasf < 1) {

+ numaliasf = 0;

+ aliasf = NULL;

+ aliasflen = NULL;

+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",

+ af->getlinenum());

+ return false;

+ }

+ aliasf =

+ (unsigned short**)malloc(numaliasf * sizeof(unsigned short*));

+ aliasflen =

+ (unsigned short*)malloc(numaliasf * sizeof(unsigned short));

+ if (!aliasf || !aliasflen) {

+ numaliasf = 0;

+ if (aliasf)

+ free(aliasf);

+ if (aliasflen)

+ free(aliasflen);

+ aliasf = NULL;

+ aliasflen = NULL;

+ return false;

+ }

+ np++;

+ break;

+ }

+ default:

+ break;

+ }

+ ++i;

+ start_piece = mystrsep(line, iter);

+ }

+ if (np != 2) {

+ numaliasf = 0;

+ free(aliasf);

+ free(aliasflen);

+ aliasf = NULL;

+ aliasflen = NULL;

+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",

+ af->getlinenum());

+ return false;

+ }

+ /* now parse the numaliasf lines to read in the remainder of the table */

+ for (int j = 0; j < numaliasf; j++) {

+ std::string nl;

+ if (!af->getline(nl))

+ return false;

+ mychomp(nl);

+ i = 0;

+ aliasf[j] = NULL;

+ aliasflen[j] = 0;

+ iter = nl.begin();

+ start_piece = mystrsep(nl, iter);

+ while (start_piece != nl.end()) {

+ switch (i) {

+ case 0: {

+ if (nl.compare(start_piece - nl.begin(), 2, "AF", 2) != 0) {

+ numaliasf = 0;

+ free(aliasf);

+ free(aliasflen);

+ aliasf = NULL;

+ aliasflen = NULL;

+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",

+ af->getlinenum());

+ return false;

}

- i++;

- }

- piece = mystrsep(&tp, 0);

- }

- if (np != 2) {

- numaliasf = 0;

+ break;

+ }

+ case 1: {

+ std::string piece(start_piece, iter);

+ aliasflen[j] =

+ (unsigned short)decode_flags(&(aliasf[j]), piece, af);

+ std::sort(aliasf[j], aliasf[j] + aliasflen[j]);

+ break;

+ }

+ default:

+ break;

+ }

+ ++i;

+ start_piece = mystrsep(nl, iter);

+ }

+ if (!aliasf[j]) {

free(aliasf);

free(aliasflen);

aliasf = NULL;

aliasflen = NULL;

- HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());

- return 1;

- }

- /* now parse the numaliasf lines to read in the remainder of the table */

- char * nl;

- for (int j=0; j < numaliasf; j++) {

- if ((nl = af->getline()) == NULL) return 1;

- mychomp(nl);

- tp = nl;

- i = 0;

- aliasf[j] = NULL;

- aliasflen[j] = 0;

- piece = mystrsep(&tp, 0);

- while (piece) {

- if (*piece != '\0') {

- switch(i) {

- case 0: {

- if (strncmp(piece,"AF",2) != 0) {

- numaliasf = 0;

- free(aliasf);

- free(aliasflen);

- aliasf = NULL;

- aliasflen = NULL;

- HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());

- return 1;

- }

- break;

- }

- case 1: {

- aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece, af);

- flag_qsort(aliasf[j], 0, aliasflen[j]);

- break;

- }

- default: break;

- }

- i++;

- }

- piece = mystrsep(&tp, 0);

- }

- if (!aliasf[j]) {

- free(aliasf);

- free(aliasflen);

- aliasf = NULL;

- aliasflen = NULL;

- numaliasf = 0;

- HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());

- return 1;

- }

- return 0;

+ numaliasf = 0;

+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",

+ af->getlinenum());

+ return false;

+ }

+ return true;

}

#ifdef HUNSPELL_CHROME_CLIENT

@@ -935,7 +1196,8 @@ int HashMgr::LoadAFLines()

// Read in all the AF lines which tell us the rules for each affix group ID.

hunspell::LineIterator iterator = bdict_reader->GetAfLineIterator();

FileMgr afflst(&iterator);

- while (char* line = afflst.getline()) {

+ std::string line;

+ while (afflst.getline(line)) {

int rv = parse_aliasf(line, &afflst);

if (rv)

return rv;

@@ -1042,127 +1304,129 @@ hentry* HashMgr::GetHentryFromHEntryCache(char* word) {

}

#endif

-int HashMgr::is_aliasf() {

- return (aliasf != NULL);

+int HashMgr::is_aliasf() const {

+ return (aliasf != NULL);

}

-int HashMgr::get_aliasf(int index, unsigned short ** fvec, FileMgr * af) {

- if ((index > 0) && (index <= numaliasf)) {

- *fvec = aliasf[index - 1];

- return aliasflen[index - 1];

- }

- HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", af->getlinenum(), index);

- *fvec = NULL;

- return 0;

+int HashMgr::get_aliasf(int index, unsigned short** fvec, FileMgr* af) const {

+ if ((index > 0) && (index <= numaliasf)) {

+ *fvec = aliasf[index - 1];

+ return aliasflen[index - 1];

+ }

+ HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n",

+ af->getlinenum(), index);

+ *fvec = NULL;

+ return 0;

}

/* parse morph alias definitions */

-int HashMgr::parse_aliasm(char * line, FileMgr * af)

- if (numaliasm != 0) {

- HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());

- return 1;

- }

- char * tp = line;

- char * piece;

- int i = 0;

- int np = 0;

- piece = mystrsep(&tp, 0);

- while (piece) {

- if (*piece != '\0') {

- switch(i) {

- case 0: { np++; break; }

- case 1: {

- numaliasm = atoi(piece);

- if (numaliasm < 1) {

- HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());

- return 1;

- }

- aliasm = (char **) malloc(numaliasm * sizeof(char *));

- if (!aliasm) {

- numaliasm = 0;

- return 1;

- }

- np++;

- break;

- }

- default: break;

+bool HashMgr::parse_aliasm(const std::string& line, FileMgr* af) {

+ if (numaliasm != 0) {

+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",

+ af->getlinenum());

+ return false;

+ }

+ int i = 0;

+ int np = 0;

+ std::string::const_iterator iter = line.begin();

+ std::string::const_iterator start_piece = mystrsep(line, iter);

+ while (start_piece != line.end()) {

+ switch (i) {

+ case 0: {

+ np++;

+ break;

+ }

+ case 1: {

+ numaliasm = atoi(std::string(start_piece, iter).c_str());

+ if (numaliasm < 1) {

+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",

+ af->getlinenum());

+ return false;

+ }

+ aliasm = (char**)malloc(numaliasm * sizeof(char*));

+ if (!aliasm) {

+ numaliasm = 0;

+ return false;

+ }

+ np++;

+ break;

+ }

+ default:

+ break;

+ }

+ ++i;

+ start_piece = mystrsep(line, iter);

+ }

+ if (np != 2) {

+ numaliasm = 0;

+ free(aliasm);

+ aliasm = NULL;

+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",

+ af->getlinenum());

+ return false;

+ }

+ /* now parse the numaliasm lines to read in the remainder of the table */

+ for (int j = 0; j < numaliasm; j++) {

+ std::string nl;

+ if (!af->getline(nl))

+ return false;

+ mychomp(nl);

+ aliasm[j] = NULL;

+ iter = nl.begin();

+ i = 0;

+ start_piece = mystrsep(nl, iter);

+ while (start_piece != nl.end()) {

+ switch (i) {

+ case 0: {

+ if (nl.compare(start_piece - nl.begin(), 2, "AM", 2) != 0) {

+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",

+ af->getlinenum());

+ numaliasm = 0;

+ free(aliasm);

+ aliasm = NULL;

+ return false;

+ }

+ break;

+ }

+ case 1: {

+ // add the remaining of the line

+ std::string::const_iterator end = nl.end();

+ std::string chunk(start_piece, end);

+ if (complexprefixes) {

+ if (utf8)

+ reverseword_utf(chunk);

+ else

+ reverseword(chunk);

}

- i++;

- }

- piece = mystrsep(&tp, 0);

- }

- if (np != 2) {

+ aliasm[j] = mystrdup(chunk.c_str());

+ break;

+ }

+ default:

+ break;

+ }

+ ++i;

+ start_piece = mystrsep(nl, iter);

+ }

+ if (!aliasm[j]) {

numaliasm = 0;

free(aliasm);

aliasm = NULL;

- HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());

- return 1;

- }

- /* now parse the numaliasm lines to read in the remainder of the table */

- char * nl = line;

- for (int j=0; j < numaliasm; j++) {

- if ((nl = af->getline()) == NULL) return 1;

- mychomp(nl);

- tp = nl;

- i = 0;

- aliasm[j] = NULL;

- piece = mystrsep(&tp, ' ');

- while (piece) {

- if (*piece != '\0') {

- switch(i) {

- case 0: {

- if (strncmp(piece,"AM",2) != 0) {

- HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());

- numaliasm = 0;

- free(aliasm);

- aliasm = NULL;

- return 1;

- }

- break;

- }

- case 1: {

- // add the remaining of the line

- if (*tp) {

- *(tp - 1) = ' ';

- tp = tp + strlen(tp);

- }

- if (complexprefixes) {

- if (utf8) reverseword_utf(piece);

- else reverseword(piece);

- }

- aliasm[j] = mystrdup(piece);

- if (!aliasm[j]) {

- numaliasm = 0;

- free(aliasm);

- aliasm = NULL;

- return 1;

- }

- break; }

- default: break;

- }

- i++;

- }

- piece = mystrsep(&tp, ' ');

- }

- if (!aliasm[j]) {

- numaliasm = 0;

- free(aliasm);

- aliasm = NULL;

- HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());

- return 1;

- }

- return 0;

+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",

+ af->getlinenum());

+ return false;

+ }

+ return true;

}

-int HashMgr::is_aliasm() {

- return (aliasm != NULL);

+int HashMgr::is_aliasm() const {

+ return (aliasm != NULL);

}

-char * HashMgr::get_aliasm(int index) {

- if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1];

- HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);

- return NULL;

+char* HashMgr::get_aliasm(int index) const {

+ if ((index > 0) && (index <= numaliasm))

+ return aliasm[index - 1];

+ HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);

+ return NULL;

}

« no previous file with comments | « third_party/hunspell/src/hunspell/hashmgr.hxx ('k') | third_party/hunspell/src/hunspell/htypes.hxx » ('j') | no next file with comments »