chrome/third_party/hunspell/src/hunspell/hashmgr.cxx - Issue 155841: Update Hunspell to the latest stable version to use the latest dictionary for...

Unified Diff: chrome/third_party/hunspell/src/hunspell/hashmgr.cxx

Issue 155841: Update Hunspell to the latest stable version to use the latest dictionary for... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: '' Created 11 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/third_party/hunspell/src/hunspell/hashmgr.cxx

===================================================================

--- chrome/third_party/hunspell/src/hunspell/hashmgr.cxx (revision 21721)

+++ chrome/third_party/hunspell/src/hunspell/hashmgr.cxx (working copy)

@@ -22,18 +22,19 @@

using namespace std;

#endif

#else

-#ifndef W32

+#ifndef WIN32

using namespace std;

#endif

// build a hash table from a munched word list

#ifdef HUNSPELL_CHROME_CLIENT

HashMgr::HashMgr(hunspell::BDictReader* reader)

{

bdict_reader = reader;

#else

-HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle)

+HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle, const char * key)

{

#endif

tablesize = 0;

@@ -41,6 +42,10 @@

flag_mode = FLAG_CHAR;

complexprefixes = 0;

utf8 = 0;

+ langnum = 0;

+ lang = NULL;

+ enc = NULL;

+ csconv = 0;

ignorechars = NULL;

ignorechars_utf16 = NULL;

ignorechars_utf16_len = 0;

@@ -48,12 +53,13 @@

aliasf = NULL;

numaliasm = 0;

aliasm = NULL;

+ forbiddenword = FORBIDDENWORD; // forbidden word signing flag

#ifdef HUNSPELL_CHROME_CLIENT

// No tables to load, just the AF config.

int ec = load_config();

#else

load_config(aff_handle);

- int ec = load_tables(dic_handle);

+ int ec = load_tables(dic_handle, key);

#endif

if (ec) {

/* error condition - what should we do here */

@@ -73,29 +79,16 @@

// now pass through hash table freeing up everything

// go through column by column of the table

for (int i=0; i < tablesize; i++) {

- struct hentry * pt = &tableptr[i];

+ struct hentry * pt = tableptr[i];

struct hentry * nt = NULL;

- if (pt) {

- if (pt->astr && !aliasf) free(pt->astr);

- if (pt->word) free(pt->word);

-#ifdef HUNSPELL_EXPERIMENTAL

- if (pt->description && !aliasm) free(pt->description);

-#endif

- pt = pt->next;

- }

while(pt) {

nt = pt->next;

- if (pt->astr && !aliasf) free(pt->astr);

- if (pt->word) free(pt->word);

-#ifdef HUNSPELL_EXPERIMENTAL

- if (pt->description && !aliasm) free(pt->description);

-#endif

+ if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) free(pt->astr);

free(pt);

pt = nt;

}

free(tableptr);

- tableptr = NULL;

}

tablesize = 0;

@@ -113,6 +106,15 @@

free(aliasm);

aliasm = NULL;

}

+#ifndef OPENOFFICEORG

+#ifndef MOZILLA_CLIENT

+ if (utf8) free_utf_tbl();

+#endif

+ if (enc) free(enc);

+ if (lang) free(lang);

if (ignorechars) free(ignorechars);

if (ignorechars_utf16) free(ignorechars_utf16);

@@ -144,7 +146,6 @@

#endif

// lookup a root word in the hashtable

struct hentry * HashMgr::lookup(const char *word) const

{

#ifdef HUNSPELL_CHROME_CLIENT

@@ -167,10 +168,10 @@

#else

struct hentry * dp;

if (tableptr) {

- dp = &tableptr[hash(word)];

- if (dp->word == NULL) return NULL;

+ dp = tableptr[hash(word)];

+ if (!dp) return NULL;

for ( ; dp != NULL; dp = dp->next) {

- if (strcmp(word,dp->word) == 0) return dp;

+ if (strcmp(word,&(dp->word)) == 0) return dp;

}

return NULL;

@@ -178,69 +179,101 @@

}

// add a word to the hash table (private)

-int HashMgr::add_word(const char * word, int wl, unsigned short * aff, int al, const char * desc)

+int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,

+ int al, const char * desc, bool onlyupcase)

{

#ifndef HUNSPELL_CHROME_CLIENT

- char * st = mystrdup(word);

- if (wl && !st) return 1;

+ bool upcasehomonym = false;

+ int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;

+ // variable-length hash record with word and optional fields

+ struct hentry* hp =

+ (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);

+ if (!hp) return 1;

+ char * hpw = &(hp->word);

+ strcpy(hpw, word);

if (ignorechars != NULL) {

if (utf8) {

- remove_ignored_chars_utf(st, ignorechars_utf16, ignorechars_utf16_len);

+ remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len);

} else {

- remove_ignored_chars(st, ignorechars);

+ remove_ignored_chars(hpw, ignorechars);

}

if (complexprefixes) {

- if (utf8) reverseword_utf(st); else reverseword(st);

+ if (utf8) reverseword_utf(hpw); else reverseword(hpw);

}

- int i = hash(st);

- struct hentry * dp = &tableptr[i];

- if (dp->word == NULL) {

- dp->wlen = (short) wl;

- dp->alen = (short) al;

- dp->word = st;

- dp->astr = aff;

- dp->next = NULL;

- dp->next_homonym = NULL;

-#ifdef HUNSPELL_EXPERIMENTAL

- if (aliasm) {

- dp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);

- } else {

- dp->description = mystrdup(desc);

- if (desc && !dp->description) return 1;

- if (dp->description && complexprefixes) {

- if (utf8) reverseword_utf(dp->description); else reverseword(dp->description);

+ int i = hash(hpw);

+ hp->blen = (unsigned char) wbl;

+ hp->clen = (unsigned char) wcl;

+ hp->alen = (short) al;

+ hp->astr = aff;

+ hp->next = NULL;

+ hp->next_homonym = NULL;

+ // store the description string or its pointer

+ if (desc) {

+ hp->var = H_OPT;

+ if (aliasm) {

+ hp->var += H_OPT_ALIASM;

+ store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));

+ } else {

+ strcpy(hpw + wbl + 1, desc);

+ if (complexprefixes) {

+ if (utf8) reverseword_utf(HENTRY_DATA(hp));

+ else reverseword(HENTRY_DATA(hp));

}

+ }

+ if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON;

+ } else hp->var = 0;

+ struct hentry * dp = tableptr[i];

+ if (!dp) {

+ tableptr[i] = hp;

+ return 0;

}

-#endif

- } else {

- struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry));

- if (!hp) return 1;

- hp->wlen = (short) wl;

- hp->alen = (short) al;

- hp->word = st;

- hp->astr = aff;

- hp->next = NULL;

- hp->next_homonym = NULL;

-#ifdef HUNSPELL_EXPERIMENTAL

- if (aliasm) {

- hp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);

- } else {

- hp->description = mystrdup(desc);

- if (desc && !hp->description) return 1;

- if (dp->description && complexprefixes) {

- if (utf8) reverseword_utf(hp->description); else reverseword(hp->description);

+ while (dp->next != NULL) {

+ if ((!dp->next_homonym) && (strcmp(&(hp->word), &(dp->word)) == 0)) {

+ // remove hidden onlyupcase homonym

+ if (!onlyupcase) {

+ if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {

+ free(dp->astr);

+ dp->astr = hp->astr;

+ dp->alen = hp->alen;

+ free(hp);

+ return 0;

+ } else {

+ dp->next_homonym = hp;

+ }

+ } else {

+ upcasehomonym = true;

}

- }

-#endif

- while (dp->next != NULL) {

- if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;

+ }

dp=dp->next;

}

- if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;

- dp->next = hp;

- }

+ if (strcmp(&(hp->word), &(dp->word)) == 0) {

+ // remove hidden onlyupcase homonym

+ if (!onlyupcase) {

+ if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {

+ free(dp->astr);

+ dp->astr = hp->astr;

+ dp->alen = hp->alen;

+ free(hp);

+ return 0;

+ } else {

+ dp->next_homonym = hp;

+ }

+ } else {

+ upcasehomonym = true;

+ }

+ if (!upcasehomonym) {

+ dp->next = hp;

+ } else {

+ // remove hidden onlyupcase homonym

+ if (hp->astr) free(hp->astr);

+ free(hp);

+ }

#endif // HUNSPELL_CHROME_CLIENT

std::map<StringPiece, int>::iterator iter =

custom_word_to_affix_id_map_.find(word);

@@ -255,33 +288,134 @@

return 0;

}

-// add a custom dic. word to the hash table (public)

-int HashMgr::put_word(const char * word, int wl, char * aff)

+int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,

+ unsigned short * flags, int al, char * dp, int captype)

{

- unsigned short * flags;

- int al = 0;

- if (aff) {

- al = decode_flags(&flags, aff);

- flag_qsort(flags, 0, al);

+ // add inner capitalized forms to handle the following allcap forms:

+ // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG

+ // Allcaps with suffixes: CIA's -> CIA'S

+ if (((captype == HUHCAP) || (captype == HUHINITCAP) ||

+ ((captype == ALLCAP) && (flags != NULL))) &&

+ !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) {

+ unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1));

+ if (!flags2) return 1;

+ if (al) memcpy(flags2, flags, al * sizeof(unsigned short));

+ flags2[al] = ONLYUPCASEFLAG;

+ if (utf8) {

+ char st[BUFSIZE];

+ w_char w[BUFSIZE];

+ int wlen = u8_u16(w, BUFSIZE, word);

+ mkallsmall_utf(w, wlen, langnum);

+ mkallcap_utf(w, 1, langnum);

+ u16_u8(st, BUFSIZE, w, wlen);

+ return add_word(st,wbl,wcl,flags2,al+1,dp, true);

+ } else {

+ mkallsmall(word, csconv);

+ mkinitcap(word, csconv);

+ return add_word(word,wbl,wcl,flags2,al+1,dp, true);

+ }

+ return 0;

+// detect captype and modify word length for UTF-8 encoding

+int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {

+ int len;

+ if (utf8) {

+ w_char dest_utf[BUFSIZE];

+ len = u8_u16(dest_utf, BUFSIZE, word);

+ *captype = get_captype_utf8(dest_utf, len, langnum);

} else {

- flags = NULL;

+ len = wbl;

+ *captype = get_captype((char *) word, len, csconv);

}

- add_word(word, wl, flags, al, NULL);

+ return len;

+// remove word (personal dictionary function for standalone applications)

+int HashMgr::remove(const char * word)

+ struct hentry * dp = lookup(word);

+ while (dp) {

+ if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {

+ unsigned short * flags =

+ (unsigned short *) malloc(sizeof(short *) * (dp->alen + 1));

+ if (!flags) return 1;

+ for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i];

+ flags[dp->alen] = forbiddenword;

+ dp->astr = flags;

+ dp->alen++;

+ flag_qsort(flags, 0, dp->alen);

+ }

+ dp = dp->next_homonym;

+ }

return 0;

}

-int HashMgr::put_word_pattern(const char * word, int wl, const char * pattern)

+/* remove forbidden flag to add a personal word to the hash */

+int HashMgr::remove_forbidden_flag(const char * word) {

+ struct hentry * dp = lookup(word);

+ if (!dp) return 1;

+ while (dp) {

+ if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) {

+ if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic.

+ else {

+ unsigned short * flags2 =

+ (unsigned short *) malloc(sizeof(short *) * (dp->alen - 1));

+ if (!flags2) return 1;

+ int i, j = 0;

+ for (i = 0; i < dp->alen; i++) {

+ if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i];

+ }

+ dp->alen--;

+ dp->astr = flags2; // XXX allowed forbidden words

+ }

+ dp = dp->next_homonym;

+ }

+ return 0;

+// add a custom dic. word to the hash table (public)

+int HashMgr::add(const char * word)

{

- unsigned short * flags;

- struct hentry * dp = lookup(pattern);

- if (!dp || !dp->astr) return 1;

- flags = (unsigned short *) malloc (dp->alen * sizeof(short));

- memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));

- add_word(word, wl, flags, dp->alen, NULL);

+ unsigned short * flags = NULL;

+ int al = 0;

+ if (remove_forbidden_flag(word)) {

+ int captype;

+ int wbl = strlen(word);

+ int wcl = get_clen_and_captype(word, wbl, &captype);

+ add_word(word, wbl, wcl, flags, al, NULL, false);

+ return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype);

+ }

return 0;

}

+int HashMgr::add_with_affix(const char * word, const char * example)

+ // detect captype and modify word length for UTF-8 encoding

+ struct hentry * dp = lookup(example);

+ remove_forbidden_flag(word);

+ if (dp && dp->astr) {

+ int captype;

+ int wbl = strlen(word);

+ int wcl = get_clen_and_captype(word, wbl, &captype);

+ if (aliasf) {

+ add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false);

+ } else {

+ unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short));

+ if (flags) {

+ memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));

+ add_word(word, wbl, wcl, flags, dp->alen, NULL, false);

+ } else return 1;

+ }

+ return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp->alen, NULL, captype);

+ }

+ return 1;

// walk the hash table entry by entry - null at end

+// initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);

struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const

{

#ifdef HUNSPELL_CHROME_CLIENT

@@ -312,88 +446,99 @@

// lists for the extra affixes. If hp is NULL, create it here.

if (!hp)

hp = new hentry;

- hp->word = word;

- hp->wlen = word_len;

+ hp->word = *word;

+ hp->blen = word_len;

hp->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[0],

&hp->astr);

hp->next = NULL;

hp->next_homonym = NULL;

+ hp->var = 0;

+ hp->clen = 0;

return hp;

#else

- //reset to start

- if ((col < 0) || (hp == NULL)) {

- col = -1;

- hp = NULL;

+ if (hp && hp->next != NULL) return hp->next;

+ for (col++; col < tablesize; col++) {

+ if (tableptr[col]) return tableptr[col];

}

- if (hp && hp->next != NULL) {

- hp = hp->next;

- } else {

- col++;

- hp = (col < tablesize) ? &tableptr[col] : NULL;

- // search for next non-blank column entry

- while (hp && (hp->word == NULL)) {

- col ++;

- hp = (col < tablesize) ? &tableptr[col] : NULL;

- }

- if (col < tablesize) return hp;

- hp = NULL;

- col = -1;

- }

- return hp;

+ // null at end and reset to start

+ col = -1;

+ return NULL;

#endif

}

// load a munched word list and build a hash table on the fly

-int HashMgr::load_tables(FILE* t_handle)

+int HashMgr::load_tables(FILE* t_handle, const char * key)

{

#ifndef HUNSPELL_CHROME_CLIENT

- int wl, al;

+ int al;

char * ap;

char * dp;

+ char * dp2;

unsigned short * flags;

+ char * ts;

- // raw dictionary - munched file

- FILE * rawdict = _fdopen(_dup(_fileno(t_handle)), "r");

- if (rawdict == NULL) return 1;

- fseek(rawdict, 0, SEEK_SET);

+ // open dictionary file

+ FileMgr * dict = new FileMgr(tpath, key);

+ if (dict == NULL) return 1;

// first read the first line of file to get hash table size */

- char ts[MAXDELEN];

- if (! fgets(ts, MAXDELEN-1,rawdict)) return 2;

+ if (!(ts = dict->getline())) {

+ HUNSPELL_WARNING(stderr, "error: empty dic file\n");

+ delete dict;

+ return 2;

+ }

mychomp(ts);

/* remove byte order mark */

- if (strncmp(ts,"\xef\xbb\xbf",3) == 0) {

+ if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) {

memmove(ts, ts+3, strlen(ts+3)+1);

HUNSPELL_WARNING(stderr, "warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions\n");

}

- if ((*ts < '1') || (*ts > '9')) HUNSPELL_WARNING(stderr, "error - missing word count in dictionary file\n");

tablesize = atoi(ts);

- if (!tablesize) return 4;

+ if (tablesize == 0) {

+ HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n");

+ delete dict;

+ return 4;

+ }

tablesize = tablesize + 5 + USERWORD;

if ((tablesize %2) == 0) tablesize++;

// allocate the hash table

- tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry));

- if (! tableptr) return 3;

- for (int i=0; i<tablesize; i++) tableptr[i].word = NULL;

+ tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *));

+ if (! tableptr) {

+ delete dict;

+ return 3;

+ }

+ for (int i=0; i<tablesize; i++) tableptr[i] = NULL;

// loop through all words on much list and add to hash

// table and create word and affix strings

- while (fgets(ts,MAXDELEN-1,rawdict)) {

+ while ((ts = dict->getline())) {

mychomp(ts);

// split each line into word and morphological description

- dp = strchr(ts,'\t');

+ dp = ts;

+ while ((dp = strchr(dp, ':'))) {

+ if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) {

+ for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--);

+ if (dp < ts) { // missing word

+ dp = NULL;

+ } else {

+ *(dp + 1) = '\0';

+ dp = dp + 2;

+ }

+ break;

+ }

+ dp++;

+ }

- if (dp) {

- *dp = '\0';

- dp++;

- } else {

- dp = NULL;

+ // tabulator is the old morphological field separator

+ dp2 = strchr(ts, '\t');

+ if (dp2 && (!dp || dp2 < dp)) {

+ *dp2 = '\0';

+ dp = dp2 + 1;

}

// split each line into word and affix char strings

@@ -414,13 +559,13 @@

*ap = '\0';

if (aliasf) {

int index = atoi(ap + 1);

- al = get_aliasf(index, &flags);

+ al = get_aliasf(index, &flags, dict);

if (!al) {

- HUNSPELL_WARNING(stderr, "error - bad flag vector alias: %s\n", ts);

+ HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", dict->getlinenum());

*ap = '\0';

}

} else {

- al = decode_flags(&flags, ap + 1);

+ al = decode_flags(&flags, ap + 1, dict);

flag_qsort(flags, 0, al);

}

} else {

@@ -429,19 +574,22 @@

flags = NULL;

}

- wl = strlen(ts);

+ int captype;

+ int wbl = strlen(ts);

+ int wcl = get_clen_and_captype(ts, wbl, &captype);

+ // add the word and its index plus its capitalized form optionally

+ if (add_word(ts,wbl,wcl,flags,al,dp, false) ||

+ add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) {

+ delete dict;

+ return 5;

+ }

- // add the word and its index

- if (add_word(ts,wl,flags,al,dp)) return 5;

- }

- fclose(rawdict);

+ delete dict;

#endif

return 0;

}

// the hash function is a simple load and rotate

// algorithm borrowed

@@ -466,15 +614,17 @@

switch (flag_mode) {

case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)

len = strlen(flags);

- if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: length of FLAG_LONG flagvector is odd: %s\n", flags);

- len = len/2;

+ if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: bad flagvector\n");

+ len /= 2;

*result = (unsigned short *) malloc(len * sizeof(short));

+ if (!*result) return -1;

for (int i = 0; i < len; i++) {

(*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1];

}

break;

}

case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)

+ int i;

len = 1;

char * src = flags;

unsigned short * dest;

@@ -483,23 +633,29 @@

if (*p == ',') len++;

}

*result = (unsigned short *) malloc(len * sizeof(short));

+ if (!*result) return -1;

dest = *result;

for (p = flags; *p; p++) {

if (*p == ',') {

- *dest = (unsigned short) atoi(src);

+ i = atoi(src);

+ if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);

+ *dest = (unsigned short) i;

if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");

src = p + 1;

dest++;

}

- *dest = (unsigned short) atoi(src);

+ i = atoi(src);

+ if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);

+ *dest = (unsigned short) i;

if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");

break;

}

case FLAG_UNI: { // UTF-8 characters

- w_char w[MAXDELEN/2];

- len = u8_u16(w, MAXDELEN/2, flags);

+ w_char w[BUFSIZE/2];

+ len = u8_u16(w, BUFSIZE/2, flags);

*result = (unsigned short *) malloc(len * sizeof(short));

+ if (!*result) return -1;

memcpy(*result, w, len * sizeof(short));

break;

}

@@ -507,24 +663,28 @@

unsigned short * dest;

len = strlen(flags);

*result = (unsigned short *) malloc(len * sizeof(short));

+ if (!*result) return -1;

dest = *result;

for (unsigned char * p = (unsigned char *) flags; *p; p++) {

*dest = (unsigned short) *p;

dest++;

}

- }

+ }

return len;

}

unsigned short HashMgr::decode_flag(const char * f) {

unsigned short s = 0;

+ int i;

switch (flag_mode) {

case FLAG_LONG:

s = ((unsigned short) f[0] << 8) + (unsigned short) f[1];

break;

case FLAG_NUM:

- s = (unsigned short) atoi(f);

+ i = atoi(f);

+ if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);

+ s = (unsigned short) i;

break;

case FLAG_UNI:

u8_u16((w_char *) &s, 1, f);

@@ -532,7 +692,7 @@

default:

s = (unsigned short) *((unsigned char *)f);

}

- if (!s) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");

+ if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");

return s;

}

@@ -569,7 +729,7 @@

// diacritics characters.

if (strncmp(line,"IGNORE",6) == 0) {

parse_array(line, &ignorechars, &ignorechars_utf16,

- &ignorechars_utf16_len, "IGNORE", utf8);

+ &ignorechars_utf16_len, utf8, 0);

}

// Retrieve the format of an AF line.

if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {

@@ -591,75 +751,101 @@

}

#else

// read in aff file and set flag mode

-int HashMgr::load_config(FILE* aff_handle)

+int HashMgr::load_config(FILE* aff_handle, const char * key)

{

+ char * line; // io buffers

int firstline = 1;

- // io buffers

- char line[MAXDELEN+1];

// open the affix file

- FILE * afflst;

- afflst = _fdopen(_dup(_fileno(aff_handle)), "r");

+ FileMgr * afflst = new FileMgr(affpath, key);

if (!afflst) {

HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n");

return 1;

}

- fseek(afflst, 0, SEEK_SET);

// read in each line ignoring any that do not

// start with a known line type indicator

- while (fgets(line,MAXDELEN,afflst)) {

+ while ((line = afflst->getline())) {

mychomp(line);

/* remove byte order mark */

if (firstline) {

firstline = 0;

- if (strncmp(line,"\xef\xbb\xbf",3) == 0) memmove(line, line+3, strlen(line+3)+1);

+ if (strncmp(line,"\xEF\xBB\xBF",3) == 0) memmove(line, line+3, strlen(line+3)+1);

}

/* parse in the try string */

if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {

if (flag_mode != FLAG_CHAR) {

- HUNSPELL_WARNING(stderr, "error: duplicate FLAG parameter\n");

+ HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of the FLAG affix file parameter\n", afflst->getlinenum());

}

if (strstr(line, "long")) flag_mode = FLAG_LONG;

if (strstr(line, "num")) flag_mode = FLAG_NUM;

if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;

if (flag_mode == FLAG_CHAR) {

- HUNSPELL_WARNING(stderr, "error: FLAG need `num', `long' or `UTF-8' parameter: %s\n", line);

+ HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", afflst->getlinenum());

}

- if ((strncmp(line,"SET",3) == 0) && isspace(line[3]) && strstr(line, "UTF-8")) utf8 = 1;

+ if (strncmp(line,"FORBIDDENWORD",13) == 0) {

+ char * st = NULL;

+ if (parse_string(line, &st, afflst->getlinenum())) {

+ delete afflst;

+ return 1;

+ }

+ forbiddenword = decode_flag(st);

+ free(st);

+ }

+ if (strncmp(line, "SET", 3) == 0) {

+ if (parse_string(line, &enc, afflst->getlinenum())) {

+ delete afflst;

+ return 1;

+ }

+ if (strcmp(enc, "UTF-8") == 0) {

+ utf8 = 1;

+#ifndef OPENOFFICEORG

+#ifndef MOZILLA_CLIENT

+ initialize_utf_tbl();

+#endif

+ } else csconv = get_current_cs(enc);

+ }

+ if (strncmp(line, "LANG", 4) == 0) {

+ if (parse_string(line, &lang, afflst->getlinenum())) {

+ delete afflst;

+ return 1;

+ }

+ langnum = get_lang_num(lang);

+ }

/* parse in the ignored characters (for example, Arabic optional diacritics characters */

if (strncmp(line,"IGNORE",6) == 0) {

- if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, "IGNORE", utf8)) {

- fclose(afflst);

+ if (parse_array(line, &ignorechars, &ignorechars_utf16,

+ &ignorechars_utf16_len, utf8, afflst->getlinenum())) {

+ delete afflst;

return 1;

}

if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) {

if (parse_aliasf(line, afflst)) {

- fclose(afflst);

+ delete afflst;

return 1;

}

-#ifdef HUNSPELL_EXPERIMENTAL

if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) {

if (parse_aliasm(line, afflst)) {

- fclose(afflst);

+ delete afflst;

return 1;

}

-#endif

- if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;

- if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;

+ if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;

+ if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;

}

- fclose(afflst);

+ if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING);

+ delete afflst;

return 0;

}

#endif // HUNSPELL_CHROME_CLIENT

@@ -669,11 +855,11 @@

int HashMgr::parse_aliasf(char* line, hunspell::LineIterator* iterator)

{

#else

-int HashMgr::parse_aliasf(char * line, FILE * af)

+int HashMgr::parse_aliasf(char * line, FileMgr * af)

{

#endif

if (numaliasf != 0) {

- HUNSPELL_WARNING(stderr, "error: duplicate AF (alias for flag vector) tables used\n");

+ HUNSPELL_WARNING(stderr, "error: multiple table definitions\n");

return 1;

}

char * tp = line;

@@ -691,8 +877,7 @@

numaliasf = 0;

aliasf = NULL;

aliasflen = NULL;

- HUNSPELL_WARNING(stderr, "incorrect number of entries in AF table\n");

- free(piece);

+ HUNSPELL_WARNING(stderr, "error: bad entry number\n");

return 1;

}

aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *));

@@ -712,7 +897,6 @@

}

i++;

}

- free(piece);

piece = mystrsep(&tp, 0);

}

if (np != 2) {

@@ -721,7 +905,7 @@

free(aliasflen);

aliasf = NULL;

aliasflen = NULL;

- HUNSPELL_WARNING(stderr, "error: missing AF table information\n");

+ HUNSPELL_WARNING(stderr, "error: missing data\n");

return 1;

}

@@ -732,9 +916,9 @@

if (!iterator->AdvanceAndCopy(nl, MAXDELEN))

return 1;

#else

- if (!fgets(nl,MAXDELEN,af)) return 1;

+ if (!(nl = af->getline())) return 1;

#endif

- mychomp(nl);

+ mychomp(nl);

tp = nl;

i = 0;

aliasf[j] = NULL;

@@ -750,8 +934,7 @@

free(aliasflen);

aliasf = NULL;

aliasflen = NULL;

- HUNSPELL_WARNING(stderr, "error: AF table is corrupt\n");

- free(piece);

+ HUNSPELL_WARNING(stderr, "error: table is corrupt\n");

return 1;

}

break;

@@ -765,7 +948,6 @@

}

i++;

}

- free(piece);

piece = mystrsep(&tp, 0);

}

if (!aliasf[j]) {

@@ -774,7 +956,7 @@

aliasf = NULL;

aliasflen = NULL;

numaliasf = 0;

- HUNSPELL_WARNING(stderr, "error: AF table is corrupt\n");

+ HUNSPELL_WARNING(stderr, "error: table is corrupt\n");

return 1;

}

@@ -810,8 +992,8 @@

struct hentry* he = new hentry;

if (i == 0)

first_he = he;

- he->word = word;

- he->wlen = word_len;

+ he->word = *word;

+ he->blen = word_len;

he->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[i],

&he->astr);

he->next = NULL;

@@ -854,12 +1036,11 @@

return 0;

}

-#ifdef HUNSPELL_EXPERIMENTAL

/* parse morph alias definitions */

-int HashMgr::parse_aliasm(char * line, FILE * af)

+int HashMgr::parse_aliasm(char * line, FileMgr * af)

{

if (numaliasm != 0) {

- HUNSPELL_WARNING(stderr, "error: duplicate AM (aliases for morphological descriptions) tables used\n");

+ HUNSPELL_WARNING(stderr, "error: multiple table definitions\n");

return 1;

}

char * tp = line;

@@ -874,8 +1055,7 @@

case 1: {

numaliasm = atoi(piece);

if (numaliasm < 1) {

- HUNSPELL_WARNING(stderr, "incorrect number of entries in AM table\n");

- free(piece);

+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());

return 1;

}

aliasm = (char **) malloc(numaliasm * sizeof(char *));

@@ -890,33 +1070,31 @@

}

i++;

}

- free(piece);

piece = mystrsep(&tp, 0);

}

if (np != 2) {

numaliasm = 0;

free(aliasm);

aliasm = NULL;

- HUNSPELL_WARNING(stderr, "error: missing AM alias information\n");

+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());

return 1;

}

/* now parse the numaliasm lines to read in the remainder of the table */

char * nl = line;

for (int j=0; j < numaliasm; j++) {

- if (!fgets(nl,MAXDELEN,af)) return 1;

+ if (!(nl = af->getline())) return 1;

mychomp(nl);

tp = nl;

i = 0;

aliasm[j] = NULL;

- piece = mystrsep(&tp, 0);

+ piece = mystrsep(&tp, ' ');

while (piece) {

if (*piece != '\0') {

switch(i) {

case 0: {

if (strncmp(piece,"AM",2) != 0) {

- HUNSPELL_WARNING(stderr, "error: AM table is corrupt\n");

- free(piece);

+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());

numaliasm = 0;

free(aliasm);

aliasm = NULL;

@@ -925,24 +1103,34 @@

break;

}

case 1: {

+ // add the remaining of the line

+ if (*tp) {

+ *(tp - 1) = ' ';

+ tp = tp + strlen(tp);

+ }

if (complexprefixes) {

if (utf8) reverseword_utf(piece);

else reverseword(piece);

}

aliasm[j] = mystrdup(piece);

+ if (!aliasm[j]) {

+ numaliasm = 0;

+ free(aliasm);

+ aliasm = NULL;

+ return 1;

+ }

break; }

default: break;

}

i++;

}

- free(piece);

- piece = mystrsep(&tp, 0);

+ piece = mystrsep(&tp, ' ');

}

if (!aliasm[j]) {

numaliasm = 0;

free(aliasm);

aliasm = NULL;

- HUNSPELL_WARNING(stderr, "error: map table is corrupt\n");

+ HUNSPELL_WARNING(stderr, "error: table is corrupt\n");

return 1;

}

@@ -958,4 +1146,3 @@

HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);

return NULL;

}

-#endif

« no previous file with comments | « chrome/third_party/hunspell/src/hunspell/hashmgr.hxx ('k') | chrome/third_party/hunspell/src/hunspell/htypes.hxx » ('j') | no next file with comments »