chrome/third_party/hunspell/src/hunspell/csutil.cxx - Issue 155841: Update Hunspell to the latest stable version to use the latest dictionary for...

Unified Diff: chrome/third_party/hunspell/src/hunspell/csutil.cxx

Issue 155841: Update Hunspell to the latest stable version to use the latest dictionary for... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: '' Created 11 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/third_party/hunspell/src/hunspell/csutil.cxx

===================================================================

--- chrome/third_party/hunspell/src/hunspell/csutil.cxx (revision 21721)

+++ chrome/third_party/hunspell/src/hunspell/csutil.cxx (working copy)

@@ -5,10 +5,12 @@

#include <cstdlib>

#include <cstring>

#include <cstdio>

+#include <cctype>

#else

#include <stdlib.h>

#include <string.h>

#include <stdio.h>

+#include <ctype.h>

#endif

#include "csutil.hxx"

@@ -43,17 +45,18 @@

using namespace std;

#endif

#else

-#ifndef W32

+#ifndef WIN32

using namespace std;

#endif

-struct unicode_info2 * utf_tbl = NULL;

+static struct unicode_info2 * utf_tbl = NULL;

+static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances

/* only UTF-16 (BMP) implementation */

char * u16_u8(char * dest, int size, const w_char * src, int srclen) {

- char * u8 = dest;

- char * u8_max = u8 + size;

+ signed char * u8 = (signed char *)dest;

+ signed char * u8_max = (signed char *)(u8 + size);

const w_char * u2 = src;

const w_char * u2_max = src + srclen;

while ((u2 < u2_max) && (u8 < u8_max)) {

@@ -100,12 +103,12 @@

/* only UTF-16 (BMP) implementation */

int u8_u16(w_char * dest, int size, const char * src) {

- const char * u8 = src;

+ const signed char * u8 = (const signed char *)src;

w_char * u2 = dest;

w_char * u2_max = u2 + size;

while ((u2 < u2_max) && *u8) {

- switch ((*u8) & 0xf0) {

+ switch ((*u8) & 0xf0) {

case 0x00:

case 0x10:

case 0x20:

@@ -122,7 +125,7 @@

case 0x90:

case 0xa0:

case 0xb0: {

- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Unexpected continuation bytes in %d. character position\n%s\n", u8 - src, src);

+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Unexpected continuation bytes in %ld. character position\n%s\n", static_cast<long>(u8 - (signed char *)src), src);

u2->h = 0xff;

u2->l = 0xfd;

break;

@@ -134,7 +137,7 @@

u2->l = (*u8 << 6) + (*(u8+1) & 0x3f);

u8++;

} else {

- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %d. character position:\n%s\n", u8 - src, src);

+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src);

u2->h = 0xff;

u2->l = 0xfd;

}

@@ -148,12 +151,12 @@

u2->l = (*u8 << 6) + (*(u8+1) & 0x3f);

u8++;

} else {

- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %d. character position:\n%s\n", u8 - src, src);

+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src);

u2->h = 0xff;

u2->l = 0xfd;

}

} else {

- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %d. character position:\n%s\n", u8 - src, src);

+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src);

u2->h = 0xff;

u2->l = 0xfd;

}

@@ -218,13 +221,11 @@

char * mystrsep(char ** stringp, const char delim)

{

- char * rv = NULL;

char * mp = *stringp;

- int n = strlen(mp);

- if (n > 0) {

+ if (*mp != '\0') {

char * dp;

if (delim) {

- dp = (char *)memchr(mp,(int)((unsigned char)delim),n);

+ dp = strchr(mp, delim);

} else {

// don't use isspace() here, the string can be in some random charset

// that's way different than the locale's

@@ -234,22 +235,16 @@

if (dp) {

*stringp = dp+1;

int nc = (int)((unsigned long)dp - (unsigned long)mp);

- rv = (char *) malloc(nc+1);

- memcpy(rv,mp,nc);

- *(rv+nc) = '\0';

- return rv;

+ *(mp+nc) = '\0';

+ return mp;

} else {

- rv = (char *) malloc(n+1);

- memcpy(rv, mp, n);

- *(rv+n) = '\0';

- *stringp = mp + n;

- return rv;

+ *stringp = mp + strlen(mp);

+ return mp;

}

return NULL;

}

// replaces strdup with ansi version

char * mystrdup(const char * s)

{

@@ -257,12 +252,27 @@

if (s) {

int sl = strlen(s);

d = (char *) malloc(((sl+1) * sizeof(char)));

- if (d) memcpy(d,s,((sl+1)*sizeof(char)));

+ if (d) {

+ memcpy(d,s,((sl+1)*sizeof(char)));

+ return d;

+ }

+ HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");

}

return d;

}

+ // strcat for limited length destination string

+ char * mystrcat(char * dest, const char * st, int max) {

+ int len;

+ int len2;

+ if (dest == NULL || st == NULL) return dest;

+ len = strlen(dest);

+ len2 = strlen(st);

+ if (len + len2 + 1 > max) return dest;

+ strcpy(dest + len, st);

+ return dest;

+ }

// remove cross-platform text line end characters

void mychomp(char * s)

{

@@ -289,113 +299,259 @@

return d;

}

-#ifdef HUNSPELL_EXPERIMENTAL

- // append s to ends of every lines in text

- void strlinecat(char * dest, const char * s)

- {

- char * dup = mystrdup(dest);

- char * source = dup;

- int len = strlen(s);

- while (*source) {

- if (*source == '\n') {

- strncpy(dest, s, len);

- dest += len;

- }

- *dest = *source;

- source++; dest++;

- }

- strcpy(dest, s);

- free(dup);

- }

// break text to lines

// return number of lines

-int line_tok(const char * text, char *** lines) {

+int line_tok(const char * text, char *** lines, char breakchar) {

int linenum = 0;

char * dup = mystrdup(text);

- char * p = strchr(dup, '\n');

+ char * p = strchr(dup, breakchar);

while (p) {

linenum++;

*p = '\0';

p++;

- p = strchr(p, '\n');

+ p = strchr(p, breakchar);

}

- *lines = (char **) calloc(linenum + 1, sizeof(char *));

- if (!(*lines)) return -1;

+ linenum++;

+// fprintf(stderr, "LINEN:%d %p %p\n", linenum, lines, *lines);

+ *lines = (char **) malloc(linenum * sizeof(char *));

+// fprintf(stderr, "hello\n");

+ if (!(*lines)) {

+ free(dup);

+ return 0;

+ }

- p = dup;

- for (int i = 0; i < linenum + 1; i++) {

- (*lines)[i] = mystrdup(p);

+ p = dup;

+ int l = 0;

+ for (int i = 0; i < linenum; i++) {

+ if (*p != '\0') {

+ (*lines)[l] = mystrdup(p);

+ if (!(*lines)[l]) {

+ for (i = 0; i < l; i++) free((*lines)[i]);

+ free(dup);

+ return 0;

+ }

+ l++;

+ }

p += strlen(p) + 1;

}

free(dup);

- return linenum;

+ if (!l) free(*lines);

+ return l;

}

// uniq line in place

-char * line_uniq(char * text) {

+char * line_uniq(char * text, char breakchar) {

char ** lines;

- int linenum = line_tok(text, &lines);

+ int linenum = line_tok(text, &lines, breakchar);

int i;

strcpy(text, lines[0]);

- for ( i = 1; i<=linenum; i++ ) {

+ for ( i = 1; i < linenum; i++ ) {

int dup = 0;

for (int j = 0; j < i; j++) {

if (strcmp(lines[i], lines[j]) == 0) dup = 1;

}

if (!dup) {

- if ((i > 1) || (*(lines[0]) != '\0')) strcat(text, "\n");

+ if ((i > 1) || (*(lines[0]) != '\0')) {

+ sprintf(text + strlen(text), "%c", breakchar);

+ }

strcat(text, lines[i]);

}

- for ( i = 0; i<=linenum; i++ ) {

+ for ( i = 0; i < linenum; i++ ) {

if (lines[i]) free(lines[i]);

}

if (lines) free(lines);

return text;

}

+// uniq and boundary for compound analysis: "\1\n\2\n\1" -> " ( \1 | \2 ) "

+char * line_uniq_app(char ** text, char breakchar) {

+ if (!strchr(*text, breakchar)) {

+ return *text;

+ }

+ char ** lines;

+ int i;

+ int linenum = line_tok(*text, &lines, breakchar);

+ int dup = 0;

+ for (i = 0; i < linenum; i++) {

+ for (int j = 0; j < (i - 1); j++) {

+ if (strcmp(lines[i], lines[j]) == 0) {

+ *(lines[i]) = '\0';

+ dup++;

+ break;

+ }

+ if ((linenum - dup) == 1) {

+ strcpy(*text, lines[0]);

+ freelist(&lines, linenum);

+ return *text;

+ }

+ char * newtext = (char *) malloc(strlen(*text) + 2 * linenum + 3 + 1);

+ if (newtext) {

+ free(*text);

+ *text = newtext;

+ } else {

+ freelist(&lines, linenum);

+ return *text;

+ }

+ strcpy(*text," ( ");

+ for (i = 0; i < linenum; i++) if (*(lines[i])) {

+ sprintf(*text + strlen(*text), "%s%s", lines[i], " | ");

+ }

+ (*text)[strlen(*text) - 2] = ')'; // " ) "

+ freelist(&lines, linenum);

+ return *text;

+ // append s to the end of every line in text

+ void strlinecat(char * dest, const char * s)

+ {

+ char * dup = mystrdup(dest);

+ char * source = dup;

+ int len = strlen(s);

+ if (dup) {

+ while (*source) {

+ if (*source == '\n') {

+ strncpy(dest, s, len);

+ dest += len;

+ }

+ *dest = *source;

+ source++; dest++;

+ }

+ strcpy(dest, s);

+ free(dup);

+ }

// change \n to char c

-char * line_join(char * text, char c) {

+char * tr(char * text, char oldc, char newc) {

char * p;

- for (p = text; *p; p++) if (*p == '\n') *p = c;

+ for (p = text; *p; p++) if (*p == oldc) *p = newc;

return text;

}

-// leave only last {[^}]*} substring for handling zero morphemes

-char * delete_zeros(char * morphout) {

- char * p = morphout;

- char * q = p;

- char * q2 = NULL;

- int suffix = 0;

- for (;*p && *(p+1);) {

- switch (*p) {

- case '{':

- q2 = q;

- q--;

- break;

- case '}':

- if (q2) {

- suffix = 1;

- q--;

- }

- break;

- default:

- if (suffix) {

- q = q2;

- }

- suffix = 0;

- *q = *p;

+// morphcmp(): compare MORPH_DERI_SFX, MORPH_INFL_SFX and MORPH_TERM_SFX fields

+// in the first line of the inputs

+// return 0, if inputs equal

+// return 1, if inputs may equal with a secondary suffix

+// otherwise return -1

+int morphcmp(const char * s, const char * t)

+ int se = 0;

+ int te = 0;

+ const char * sl;

+ const char * tl;

+ const char * olds;

+ const char * oldt;

+ if (!s || !t) return 1;

+ olds = s;

+ sl = strchr(s, '\n');

+ s = strstr(s, MORPH_DERI_SFX);

+ if (!s || (sl && sl < s)) s = strstr(olds, MORPH_INFL_SFX);

+ if (!s || (sl && sl < s)) {

+ s= strstr(olds, MORPH_TERM_SFX);

+ olds = NULL;

+ }

+ oldt = t;

+ tl = strchr(t, '\n');

+ t = strstr(t, MORPH_DERI_SFX);

+ if (!t || (tl && tl < t)) t = strstr(oldt, MORPH_INFL_SFX);

+ if (!t || (tl && tl < t)) {

+ t = strstr(oldt, MORPH_TERM_SFX);

+ oldt = NULL;

+ }

+ while (s && t && (!sl || sl > s) && (!tl || tl > t)) {

+ s += MORPH_TAG_LEN;

+ t += MORPH_TAG_LEN;

+ se = 0;

+ te = 0;

+ while ((*s == *t) && !se && !te) {

+ s++;

+ t++;

+ switch(*s) {

+ case ' ':

+ case '\n':

+ case '\t':

+ case '\0': se = 1;

+ }

+ switch(*t) {

+ case ' ':

+ case '\n':

+ case '\t':

+ case '\0': te = 1;

+ }

}

- p++;

- q++;

+ if (!se || !te) {

+ // not terminal suffix difference

+ if (olds) return -1;

+ return 1;

+ }

+ olds = s;

+ s = strstr(s, MORPH_DERI_SFX);

+ if (!s || (sl && sl < s)) s = strstr(olds, MORPH_INFL_SFX);

+ if (!s || (sl && sl < s)) {

+ s = strstr(olds, MORPH_TERM_SFX);

+ olds = NULL;

+ }

+ oldt = t;

+ t = strstr(t, MORPH_DERI_SFX);

+ if (!t || (tl && tl < t)) t = strstr(oldt, MORPH_INFL_SFX);

+ if (!t || (tl && tl < t)) {

+ t = strstr(oldt, MORPH_TERM_SFX);

+ oldt = NULL;

+ }

}

- *q = '\0';

- return morphout;

+ if (!s && !t && se && te) return 0;

+ return 1;

}

-#endif // END OF HUNSPELL_EXPERIMENTAL CODE

+int get_sfxcount(const char * morph)

+ if (!morph || !*morph) return 0;

+ int n = 0;

+ const char * old = morph;

+ morph = strstr(morph, MORPH_DERI_SFX);

+ if (!morph) morph = strstr(old, MORPH_INFL_SFX);

+ if (!morph) morph = strstr(old, MORPH_TERM_SFX);

+ while (morph) {

+ n++;

+ old = morph;

+ morph = strstr(morph + 1, MORPH_DERI_SFX);

+ if (!morph) morph = strstr(old + 1, MORPH_INFL_SFX);

+ if (!morph) morph = strstr(old + 1, MORPH_TERM_SFX);

+ }

+ return n;

+int fieldlen(const char * r)

+ int n = 0;

+ while (r && *r != '\t' && *r != '\0' && *r != '\n' && *r != ' ') {

+ r++;

+ n++;

+ }

+ return n;

+char * copy_field(char * dest, const char * morph, const char * var)

+ if (!morph) return NULL;

+ const char * beg = strstr(morph, var);

+ if (beg) {

+ char * d = dest;

+ for (beg += MORPH_TAG_LEN; *beg != ' ' && *beg != '\t' &&

+ *beg != '\n' && *beg != '\0'; d++, beg++) {

+ *d = *beg;

+ }

+ *d = '\0';

+ return dest;

+ }

+ return NULL;

char * mystrrep(char * word, const char * pat, const char * rep) {

char * pos = strstr(word, pat);

if (pos) {

@@ -445,7 +601,35 @@

u16_u8(word, MAXWORDUTF8LEN, w, l);

return 0;

}

+ int uniqlist(char ** list, int n) {

+ int i;

+ if (n < 2) return n;

+ for (i = 0; i < n; i++) {

+ for (int j = 0; j < i; j++) {

+ if (list[j] && list[i] && (strcmp(list[j], list[i]) == 0)) {

+ free(list[i]);

+ list[i] = NULL;

+ break;

+ }

+ int m = 1;

+ for (i = 1; i < n; i++) if (list[i]) {

+ list[m] = list[i];

+ m++;

+ }

+ return m;

+ }

+ void freelist(char *** list, int n) {

+ if (list && *list && n > 0) {

+ for (int i = 0; i < n; i++) if ((*list)[i]) free((*list)[i]);

+ free(*list);

+ *list = NULL;

+ }

// convert null terminated string to all caps

void mkallcap(char * p, const struct cs_info * csconv)

{

@@ -478,8 +662,8 @@

for (int i = 0; i < nc; i++) {

unsigned short idx = (u[i].h << 8) + u[i].l;

if (idx != unicodetoupper(idx, langnum)) {

- u[i].h = (unsigned char) (unicodetolower(idx, langnum) >> 8);

- u[i].l = (unsigned char) (unicodetolower(idx, langnum) & 0x00FF);

+ u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);

+ u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);

}

@@ -490,6 +674,20 @@

if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;

}

+ // conversion function for protected memory

+ void store_pointer(char * dest, char * source)

+ {

+ memcpy(dest, &source, sizeof(char *));

+ }

+ // conversion function for protected memory

+ char * get_stored_pointer(char * s)

+ {

+ char * p;

+ memcpy(&p, s, sizeof(char *));

+ return p;

+ }

#ifndef MOZILLA_CLIENT

// convert null terminated string to all caps using encoding

void enmkallcap(char * d, const char * p, const char * encoding)

@@ -782,7 +980,7 @@

{ 0x00, 0xfc, 0xdc },

{ 0x00, 0xfd, 0xdd },

{ 0x00, 0xfe, 0xde },

-{ 0x00, 0xff, 0xff },

+{ 0x00, 0xff, 0xff }

};

@@ -1042,7 +1240,7 @@

{ 0x00, 0xfc, 0xdc },

{ 0x00, 0xfd, 0xdd },

{ 0x00, 0xfe, 0xde },

-{ 0x00, 0xff, 0xff },

+{ 0x00, 0xff, 0xff }

};

@@ -1302,7 +1500,7 @@

{ 0x00, 0xfc, 0xdc },

{ 0x00, 0xfd, 0xdd },

{ 0x00, 0xfe, 0xde },

-{ 0x00, 0xff, 0xff },

+{ 0x00, 0xff, 0xff }

};

struct cs_info iso4_tbl[] = {

@@ -1561,7 +1759,7 @@

{ 0x00, 0xfc, 0xdc },

{ 0x00, 0xfd, 0xdd },

{ 0x00, 0xfe, 0xde },

-{ 0x00, 0xff, 0xff },

+{ 0x00, 0xff, 0xff }

};

struct cs_info iso5_tbl[] = {

@@ -1820,7 +2018,7 @@

{ 0x00, 0xfc, 0xac },

{ 0x00, 0xfd, 0xfd },

{ 0x00, 0xfe, 0xae },

-{ 0x00, 0xff, 0xaf },

+{ 0x00, 0xff, 0xaf }

};

struct cs_info iso6_tbl[] = {

@@ -2079,7 +2277,7 @@

{ 0x00, 0xfc, 0xfc },

{ 0x00, 0xfd, 0xfd },

{ 0x00, 0xfe, 0xfe },

-{ 0x00, 0xff, 0xff },

+{ 0x00, 0xff, 0xff }

};

struct cs_info iso7_tbl[] = {

@@ -2338,7 +2536,7 @@

{ 0x00, 0xfc, 0xbc },

{ 0x00, 0xfd, 0xbe },

{ 0x00, 0xfe, 0xbf },

-{ 0x00, 0xff, 0xff },

+{ 0x00, 0xff, 0xff }

};

struct cs_info iso8_tbl[] = {

@@ -2597,7 +2795,7 @@

{ 0x00, 0xfc, 0xfc },

{ 0x00, 0xfd, 0xfd },

{ 0x00, 0xfe, 0xfe },

-{ 0x00, 0xff, 0xff },

+{ 0x00, 0xff, 0xff }

};

struct cs_info iso9_tbl[] = {

@@ -2856,7 +3054,7 @@

{ 0x00, 0xfc, 0xdc },

{ 0x00, 0xfd, 0x49 },

{ 0x00, 0xfe, 0xde },

-{ 0x00, 0xff, 0xff },

+{ 0x00, 0xff, 0xff }

};

struct cs_info iso10_tbl[] = {

@@ -3115,7 +3313,7 @@

{ 0x00, 0xfc, 0xfc },

{ 0x00, 0xfd, 0xfd },

{ 0x00, 0xfe, 0xfe },

-{ 0x00, 0xff, 0xff },

+{ 0x00, 0xff, 0xff }

};

struct cs_info koi8r_tbl[] = {

@@ -3374,7 +3572,7 @@

{ 0x01, 0xdc, 0xfc },

{ 0x01, 0xdd, 0xfd },

{ 0x01, 0xde, 0xfe },

-{ 0x01, 0xdf, 0xff },

+{ 0x01, 0xdf, 0xff }

};

struct cs_info koi8u_tbl[] = {

@@ -3633,7 +3831,7 @@

{ 0x01, 0xdc, 0xfc },

{ 0x01, 0xdd, 0xfd },

{ 0x01, 0xde, 0xfe },

-{ 0x01, 0xdf, 0xff },

+{ 0x01, 0xdf, 0xff }

};

struct cs_info cp1251_tbl[] = {

@@ -3892,7 +4090,7 @@

{ 0x00, 0xfc, 0xdc },

{ 0x00, 0xfd, 0xdd },

{ 0x00, 0xfe, 0xde },

-{ 0x00, 0xff, 0xdf },

+{ 0x00, 0xff, 0xdf }

};

struct cs_info iso13_tbl[] = {

@@ -4151,7 +4349,7 @@

{ 0x00, 0xFC, 0xDC },

{ 0x00, 0xFD, 0xDD },

{ 0x00, 0xFE, 0xDE },

-{ 0x00, 0xFF, 0xFF },

+{ 0x00, 0xFF, 0xFF }

};

@@ -4411,7 +4609,7 @@

{ 0x00, 0xfc, 0xdc },

{ 0x00, 0xfd, 0xdd },

{ 0x00, 0xfe, 0xde },

-{ 0x00, 0xff, 0xff },

+{ 0x00, 0xff, 0xff }

};

struct cs_info iso15_tbl[] = {

@@ -4670,7 +4868,7 @@

{ 0x00, 0xfc, 0xdc },

{ 0x00, 0xfd, 0xdd },

{ 0x00, 0xfe, 0xde },

-{ 0x00, 0xff, 0xbe },

+{ 0x00, 0xff, 0xbe }

};

struct cs_info iscii_devanagari_tbl[] = {

@@ -4929,10 +5127,10 @@

{ 0x00, 0xfc, 0xfc },

{ 0x00, 0xfd, 0xfd },

{ 0x00, 0xfe, 0xfe },

-{ 0x00, 0xff, 0xff },

+{ 0x00, 0xff, 0xff }

};

-struct enc_entry encds[] = {

+static struct enc_entry encds[] = {

{"ISO8859-1",iso1_tbl},

{"ISO8859-2",iso2_tbl},

{"ISO8859-3",iso3_tbl},

@@ -4949,7 +5147,7 @@

{"ISO8859-13", iso13_tbl},

{"ISO8859-14", iso14_tbl},

{"ISO8859-15", iso15_tbl},

-{"ISCII-DEVANAGARI", iscii_devanagari_tbl},

+{"ISCII-DEVANAGARI", iscii_devanagari_tbl}

};

struct cs_info * get_current_cs(const char * es) {

@@ -4958,6 +5156,7 @@

for (int i = 0; i < n; i++) {

if (strcmp(es,encds[i].enc_name) == 0) {

ccs = encds[i].cs_table;

+ break;

}

return ccs;

@@ -5038,6 +5237,26 @@

}

#endif

+// primitive isalpha() replacement for tokenization

+char * get_casechars(const char * enc) {

+ struct cs_info * csconv = get_current_cs(enc);

+ char expw[MAXLNLEN];

+ char * p = expw;

+ for (int i = 0; i <= 255; i++) {

+ if ((csconv[i].cupper != csconv[i].clower)) {

+ *p = (char) i;

+ p++;

+ }

+ *p = '\0';

+#ifdef MOZILLA_CLIENT

+ delete csconv;

+#endif

+ return mystrdup(expw);

struct lang_map lang2enc[] = {

{"ar", "UTF-8", LANG_ar},

{"az", "UTF-8", LANG_az},

@@ -5090,6 +5309,8 @@

#ifndef OPENOFFICEORG

#ifndef MOZILLA_CLIENT

int initialize_utf_tbl() {

+ utf_tbl_count++;

+ if (utf_tbl) return 0;

utf_tbl = (unicode_info2 *) malloc(CONTSIZE * sizeof(unicode_info2));

if (utf_tbl) {

int j;

@@ -5110,7 +5331,11 @@

#endif

void free_utf_tbl() {

- if (utf_tbl) free(utf_tbl);

+ if (utf_tbl_count > 0) utf_tbl_count--;

+ if (utf_tbl && (utf_tbl_count == 0)) {

+ free(utf_tbl);

+ utf_tbl = NULL;

+ }

}

#ifdef MOZILLA_CLIENT

@@ -5133,11 +5358,11 @@

return u_toupper(c);

#else

#ifdef MOZILLA_CLIENT

- unsigned short ret(c);

- getcaseConv()->ToUpper(c, &ret);

- return ret;

+ PRUnichar ch2;

+ getcaseConv()->ToUpper((PRUnichar) c, &ch2);

+ return ch2;

#else

- return utf_tbl[c].cupper;

+ return (utf_tbl) ? utf_tbl[c].cupper : c;

#endif

}

@@ -5153,11 +5378,11 @@

return u_tolower(c);

#else

#ifdef MOZILLA_CLIENT

- unsigned short ret(c);

- getcaseConv()->ToLower(c, &ret);

- return ret;

+ PRUnichar ch2;

+ getcaseConv()->ToLower((PRUnichar) c, &ch2);

+ return ch2;

#else

- return utf_tbl[c].clower;

+ return (utf_tbl) ? utf_tbl[c].clower : c;

#endif

}

@@ -5167,10 +5392,72 @@

#ifdef OPENOFFICEORG

return u_isalpha(c);

#else

- return utf_tbl[c].cletter;

+ return (utf_tbl) ? utf_tbl[c].cletter : 0;

#endif

}

+/* get type of capitalization */

+int get_captype(char * word, int nl, cs_info * csconv) {

+ // now determine the capitalization type of the first nl letters

+ int ncap = 0;

+ int nneutral = 0;

+ int firstcap = 0;

+ if (csconv == NULL) return NOCAP;

+ for (char * q = word; *q != '\0'; q++) {

+ if (csconv[*((unsigned char *)q)].ccase) ncap++;

+ if (csconv[*((unsigned char *)q)].cupper == csconv[*((unsigned char *)q)].clower) nneutral++;

+ }

+ if (ncap) {

+ firstcap = csconv[*((unsigned char *) word)].ccase;

+ }

+ // now finally set the captype

+ if (ncap == 0) {

+ return NOCAP;

+ } else if ((ncap == 1) && firstcap) {

+ return INITCAP;

+ } else if ((ncap == nl) || ((ncap + nneutral) == nl)) {

+ return ALLCAP;

+ } else if ((ncap > 1) && firstcap) {

+ return HUHINITCAP;

+ }

+ return HUHCAP;

+int get_captype_utf8(w_char * word, int nl, int langnum) {

+ // now determine the capitalization type of the first nl letters

+ int ncap = 0;

+ int nneutral = 0;

+ int firstcap = 0;

+ unsigned short idx;

+ // don't check too long words

+ if (nl >= MAXWORDLEN) return 0;

+ // big Unicode character (non BMP area)

+ if (nl == -1) return NOCAP;

+ for (int i = 0; i < nl; i++) {

+ idx = (word[i].h << 8) + word[i].l;

+ if (idx != unicodetolower(idx, langnum)) ncap++;

+ if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) nneutral++;

+ }

+ if (ncap) {

+ idx = (word[0].h << 8) + word[0].l;

+ firstcap = (idx != unicodetolower(idx, langnum));

+ }

+ // now finally set the captype

+ if (ncap == 0) {

+ return NOCAP;

+ } else if ((ncap == 1) && firstcap) {

+ return INITCAP;

+ } else if ((ncap == nl) || ((ncap + nneutral) == nl)) {

+ return ALLCAP;

+ } else if ((ncap > 1) && firstcap) {

+ return HUHINITCAP;

+ }

+ return HUHCAP;

// strip all ignored characters in the string

void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len)

{

@@ -5200,14 +5487,14 @@

*word = '\0';

}

-int parse_string(char * line, char ** out, const char * name)

+int parse_string(char * line, char ** out, int ln)

{

char * tp = line;

char * piece;

int i = 0;

int np = 0;

if (*out) {

- HUNSPELL_WARNING(stderr, "error: duplicate %s line\n", name);

+ HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions\n", ln);

return 1;

}

piece = mystrsep(&tp, 0);

@@ -5217,6 +5504,7 @@

case 0: { np++; break; }

case 1: {

*out = mystrdup(piece);

+ if (!*out) return 1;

np++;

break;

}

@@ -5224,19 +5512,19 @@

}

i++;

}

- free(piece);

+ // free(piece);

piece = mystrsep(&tp, 0);

}

if (np != 2) {

- HUNSPELL_WARNING(stderr, "error: missing %s information\n", name);

+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", ln);

return 1;

}

return 0;

}

-int parse_array(char * line, char ** out,

- unsigned short ** out_utf16, int * out_utf16_len, const char * name, int utf8) {

- if (parse_string(line, out, name)) return 1;

+int parse_array(char * line, char ** out, unsigned short ** out_utf16,

+ int * out_utf16_len, int utf8, int ln) {

+ if (parse_string(line, out, ln)) return 1;

if (utf8) {

w_char w[MAXWORDLEN];

int n = u8_u16(w, MAXWORDLEN, *out);

Property changes on: chrome\third_party\hunspell\src\hunspell\csutil.cxx

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « chrome/third_party/hunspell/src/hunspell/csutil.hxx ('k') | chrome/third_party/hunspell/src/hunspell/dictmgr.cxx » ('j') | no next file with comments »