Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(26)

Unified Diff: chrome/third_party/hunspell/src/hunspell/csutil.cxx

Issue 155841: Update Hunspell to the latest stable version to use the latest dictionary for... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: '' Created 11 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/third_party/hunspell/src/hunspell/csutil.cxx
===================================================================
--- chrome/third_party/hunspell/src/hunspell/csutil.cxx (revision 21721)
+++ chrome/third_party/hunspell/src/hunspell/csutil.cxx (working copy)
@@ -5,10 +5,12 @@
#include <cstdlib>
#include <cstring>
#include <cstdio>
+#include <cctype>
#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
+#include <ctype.h>
#endif
#include "csutil.hxx"
@@ -43,17 +45,18 @@
using namespace std;
#endif
#else
-#ifndef W32
+#ifndef WIN32
using namespace std;
#endif
#endif
-struct unicode_info2 * utf_tbl = NULL;
+static struct unicode_info2 * utf_tbl = NULL;
+static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances
/* only UTF-16 (BMP) implementation */
char * u16_u8(char * dest, int size, const w_char * src, int srclen) {
- char * u8 = dest;
- char * u8_max = u8 + size;
+ signed char * u8 = (signed char *)dest;
+ signed char * u8_max = (signed char *)(u8 + size);
const w_char * u2 = src;
const w_char * u2_max = src + srclen;
while ((u2 < u2_max) && (u8 < u8_max)) {
@@ -100,12 +103,12 @@
/* only UTF-16 (BMP) implementation */
int u8_u16(w_char * dest, int size, const char * src) {
- const char * u8 = src;
+ const signed char * u8 = (const signed char *)src;
w_char * u2 = dest;
w_char * u2_max = u2 + size;
while ((u2 < u2_max) && *u8) {
- switch ((*u8) & 0xf0) {
+ switch ((*u8) & 0xf0) {
case 0x00:
case 0x10:
case 0x20:
@@ -122,7 +125,7 @@
case 0x90:
case 0xa0:
case 0xb0: {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Unexpected continuation bytes in %d. character position\n%s\n", u8 - src, src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Unexpected continuation bytes in %ld. character position\n%s\n", static_cast<long>(u8 - (signed char *)src), src);
u2->h = 0xff;
u2->l = 0xfd;
break;
@@ -134,7 +137,7 @@
u2->l = (*u8 << 6) + (*(u8+1) & 0x3f);
u8++;
} else {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %d. character position:\n%s\n", u8 - src, src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src);
u2->h = 0xff;
u2->l = 0xfd;
}
@@ -148,12 +151,12 @@
u2->l = (*u8 << 6) + (*(u8+1) & 0x3f);
u8++;
} else {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %d. character position:\n%s\n", u8 - src, src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src);
u2->h = 0xff;
u2->l = 0xfd;
}
} else {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %d. character position:\n%s\n", u8 - src, src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src);
u2->h = 0xff;
u2->l = 0xfd;
}
@@ -218,13 +221,11 @@
char * mystrsep(char ** stringp, const char delim)
{
- char * rv = NULL;
char * mp = *stringp;
- int n = strlen(mp);
- if (n > 0) {
+ if (*mp != '\0') {
char * dp;
if (delim) {
- dp = (char *)memchr(mp,(int)((unsigned char)delim),n);
+ dp = strchr(mp, delim);
} else {
// don't use isspace() here, the string can be in some random charset
// that's way different than the locale's
@@ -234,22 +235,16 @@
if (dp) {
*stringp = dp+1;
int nc = (int)((unsigned long)dp - (unsigned long)mp);
- rv = (char *) malloc(nc+1);
- memcpy(rv,mp,nc);
- *(rv+nc) = '\0';
- return rv;
+ *(mp+nc) = '\0';
+ return mp;
} else {
- rv = (char *) malloc(n+1);
- memcpy(rv, mp, n);
- *(rv+n) = '\0';
- *stringp = mp + n;
- return rv;
+ *stringp = mp + strlen(mp);
+ return mp;
}
}
return NULL;
}
-
// replaces strdup with ansi version
char * mystrdup(const char * s)
{
@@ -257,12 +252,27 @@
if (s) {
int sl = strlen(s);
d = (char *) malloc(((sl+1) * sizeof(char)));
- if (d) memcpy(d,s,((sl+1)*sizeof(char)));
+ if (d) {
+ memcpy(d,s,((sl+1)*sizeof(char)));
+ return d;
+ }
+ HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
}
return d;
}
-
-
+
+ // strcat for limited length destination string
+ char * mystrcat(char * dest, const char * st, int max) {
+ int len;
+ int len2;
+ if (dest == NULL || st == NULL) return dest;
+ len = strlen(dest);
+ len2 = strlen(st);
+ if (len + len2 + 1 > max) return dest;
+ strcpy(dest + len, st);
+ return dest;
+ }
+
// remove cross-platform text line end characters
void mychomp(char * s)
{
@@ -289,113 +299,259 @@
return d;
}
-#ifdef HUNSPELL_EXPERIMENTAL
- // append s to ends of every lines in text
- void strlinecat(char * dest, const char * s)
- {
- char * dup = mystrdup(dest);
- char * source = dup;
- int len = strlen(s);
- while (*source) {
- if (*source == '\n') {
- strncpy(dest, s, len);
- dest += len;
- }
- *dest = *source;
- source++; dest++;
- }
- strcpy(dest, s);
- free(dup);
- }
-
// break text to lines
// return number of lines
-int line_tok(const char * text, char *** lines) {
+int line_tok(const char * text, char *** lines, char breakchar) {
int linenum = 0;
char * dup = mystrdup(text);
- char * p = strchr(dup, '\n');
+ char * p = strchr(dup, breakchar);
while (p) {
linenum++;
*p = '\0';
p++;
- p = strchr(p, '\n');
+ p = strchr(p, breakchar);
}
- *lines = (char **) calloc(linenum + 1, sizeof(char *));
- if (!(*lines)) return -1;
+ linenum++;
+// fprintf(stderr, "LINEN:%d %p %p\n", linenum, lines, *lines);
+ *lines = (char **) malloc(linenum * sizeof(char *));
+// fprintf(stderr, "hello\n");
+ if (!(*lines)) {
+ free(dup);
+ return 0;
+ }
- p = dup;
- for (int i = 0; i < linenum + 1; i++) {
- (*lines)[i] = mystrdup(p);
+ p = dup;
+ int l = 0;
+ for (int i = 0; i < linenum; i++) {
+ if (*p != '\0') {
+ (*lines)[l] = mystrdup(p);
+ if (!(*lines)[l]) {
+ for (i = 0; i < l; i++) free((*lines)[i]);
+ free(dup);
+ return 0;
+ }
+ l++;
+ }
p += strlen(p) + 1;
}
free(dup);
- return linenum;
+ if (!l) free(*lines);
+ return l;
}
// uniq line in place
-char * line_uniq(char * text) {
+char * line_uniq(char * text, char breakchar) {
char ** lines;
- int linenum = line_tok(text, &lines);
+ int linenum = line_tok(text, &lines, breakchar);
int i;
strcpy(text, lines[0]);
- for ( i = 1; i<=linenum; i++ ) {
+ for ( i = 1; i < linenum; i++ ) {
int dup = 0;
for (int j = 0; j < i; j++) {
if (strcmp(lines[i], lines[j]) == 0) dup = 1;
}
if (!dup) {
- if ((i > 1) || (*(lines[0]) != '\0')) strcat(text, "\n");
+ if ((i > 1) || (*(lines[0]) != '\0')) {
+ sprintf(text + strlen(text), "%c", breakchar);
+ }
strcat(text, lines[i]);
}
}
- for ( i = 0; i<=linenum; i++ ) {
+ for ( i = 0; i < linenum; i++ ) {
if (lines[i]) free(lines[i]);
}
if (lines) free(lines);
return text;
}
+// uniq and boundary for compound analysis: "a\nb\na" -> " ( a | b ) "
+char * line_uniq_app(char ** text, char breakchar) {
+ if (!strchr(*text, breakchar)) {
+ return *text;
+ }
+
+ char ** lines;
+ int i;
+ int linenum = line_tok(*text, &lines, breakchar);
+ int dup = 0;
+ for (i = 0; i < linenum; i++) {
+ for (int j = 0; j < (i - 1); j++) {
+ if (strcmp(lines[i], lines[j]) == 0) {
+ *(lines[i]) = '\0';
+ dup++;
+ break;
+ }
+ }
+ }
+ if ((linenum - dup) == 1) {
+ strcpy(*text, lines[0]);
+ freelist(&lines, linenum);
+ return *text;
+ }
+ char * newtext = (char *) malloc(strlen(*text) + 2 * linenum + 3 + 1);
+ if (newtext) {
+ free(*text);
+ *text = newtext;
+ } else {
+ freelist(&lines, linenum);
+ return *text;
+ }
+ strcpy(*text," ( ");
+ for (i = 0; i < linenum; i++) if (*(lines[i])) {
+ sprintf(*text + strlen(*text), "%s%s", lines[i], " | ");
+ }
+ (*text)[strlen(*text) - 2] = ')'; // " ) "
+ freelist(&lines, linenum);
+ return *text;
+}
+
+ // append s to the end of every line in text
+ void strlinecat(char * dest, const char * s)
+ {
+ char * dup = mystrdup(dest);
+ char * source = dup;
+ int len = strlen(s);
+ if (dup) {
+ while (*source) {
+ if (*source == '\n') {
+ strncpy(dest, s, len);
+ dest += len;
+ }
+ *dest = *source;
+ source++; dest++;
+ }
+ strcpy(dest, s);
+ free(dup);
+ }
+ }
+
// change \n to char c
-char * line_join(char * text, char c) {
+char * tr(char * text, char oldc, char newc) {
char * p;
- for (p = text; *p; p++) if (*p == '\n') *p = c;
+ for (p = text; *p; p++) if (*p == oldc) *p = newc;
return text;
}
-// leave only last {[^}]*} substring for handling zero morphemes
-char * delete_zeros(char * morphout) {
- char * p = morphout;
- char * q = p;
- char * q2 = NULL;
- int suffix = 0;
-
- for (;*p && *(p+1);) {
- switch (*p) {
- case '{':
- q2 = q;
- q--;
- break;
- case '}':
- if (q2) {
- suffix = 1;
- q--;
- }
- break;
- default:
- if (suffix) {
- q = q2;
- }
- suffix = 0;
- *q = *p;
+// morphcmp(): compare the MORPH_DERI_SFX, MORPH_INFL_SFX and MORPH_TERM_SFX
+// fields in the first line of each input
+// return 0 if the inputs are equal
+// return 1 if the inputs may be equal with a secondary suffix
+// otherwise return -1
+int morphcmp(const char * s, const char * t)
+{
+ int se = 0;
+ int te = 0;
+ const char * sl;
+ const char * tl;
+ const char * olds;
+ const char * oldt;
+ if (!s || !t) return 1;
+ olds = s;
+ sl = strchr(s, '\n');
+ s = strstr(s, MORPH_DERI_SFX);
+ if (!s || (sl && sl < s)) s = strstr(olds, MORPH_INFL_SFX);
+ if (!s || (sl && sl < s)) {
+ s= strstr(olds, MORPH_TERM_SFX);
+ olds = NULL;
+ }
+ oldt = t;
+ tl = strchr(t, '\n');
+ t = strstr(t, MORPH_DERI_SFX);
+ if (!t || (tl && tl < t)) t = strstr(oldt, MORPH_INFL_SFX);
+ if (!t || (tl && tl < t)) {
+ t = strstr(oldt, MORPH_TERM_SFX);
+ oldt = NULL;
+ }
+ while (s && t && (!sl || sl > s) && (!tl || tl > t)) {
+ s += MORPH_TAG_LEN;
+ t += MORPH_TAG_LEN;
+ se = 0;
+ te = 0;
+ while ((*s == *t) && !se && !te) {
+ s++;
+ t++;
+ switch(*s) {
+ case ' ':
+ case '\n':
+ case '\t':
+ case '\0': se = 1;
+ }
+ switch(*t) {
+ case ' ':
+ case '\n':
+ case '\t':
+ case '\0': te = 1;
+ }
}
- p++;
- q++;
+ if (!se || !te) {
+ // not terminal suffix difference
+ if (olds) return -1;
+ return 1;
+ }
+ olds = s;
+ s = strstr(s, MORPH_DERI_SFX);
+ if (!s || (sl && sl < s)) s = strstr(olds, MORPH_INFL_SFX);
+ if (!s || (sl && sl < s)) {
+ s = strstr(olds, MORPH_TERM_SFX);
+ olds = NULL;
+ }
+ oldt = t;
+ t = strstr(t, MORPH_DERI_SFX);
+ if (!t || (tl && tl < t)) t = strstr(oldt, MORPH_INFL_SFX);
+ if (!t || (tl && tl < t)) {
+ t = strstr(oldt, MORPH_TERM_SFX);
+ oldt = NULL;
+ }
}
- *q = '\0';
- return morphout;
+ if (!s && !t && se && te) return 0;
+ return 1;
}
-#endif // END OF HUNSPELL_EXPERIMENTAL CODE
+int get_sfxcount(const char * morph)
+{
+ if (!morph || !*morph) return 0;
+ int n = 0;
+ const char * old = morph;
+ morph = strstr(morph, MORPH_DERI_SFX);
+ if (!morph) morph = strstr(old, MORPH_INFL_SFX);
+ if (!morph) morph = strstr(old, MORPH_TERM_SFX);
+ while (morph) {
+ n++;
+ old = morph;
+ morph = strstr(morph + 1, MORPH_DERI_SFX);
+ if (!morph) morph = strstr(old + 1, MORPH_INFL_SFX);
+ if (!morph) morph = strstr(old + 1, MORPH_TERM_SFX);
+ }
+ return n;
+}
+
+
+int fieldlen(const char * r)
+{
+ int n = 0;
+ while (r && *r != '\t' && *r != '\0' && *r != '\n' && *r != ' ') {
+ r++;
+ n++;
+ }
+ return n;
+}
+
+char * copy_field(char * dest, const char * morph, const char * var)
+{
+ if (!morph) return NULL;
+ const char * beg = strstr(morph, var);
+ if (beg) {
+ char * d = dest;
+ for (beg += MORPH_TAG_LEN; *beg != ' ' && *beg != '\t' &&
+ *beg != '\n' && *beg != '\0'; d++, beg++) {
+ *d = *beg;
+ }
+ *d = '\0';
+ return dest;
+ }
+ return NULL;
+}
+
char * mystrrep(char * word, const char * pat, const char * rep) {
char * pos = strstr(word, pat);
if (pos) {
@@ -445,7 +601,35 @@
u16_u8(word, MAXWORDUTF8LEN, w, l);
return 0;
}
+
+ int uniqlist(char ** list, int n) {
+ int i;
+ if (n < 2) return n;
+ for (i = 0; i < n; i++) {
+ for (int j = 0; j < i; j++) {
+ if (list[j] && list[i] && (strcmp(list[j], list[i]) == 0)) {
+ free(list[i]);
+ list[i] = NULL;
+ break;
+ }
+ }
+ }
+ int m = 1;
+ for (i = 1; i < n; i++) if (list[i]) {
+ list[m] = list[i];
+ m++;
+ }
+ return m;
+ }
+ void freelist(char *** list, int n) {
+ if (list && *list && n > 0) {
+ for (int i = 0; i < n; i++) if ((*list)[i]) free((*list)[i]);
+ free(*list);
+ *list = NULL;
+ }
+ }
+
// convert null terminated string to all caps
void mkallcap(char * p, const struct cs_info * csconv)
{
@@ -478,8 +662,8 @@
for (int i = 0; i < nc; i++) {
unsigned short idx = (u[i].h << 8) + u[i].l;
if (idx != unicodetoupper(idx, langnum)) {
- u[i].h = (unsigned char) (unicodetolower(idx, langnum) >> 8);
- u[i].l = (unsigned char) (unicodetolower(idx, langnum) & 0x00FF);
+ u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
+ u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
}
}
}
@@ -490,6 +674,20 @@
if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
}
+ // conversion function for protected memory
+ void store_pointer(char * dest, char * source)
+ {
+ memcpy(dest, &source, sizeof(char *));
+ }
+
+ // conversion function for protected memory
+ char * get_stored_pointer(char * s)
+ {
+ char * p;
+ memcpy(&p, s, sizeof(char *));
+ return p;
+ }
+
#ifndef MOZILLA_CLIENT
// convert null terminated string to all caps using encoding
void enmkallcap(char * d, const char * p, const char * encoding)
@@ -782,7 +980,7 @@
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
@@ -1042,7 +1240,7 @@
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
@@ -1302,7 +1500,7 @@
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso4_tbl[] = {
@@ -1561,7 +1759,7 @@
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso5_tbl[] = {
@@ -1820,7 +2018,7 @@
{ 0x00, 0xfc, 0xac },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xae },
-{ 0x00, 0xff, 0xaf },
+{ 0x00, 0xff, 0xaf }
};
struct cs_info iso6_tbl[] = {
@@ -2079,7 +2277,7 @@
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso7_tbl[] = {
@@ -2338,7 +2536,7 @@
{ 0x00, 0xfc, 0xbc },
{ 0x00, 0xfd, 0xbe },
{ 0x00, 0xfe, 0xbf },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso8_tbl[] = {
@@ -2597,7 +2795,7 @@
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso9_tbl[] = {
@@ -2856,7 +3054,7 @@
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0x49 },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso10_tbl[] = {
@@ -3115,7 +3313,7 @@
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info koi8r_tbl[] = {
@@ -3374,7 +3572,7 @@
{ 0x01, 0xdc, 0xfc },
{ 0x01, 0xdd, 0xfd },
{ 0x01, 0xde, 0xfe },
-{ 0x01, 0xdf, 0xff },
+{ 0x01, 0xdf, 0xff }
};
struct cs_info koi8u_tbl[] = {
@@ -3633,7 +3831,7 @@
{ 0x01, 0xdc, 0xfc },
{ 0x01, 0xdd, 0xfd },
{ 0x01, 0xde, 0xfe },
-{ 0x01, 0xdf, 0xff },
+{ 0x01, 0xdf, 0xff }
};
struct cs_info cp1251_tbl[] = {
@@ -3892,7 +4090,7 @@
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xdf },
+{ 0x00, 0xff, 0xdf }
};
struct cs_info iso13_tbl[] = {
@@ -4151,7 +4349,7 @@
{ 0x00, 0xFC, 0xDC },
{ 0x00, 0xFD, 0xDD },
{ 0x00, 0xFE, 0xDE },
-{ 0x00, 0xFF, 0xFF },
+{ 0x00, 0xFF, 0xFF }
};
@@ -4411,7 +4609,7 @@
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso15_tbl[] = {
@@ -4670,7 +4868,7 @@
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xbe },
+{ 0x00, 0xff, 0xbe }
};
struct cs_info iscii_devanagari_tbl[] = {
@@ -4929,10 +5127,10 @@
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
-struct enc_entry encds[] = {
+static struct enc_entry encds[] = {
{"ISO8859-1",iso1_tbl},
{"ISO8859-2",iso2_tbl},
{"ISO8859-3",iso3_tbl},
@@ -4949,7 +5147,7 @@
{"ISO8859-13", iso13_tbl},
{"ISO8859-14", iso14_tbl},
{"ISO8859-15", iso15_tbl},
-{"ISCII-DEVANAGARI", iscii_devanagari_tbl},
+{"ISCII-DEVANAGARI", iscii_devanagari_tbl}
};
struct cs_info * get_current_cs(const char * es) {
@@ -4958,6 +5156,7 @@
for (int i = 0; i < n; i++) {
if (strcmp(es,encds[i].enc_name) == 0) {
ccs = encds[i].cs_table;
+ break;
}
}
return ccs;
@@ -5038,6 +5237,26 @@
}
#endif
+// primitive isalpha() replacement for tokenization
+char * get_casechars(const char * enc) {
+ struct cs_info * csconv = get_current_cs(enc);
+ char expw[MAXLNLEN];
+ char * p = expw;
+ for (int i = 0; i <= 255; i++) {
+ if ((csconv[i].cupper != csconv[i].clower)) {
+ *p = (char) i;
+ p++;
+ }
+ }
+ *p = '\0';
+#ifdef MOZILLA_CLIENT
+ delete csconv;
+#endif
+ return mystrdup(expw);
+}
+
+
+
struct lang_map lang2enc[] = {
{"ar", "UTF-8", LANG_ar},
{"az", "UTF-8", LANG_az},
@@ -5090,6 +5309,8 @@
#ifndef OPENOFFICEORG
#ifndef MOZILLA_CLIENT
int initialize_utf_tbl() {
+ utf_tbl_count++;
+ if (utf_tbl) return 0;
utf_tbl = (unicode_info2 *) malloc(CONTSIZE * sizeof(unicode_info2));
if (utf_tbl) {
int j;
@@ -5110,7 +5331,11 @@
#endif
void free_utf_tbl() {
- if (utf_tbl) free(utf_tbl);
+ if (utf_tbl_count > 0) utf_tbl_count--;
+ if (utf_tbl && (utf_tbl_count == 0)) {
+ free(utf_tbl);
+ utf_tbl = NULL;
+ }
}
#ifdef MOZILLA_CLIENT
@@ -5133,11 +5358,11 @@
return u_toupper(c);
#else
#ifdef MOZILLA_CLIENT
- unsigned short ret(c);
- getcaseConv()->ToUpper(c, &ret);
- return ret;
+ PRUnichar ch2;
+ getcaseConv()->ToUpper((PRUnichar) c, &ch2);
+ return ch2;
#else
- return utf_tbl[c].cupper;
+ return (utf_tbl) ? utf_tbl[c].cupper : c;
#endif
#endif
}
@@ -5153,11 +5378,11 @@
return u_tolower(c);
#else
#ifdef MOZILLA_CLIENT
- unsigned short ret(c);
- getcaseConv()->ToLower(c, &ret);
- return ret;
+ PRUnichar ch2;
+ getcaseConv()->ToLower((PRUnichar) c, &ch2);
+ return ch2;
#else
- return utf_tbl[c].clower;
+ return (utf_tbl) ? utf_tbl[c].clower : c;
#endif
#endif
}
@@ -5167,10 +5392,72 @@
#ifdef OPENOFFICEORG
return u_isalpha(c);
#else
- return utf_tbl[c].cletter;
+ return (utf_tbl) ? utf_tbl[c].cletter : 0;
#endif
}
+/* get type of capitalization */
+int get_captype(char * word, int nl, cs_info * csconv) {
+ // now determine the capitalization type of the first nl letters
+ int ncap = 0;
+ int nneutral = 0;
+ int firstcap = 0;
+ if (csconv == NULL) return NOCAP;
+ for (char * q = word; *q != '\0'; q++) {
+ if (csconv[*((unsigned char *)q)].ccase) ncap++;
+ if (csconv[*((unsigned char *)q)].cupper == csconv[*((unsigned char *)q)].clower) nneutral++;
+ }
+ if (ncap) {
+ firstcap = csconv[*((unsigned char *) word)].ccase;
+ }
+
+ // now finally set the captype
+ if (ncap == 0) {
+ return NOCAP;
+ } else if ((ncap == 1) && firstcap) {
+ return INITCAP;
+ } else if ((ncap == nl) || ((ncap + nneutral) == nl)) {
+ return ALLCAP;
+ } else if ((ncap > 1) && firstcap) {
+ return HUHINITCAP;
+ }
+ return HUHCAP;
+}
+
+int get_captype_utf8(w_char * word, int nl, int langnum) {
+ // now determine the capitalization type of the first nl letters
+ int ncap = 0;
+ int nneutral = 0;
+ int firstcap = 0;
+ unsigned short idx;
+ // don't check too long words
+ if (nl >= MAXWORDLEN) return 0;
+ // big Unicode character (non BMP area)
+ if (nl == -1) return NOCAP;
+ for (int i = 0; i < nl; i++) {
+ idx = (word[i].h << 8) + word[i].l;
+ if (idx != unicodetolower(idx, langnum)) ncap++;
+ if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) nneutral++;
+ }
+ if (ncap) {
+ idx = (word[0].h << 8) + word[0].l;
+ firstcap = (idx != unicodetolower(idx, langnum));
+ }
+
+ // now finally set the captype
+ if (ncap == 0) {
+ return NOCAP;
+ } else if ((ncap == 1) && firstcap) {
+ return INITCAP;
+ } else if ((ncap == nl) || ((ncap + nneutral) == nl)) {
+ return ALLCAP;
+ } else if ((ncap > 1) && firstcap) {
+ return HUHINITCAP;
+ }
+ return HUHCAP;
+}
+
+
// strip all ignored characters in the string
void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len)
{
@@ -5200,14 +5487,14 @@
*word = '\0';
}
-int parse_string(char * line, char ** out, const char * name)
+int parse_string(char * line, char ** out, int ln)
{
char * tp = line;
char * piece;
int i = 0;
int np = 0;
if (*out) {
- HUNSPELL_WARNING(stderr, "error: duplicate %s line\n", name);
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions\n", ln);
return 1;
}
piece = mystrsep(&tp, 0);
@@ -5217,6 +5504,7 @@
case 0: { np++; break; }
case 1: {
*out = mystrdup(piece);
+ if (!*out) return 1;
np++;
break;
}
@@ -5224,19 +5512,19 @@
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (np != 2) {
- HUNSPELL_WARNING(stderr, "error: missing %s information\n", name);
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", ln);
return 1;
}
return 0;
}
-int parse_array(char * line, char ** out,
- unsigned short ** out_utf16, int * out_utf16_len, const char * name, int utf8) {
- if (parse_string(line, out, name)) return 1;
+int parse_array(char * line, char ** out, unsigned short ** out_utf16,
+ int * out_utf16_len, int utf8, int ln) {
+ if (parse_string(line, out, ln)) return 1;
if (utf8) {
w_char w[MAXWORDLEN];
int n = u8_u16(w, MAXWORDLEN, *out);
Property changes on: chrome\third_party\hunspell\src\hunspell\csutil.cxx
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « chrome/third_party/hunspell/src/hunspell/csutil.hxx ('k') | chrome/third_party/hunspell/src/hunspell/dictmgr.cxx » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698