chrome/third_party/hunspell/src/hunspell/hashmgr.cxx - Issue 155841: Update Hunspell to the latest stable version to use the latest dictionary for...

Side by Side Diff: chrome/third_party/hunspell/src/hunspell/hashmgr.cxx

Issue 155841: Update Hunspell to the latest stable version to use the latest dictionary for... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: '' Created 11 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 #include "license.hunspell"	1 #include "license.hunspell"

2 #include "license.myspell"	2 #include "license.myspell"

3	3

4 #ifndef MOZILLA_CLIENT	4 #ifndef MOZILLA_CLIENT

5 #include <cstdlib>	5 #include <cstdlib>

6 #include <cstring>	6 #include <cstring>

7 #include <cstdio>	7 #include <cstdio>

8 #include <cctype>	8 #include <cctype>

9 #else	9 #else

10 #include <stdlib.h>	10 #include <stdlib.h>

11 #include <string.h>	11 #include <string.h>

12 #include <stdio.h>	12 #include <stdio.h>

13 #include <ctype.h>	13 #include <ctype.h>

14 #endif	14 #endif

15	15

16 #include "hashmgr.hxx"	16 #include "hashmgr.hxx"

17 #include "csutil.hxx"	17 #include "csutil.hxx"

18 #include "atypes.hxx"	18 #include "atypes.hxx"

19	19

20 #ifdef MOZILLA_CLIENT	20 #ifdef MOZILLA_CLIENT

21 #ifdef __SUNPRO_CC // for SunONE Studio compiler	21 #ifdef __SUNPRO_CC // for SunONE Studio compiler

22 using namespace std;	22 using namespace std;

23 #endif	23 #endif

24 #else	24 #else

25 #ifndef W32	25 #ifndef WIN32

26 using namespace std;	26 using namespace std;

27 #endif	27 #endif

28 #endif	28 #endif

29	29

30 // build a hash table from a munched word list	30 // build a hash table from a munched word list

	31

31 #ifdef HUNSPELL_CHROME_CLIENT	32 #ifdef HUNSPELL_CHROME_CLIENT

32 HashMgr::HashMgr(hunspell::BDictReader* reader)	33 HashMgr::HashMgr(hunspell::BDictReader* reader)

33 {	34 {

34 bdict_reader = reader;	35 bdict_reader = reader;

35 #else	36 #else

36 HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle)	37 HashMgr::HashMgr(FILE* dic_handle, FILE* aff_handle, const char * key)

37 {	38 {

38 #endif	39 #endif

39 tablesize = 0;	40 tablesize = 0;

40 tableptr = NULL;	41 tableptr = NULL;

41 flag_mode = FLAG_CHAR;	42 flag_mode = FLAG_CHAR;

42 complexprefixes = 0;	43 complexprefixes = 0;

43 utf8 = 0;	44 utf8 = 0;

	45 langnum = 0;

	46 lang = NULL;

	47 enc = NULL;

	48 csconv = 0;

44 ignorechars = NULL;	49 ignorechars = NULL;

45 ignorechars_utf16 = NULL;	50 ignorechars_utf16 = NULL;

46 ignorechars_utf16_len = 0;	51 ignorechars_utf16_len = 0;

47 numaliasf = 0;	52 numaliasf = 0;

48 aliasf = NULL;	53 aliasf = NULL;

49 numaliasm = 0;	54 numaliasm = 0;

50 aliasm = NULL;	55 aliasm = NULL;

	56 forbiddenword = FORBIDDENWORD; // forbidden word signing flag

51 #ifdef HUNSPELL_CHROME_CLIENT	57 #ifdef HUNSPELL_CHROME_CLIENT

52 // No tables to load, just the AF config.	58 // No tables to load, just the AF config.

53 int ec = load_config();	59 int ec = load_config();

54 #else	60 #else

55 load_config(aff_handle);	61 load_config(aff_handle);

56 int ec = load_tables(dic_handle);	62 int ec = load_tables(dic_handle, key);

57 #endif	63 #endif

58 if (ec) {	64 if (ec) {

59 /* error condition - what should we do here */	65 /* error condition - what should we do here */

60 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);	66 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);

61 if (tableptr) {	67 if (tableptr) {

62 free(tableptr);	68 free(tableptr);

63 tableptr = NULL;	69 tableptr = NULL;

64 }	70 }

65 tablesize = 0;	71 tablesize = 0;

66 }	72 }

67 }	73 }

68	74

69	75

70 HashMgr::~HashMgr()	76 HashMgr::~HashMgr()

71 {	77 {

72 if (tableptr) {	78 if (tableptr) {

73 // now pass through hash table freeing up everything	79 // now pass through hash table freeing up everything

74 // go through column by column of the table	80 // go through column by column of the table

75 for (int i=0; i < tablesize; i++) {	81 for (int i=0; i < tablesize; i++) {

76 struct hentry * pt = &tableptr[i];	82 struct hentry * pt = tableptr[i];

77 struct hentry * nt = NULL;	83 struct hentry * nt = NULL;

78 if (pt) {

79 if (pt->astr && !aliasf) free(pt->astr);

80 if (pt->word) free(pt->word);

81 #ifdef HUNSPELL_EXPERIMENTAL

82 if (pt->description && !aliasm) free(pt->description);

83 #endif

84 pt = pt->next;

85 }

86 while(pt) {	84 while(pt) {

87 nt = pt->next;	85 nt = pt->next;

88 if (pt->astr && !aliasf) free(pt->astr);	86 if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen)) ) free(pt->astr);

89 if (pt->word) free(pt->word);

90 #ifdef HUNSPELL_EXPERIMENTAL

91 if (pt->description && !aliasm) free(pt->description);

92 #endif

93 free(pt);	87 free(pt);

94 pt = nt;	88 pt = nt;

95 }	89 }

96 }	90 }

97 free(tableptr);	91 free(tableptr);

98 tableptr = NULL;

99 }	92 }

100 tablesize = 0;	93 tablesize = 0;

101	94

102 if (aliasf) {	95 if (aliasf) {

103 for (int j = 0; j < (numaliasf); j++) free(aliasf[j]);	96 for (int j = 0; j < (numaliasf); j++) free(aliasf[j]);

104 free(aliasf);	97 free(aliasf);

105 aliasf = NULL;	98 aliasf = NULL;

106 if (aliasflen) {	99 if (aliasflen) {

107 free(aliasflen);	100 free(aliasflen);

108 aliasflen = NULL;	101 aliasflen = NULL;

109 }	102 }

110 }	103 }

111 if (aliasm) {	104 if (aliasm) {

112 for (int j = 0; j < (numaliasm); j++) free(aliasm[j]);	105 for (int j = 0; j < (numaliasm); j++) free(aliasm[j]);

113 free(aliasm);	106 free(aliasm);

114 aliasm = NULL;	107 aliasm = NULL;

115 }	108 }

	109

	110 #ifndef OPENOFFICEORG

	111 #ifndef MOZILLA_CLIENT

	112 if (utf8) free_utf_tbl();

	113 #endif

	114 #endif

	115

	116 if (enc) free(enc);

	117 if (lang) free(lang);

116	118

117 if (ignorechars) free(ignorechars);	119 if (ignorechars) free(ignorechars);

118 if (ignorechars_utf16) free(ignorechars_utf16);	120 if (ignorechars_utf16) free(ignorechars_utf16);

119	121

120 #ifdef HUNSPELL_CHROME_CLIENT	122 #ifdef HUNSPELL_CHROME_CLIENT

121 EmptyHentryCache();	123 EmptyHentryCache();

122 for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin();	124 for (std::vector<std::string*>::iterator it = pointer_to_strings_.begin();

123 it != pointer_to_strings_.end(); ++it) {	125 it != pointer_to_strings_.end(); ++it) {

124 delete *it;	126 delete *it;

125 }	127 }

(...skipping 11 matching lines...) Expand all Loading...
137 hentry* next = cur->next_homonym;	139 hentry* next = cur->next_homonym;

138 delete cur;	140 delete cur;

139 cur = next;	141 cur = next;

140 }	142 }

141 }	143 }

142 hentry_cache.clear();	144 hentry_cache.clear();

143 }	145 }

144 #endif	146 #endif

145	147

146 // lookup a root word in the hashtable	148 // lookup a root word in the hashtable

147

148 struct hentry * HashMgr::lookup(const char *word) const	149 struct hentry * HashMgr::lookup(const char *word) const

149 {	150 {

150 #ifdef HUNSPELL_CHROME_CLIENT	151 #ifdef HUNSPELL_CHROME_CLIENT

151 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];	152 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];

152 int affix_count = bdict_reader->FindWord(word, affix_ids);	153 int affix_count = bdict_reader->FindWord(word, affix_ids);

153 if (affix_count == 0) { // look for custom added word	154 if (affix_count == 0) { // look for custom added word

154 std::map<StringPiece, int>::const_iterator iter =	155 std::map<StringPiece, int>::const_iterator iter =

155 custom_word_to_affix_id_map_.find(word);	156 custom_word_to_affix_id_map_.find(word);

156 if (iter != custom_word_to_affix_id_map_.end()) {	157 if (iter != custom_word_to_affix_id_map_.end()) {

157 affix_count = 1;	158 affix_count = 1;

158 affix_ids[0] = iter->second;	159 affix_ids[0] = iter->second;

159 }	160 }

160 }	161 }

161	162

162 static const int kMaxWordLen = 128;	163 static const int kMaxWordLen = 128;

163 static char word_buf[kMaxWordLen];	164 static char word_buf[kMaxWordLen];

164 strncpy(word_buf, word, kMaxWordLen);	165 strncpy(word_buf, word, kMaxWordLen);

165	166

166 return AffixIDsToHentry(word_buf, affix_ids, affix_count);	167 return AffixIDsToHentry(word_buf, affix_ids, affix_count);

167 #else	168 #else

168 struct hentry * dp;	169 struct hentry * dp;

169 if (tableptr) {	170 if (tableptr) {

170 dp = &tableptr[hash(word)];	171 dp = tableptr[hash(word)];

171 if (dp->word == NULL) return NULL;	172 if (!dp) return NULL;

172 for ( ; dp != NULL; dp = dp->next) {	173 for ( ; dp != NULL; dp = dp->next) {

173 if (strcmp(word,dp->word) == 0) return dp;	174 if (strcmp(word,&(dp->word)) == 0) return dp;

174 }	175 }

175 }	176 }

176 return NULL;	177 return NULL;

177 #endif	178 #endif

178 }	179 }

179	180

180 // add a word to the hash table (private)	181 // add a word to the hash table (private)

181	182 int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,

182 int HashMgr::add_word(const char * word, int wl, unsigned short * aff, int al, const char * desc)	183 int al, const char * desc, bool onlyupcase)

183 {	184 {

184 #ifndef HUNSPELL_CHROME_CLIENT	185 #ifndef HUNSPELL_CHROME_CLIENT

185 char * st = mystrdup(word);	186 bool upcasehomonym = false;

186 if (wl && !st) return 1;	187 int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;

	188 // variable-length hash record with word and optional fields

	189 struct hentry* hp =

	190 » (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);

	191 if (!hp) return 1;

	192 char * hpw = &(hp->word);

	193 strcpy(hpw, word);

187 if (ignorechars != NULL) {	194 if (ignorechars != NULL) {

188 if (utf8) {	195 if (utf8) {

189 remove_ignored_chars_utf(st, ignorechars_utf16, ignorechars_utf16_len);	196 remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len);

190 } else {	197 } else {

191 remove_ignored_chars(st, ignorechars);	198 remove_ignored_chars(hpw, ignorechars);

192 }	199 }

193 }	200 }

194 if (complexprefixes) {	201 if (complexprefixes) {

195 if (utf8) reverseword_utf(st); else reverseword(st);	202 if (utf8) reverseword_utf(hpw); else reverseword(hpw);

196 }	203 }

197 int i = hash(st);	204

198 struct hentry * dp = &tableptr[i];	205 int i = hash(hpw);

199 if (dp->word == NULL) {	206

200 dp->wlen = (short) wl;	207 hp->blen = (unsigned char) wbl;

201 dp->alen = (short) al;	208 hp->clen = (unsigned char) wcl;

202 dp->word = st;	209 hp->alen = (short) al;

203 dp->astr = aff;	210 hp->astr = aff;

204 dp->next = NULL;	211 hp->next = NULL;

205 dp->next_homonym = NULL;	212 hp->next_homonym = NULL;

206 #ifdef HUNSPELL_EXPERIMENTAL	213

207 if (aliasm) {	214 // store the description string or its pointer

208 dp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);	215 if (desc) {

209 } else {	216 hp->var = H_OPT;

210 dp->description = mystrdup(desc);	217 if (aliasm) {

211 if (desc && !dp->description) return 1;	218 hp->var += H_OPT_ALIASM;

212 if (dp->description && complexprefixes) {	219 store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));

213 if (utf8) reverseword_utf(dp->description); else reverseword(dp->description);	220 } else {

	221 » strcpy(hpw + wbl + 1, desc);

	222 if (complexprefixes) {

	223 if (utf8) reverseword_utf(HENTRY_DATA(hp));

	224 else reverseword(HENTRY_DATA(hp));

	225 }

	226 }

	227 » if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON;

	228 } else hp->var = 0;

	229

	230 struct hentry * dp = tableptr[i];

	231 if (!dp) {

	232 tableptr[i] = hp;

	233 return 0;

	234 }

	235 while (dp->next != NULL) {

	236 if ((!dp->next_homonym) && (strcmp(&(hp->word), &(dp->word)) == 0)) {

	237 » // remove hidden onlyupcase homonym

	238 if (!onlyupcase) {

	239 » » if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {

	240 » » free(dp->astr);

	241 » » dp->astr = hp->astr;

	242 » » dp->alen = hp->alen;

	243 » » free(hp);

	244 » » return 0;

	245 » » } else {

	246 » » dp->next_homonym = hp;

	247 » » }

	248 } else {

	249 » upcasehomonym = true;

	250 }

	251 }

	252 dp=dp->next;

	253 }

	254 if (strcmp(&(hp->word), &(dp->word)) == 0) {

	255 » // remove hidden onlyupcase homonym

	256 if (!onlyupcase) {

	257 » » if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {

	258 » » free(dp->astr);

	259 » » dp->astr = hp->astr;

	260 » » dp->alen = hp->alen;

	261 » » free(hp);

	262 » » return 0;

	263 » » } else {

	264 » » dp->next_homonym = hp;

	265 » » }

	266 } else {

	267 » upcasehomonym = true;

214 }	268 }

215 }	269 }

216 #endif	270 if (!upcasehomonym) {

217 } else {	271 » dp->next = hp;

218 struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry));

219 if (!hp) return 1;

220 hp->wlen = (short) wl;

221 hp->alen = (short) al;

222 hp->word = st;

223 hp->astr = aff;

224 hp->next = NULL;

225 hp->next_homonym = NULL;

226 #ifdef HUNSPELL_EXPERIMENTAL

227 if (aliasm) {

228 hp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);

229 } else {	272 } else {

230 hp->description = mystrdup(desc);	273 » // remove hidden onlyupcase homonym

231 if (desc && !hp->description) return 1;	274 » if (hp->astr) free(hp->astr);

232 if (dp->description && complexprefixes) {	275 » free(hp);

233 if (utf8) reverseword_utf(hp->description); else reverseword(hp->description);

234 }

235 }	276 }

236 #endif

237 while (dp->next != NULL) {

238 if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;

239 dp=dp->next;

240 }

241 if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;

242 dp->next = hp;

243 }

244 #endif // HUNSPELL_CHROME_CLIENT	277 #endif // HUNSPELL_CHROME_CLIENT

245 std::map<StringPiece, int>::iterator iter =	278 std::map<StringPiece, int>::iterator iter =

246 custom_word_to_affix_id_map_.find(word);	279 custom_word_to_affix_id_map_.find(word);

247 if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added	280 if(iter == custom_word_to_affix_id_map_.end()) { // word needs to be added

248 std::string* new_string_word = new std::string(word);	281 std::string* new_string_word = new std::string(word);

249 pointer_to_strings_.push_back(new_string_word);	282 pointer_to_strings_.push_back(new_string_word);

250 StringPiece sp(*(new_string_word));	283 StringPiece sp(*(new_string_word));

251 custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words	284 custom_word_to_affix_id_map_[sp] = 0; // no affixes for custom words

252 return 1;	285 return 1;

253 }	286 }

254	287

255 return 0;	288 return 0;

256 }	289 }

257	290

258 // add a custom dic. word to the hash table (public)	291 int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,

259 int HashMgr::put_word(const char * word, int wl, char * aff)	292 unsigned short * flags, int al, char * dp, int captype)

260 {	293 {

261 unsigned short * flags;	294 // add inner capitalized forms to handle the following allcap forms:

262 int al = 0;	295 // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG

263 if (aff) {	296 // Allcaps with suffixes: CIA's -> CIA'S

264 al = decode_flags(&flags, aff);	297 if (((captype == HUHCAP) || (captype == HUHINITCAP) ||

265 flag_qsort(flags, 0, al);	298 ((captype == ALLCAP) && (flags != NULL))) &&

266 } else {	299 !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) {

267 flags = NULL;	300 unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1));

	301 » if (!flags2) return 1;

	302 if (al) memcpy(flags2, flags, al * sizeof(unsigned short));

	303 flags2[al] = ONLYUPCASEFLAG;

	304 if (utf8) {

	305 char st[BUFSIZE];

	306 w_char w[BUFSIZE];

	307 int wlen = u8_u16(w, BUFSIZE, word);

	308 mkallsmall_utf(w, wlen, langnum);

	309 mkallcap_utf(w, 1, langnum);

	310 u16_u8(st, BUFSIZE, w, wlen);

	311 return add_word(st,wbl,wcl,flags2,al+1,dp, true);

	312 } else {

	313 mkallsmall(word, csconv);

	314 mkinitcap(word, csconv);

	315 return add_word(word,wbl,wcl,flags2,al+1,dp, true);

	316 }

268 }	317 }

269 add_word(word, wl, flags, al, NULL);

270 return 0;	318 return 0;

271 }	319 }

272	320

273 int HashMgr::put_word_pattern(const char * word, int wl, const char * pattern)	321 // detect captype and modify word length for UTF-8 encoding

	322 int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {

	323 int len;

	324 if (utf8) {

	325 w_char dest_utf[BUFSIZE];

	326 len = u8_u16(dest_utf, BUFSIZE, word);

	327 *captype = get_captype_utf8(dest_utf, len, langnum);

	328 } else {

	329 len = wbl;

	330 *captype = get_captype((char *) word, len, csconv);

	331 }

	332 return len;

	333 }

	334

	335 // remove word (personal dictionary function for standalone applications)

	336 int HashMgr::remove(const char * word)

274 {	337 {

275 unsigned short * flags;	338 struct hentry * dp = lookup(word);

276 struct hentry * dp = lookup(pattern);	339 while (dp) {

277 if (!dp || !dp->astr) return 1;	340 if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {

278 flags = (unsigned short *) malloc (dp->alen * sizeof(short));	341 unsigned short * flags =

279 memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));	342 (unsigned short *) malloc(sizeof(short *) * (dp->alen + 1));

280 add_word(word, wl, flags, dp->alen, NULL);	343 if (!flags) return 1;

	344 for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i];

	345 flags[dp->alen] = forbiddenword;

	346 dp->astr = flags;

	347 dp->alen++;

	348 flag_qsort(flags, 0, dp->alen);

	349 }

	350 dp = dp->next_homonym;

	351 }

281 return 0;	352 return 0;

282 }	353 }

283	354

	355 /* remove forbidden flag to add a personal word to the hash */

	356 int HashMgr::remove_forbidden_flag(const char * word) {

	357 struct hentry * dp = lookup(word);

	358 if (!dp) return 1;

	359 while (dp) {

	360 if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) {

	361 if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic.

	362 else {

	363 unsigned short * flags2 =

	364 (unsigned short *) malloc(sizeof(short *) * (dp->alen - 1));

	365 if (!flags2) return 1;

	366 int i, j = 0;

	367 for (i = 0; i < dp->alen; i++) {

	368 if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i];

	369 }

	370 dp->alen--;

	371 dp->astr = flags2; // XXX allowed forbidden words

	372 }

	373 }

	374 dp = dp->next_homonym;

	375 }

	376 return 0;

	377 }

	378

	379 // add a custom dic. word to the hash table (public)

	380 int HashMgr::add(const char * word)

	381 {

	382 unsigned short * flags = NULL;

	383 int al = 0;

	384 if (remove_forbidden_flag(word)) {

	385 int captype;

	386 int wbl = strlen(word);

	387 int wcl = get_clen_and_captype(word, wbl, &captype);

	388 add_word(word, wbl, wcl, flags, al, NULL, false);

	389 return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype);

	390 }

	391 return 0;

	392 }

	393

	394 int HashMgr::add_with_affix(const char * word, const char * example)

	395 {

	396 // detect captype and modify word length for UTF-8 encoding

	397 struct hentry * dp = lookup(example);

	398 remove_forbidden_flag(word);

	399 if (dp && dp->astr) {

	400 int captype;

	401 int wbl = strlen(word);

	402 int wcl = get_clen_and_captype(word, wbl, &captype);

	403 if (aliasf) {

	404 add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false);

	405 } else {

	406 unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short));

	407 if (flags) {

	408 memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));

	409 add_word(word, wbl, wcl, flags, dp->alen, NULL, false);

	410 } else return 1;

	411 }

	412 return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp->alen, NULL, captype);

	413 }

	414 return 1;

	415 }

	416

284 // walk the hash table entry by entry - null at end	417 // walk the hash table entry by entry - null at end

	418 // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);

285 struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const	419 struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const

286 {	420 {

287 #ifdef HUNSPELL_CHROME_CLIENT	421 #ifdef HUNSPELL_CHROME_CLIENT

288 // This function creates a new hentry if NULL is passed as hp. It also takes	422 // This function creates a new hentry if NULL is passed as hp. It also takes

289 // the responsibility of deleting the pointer hp when walk is over.	423 // the responsibility of deleting the pointer hp when walk is over.

290	424

291 // This function is only ever called by one place and not nested. We can	425 // This function is only ever called by one place and not nested. We can

292 // therefore keep static state between calls and use |col| as a "reset" flag	426 // therefore keep static state between calls and use |col| as a "reset" flag

293 // to avoid changing the API. It is set to -1 for the first call.	427 // to avoid changing the API. It is set to -1 for the first call.

294 static hunspell::WordIterator word_iterator =	428 static hunspell::WordIterator word_iterator =

(...skipping 10 matching lines...) Expand all Loading...
305 if (affix_count == 0) {	439 if (affix_count == 0) {

306 delete hp;	440 delete hp;

307 return NULL;	441 return NULL;

308 }	442 }

309 short word_len = static_cast<short>(strlen(word));	443 short word_len = static_cast<short>(strlen(word));

310	444

311 // For now, just re-compute the \|hp\| and return it. No need to create linked	445 // For now, just re-compute the \|hp\| and return it. No need to create linked

312 // lists for the extra affixes. If hp is NULL, create it here.	446 // lists for the extra affixes. If hp is NULL, create it here.

313 if (!hp)	447 if (!hp)

314 hp = new hentry;	448 hp = new hentry;

315 hp->word = word;	449 hp->word = *word;

316 hp->wlen = word_len;	450 hp->blen = word_len;

317 hp->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[0],	451 hp->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[0],

318 &hp->astr);	452 &hp->astr);

319 hp->next = NULL;	453 hp->next = NULL;

320 hp->next_homonym = NULL;	454 hp->next_homonym = NULL;

321	455 hp->var = 0;

	456 hp->clen = 0;

322 return hp;	457 return hp;

323 #else	458 #else

324 //reset to start	459 »

325 if ((col < 0) || (hp == NULL)) {	460 if (hp && hp->next != NULL) return hp->next;

326 col = -1;	461 for (col++; col < tablesize; col++) {

327 hp = NULL;	462 if (tableptr[col]) return tableptr[col];

328 }	463 }

329	464 // null at end and reset to start

330 if (hp && hp->next != NULL) {	465 col = -1;

331 hp = hp->next;	466 return NULL;

332 } else {

333 col++;

334 hp = (col < tablesize) ? &tableptr[col] : NULL;

335 // search for next non-blank column entry

336 while (hp && (hp->word == NULL)) {

337 col ++;

338 hp = (col < tablesize) ? &tableptr[col] : NULL;

339 }

340 if (col < tablesize) return hp;

341 hp = NULL;

342 col = -1;

343 }

344 return hp;

345 #endif	467 #endif

346 }	468 }

347	469

348 // load a munched word list and build a hash table on the fly	470 // load a munched word list and build a hash table on the fly

349 int HashMgr::load_tables(FILE* t_handle)	471 int HashMgr::load_tables(FILE* t_handle, const char * key)

350 {	472 {

351 #ifndef HUNSPELL_CHROME_CLIENT	473 #ifndef HUNSPELL_CHROME_CLIENT

352 int wl, al;	474 int al;

353 char * ap;	475 char * ap;

354 char * dp;	476 char * dp;

	477 char * dp2;

355 unsigned short * flags;	478 unsigned short * flags;

	479 char * ts;

356	480

357 // raw dictionary - munched file	481 // open dictionary file

358 FILE * rawdict = _fdopen(_dup(_fileno(t_handle)), "r");	482 FileMgr * dict = new FileMgr(tpath, key);

359 if (rawdict == NULL) return 1;	483 if (dict == NULL) return 1;

360 fseek(rawdict, 0, SEEK_SET);

361	484

362 // first read the first line of file to get hash table size */	485 // first read the first line of file to get hash table size */

363 char ts[MAXDELEN];	486 if (!(ts = dict->getline())) {

364 if (! fgets(ts, MAXDELEN-1,rawdict)) return 2;	487 HUNSPELL_WARNING(stderr, "error: empty dic file\n");

	488 delete dict;

	489 return 2;

	490 }

365 mychomp(ts);	491 mychomp(ts);

366	492

367 /* remove byte order mark */	493 /* remove byte order mark */

368 if (strncmp(ts,"\xef\xbb\xbf",3) == 0) {	494 if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) {

369 memmove(ts, ts+3, strlen(ts+3)+1);	495 memmove(ts, ts+3, strlen(ts+3)+1);

370 HUNSPELL_WARNING(stderr, "warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions\n");	496 HUNSPELL_WARNING(stderr, "warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions\n");

371 }	497 }

372	498

373 if ((*ts < '1') || (*ts > '9')) HUNSPELL_WARNING(stderr, "error - missing word count in dictionary file\n");

374 tablesize = atoi(ts);	499 tablesize = atoi(ts);

375 if (!tablesize) return 4;	500 if (tablesize == 0) {

	501 HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n");

	502 delete dict;

	503 return 4;

	504 }

376 tablesize = tablesize + 5 + USERWORD;	505 tablesize = tablesize + 5 + USERWORD;

377 if ((tablesize %2) == 0) tablesize++;	506 if ((tablesize %2) == 0) tablesize++;

378	507

379 // allocate the hash table	508 // allocate the hash table

380 tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry));	509 tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *));

381 if (! tableptr) return 3;	510 if (! tableptr) {

382 for (int i=0; i<tablesize; i++) tableptr[i].word = NULL;	511 delete dict;

	512 return 3;

	513 }

	514 for (int i=0; i<tablesize; i++) tableptr[i] = NULL;

383	515

384 // loop through all words on much list and add to hash	516 // loop through all words on much list and add to hash

385 // table and create word and affix strings	517 // table and create word and affix strings

386	518

387 while (fgets(ts,MAXDELEN-1,rawdict)) {	519 while ((ts = dict->getline())) {

388 mychomp(ts);	520 mychomp(ts);

389 // split each line into word and morphological description	521 // split each line into word and morphological description

390 dp = strchr(ts,'\t');	522 dp = ts;

	523 while ((dp = strchr(dp, ':'))) {

	524 » if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) {

	525 » for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--);

	526 » if (dp < ts) { // missing word

	527 » » dp = NULL;

	528 » } else {

	529 » » *(dp + 1) = '\0';

	530 » » dp = dp + 2;

	531 » }

	532 » break;

	533 » }

	534 » dp++;

	535 }

391	536

392 if (dp) {	537 // tabulator is the old morphological field separator

393 *dp = '\0';	538 dp2 = strchr(ts, '\t');

394 dp++;	539 if (dp2 && (!dp || dp2 < dp)) {

395 } else {	540 » *dp2 = '\0';

396 dp = NULL;	541 » dp = dp2 + 1;

397 }	542 }

398	543

399 // split each line into word and affix char strings	544 // split each line into word and affix char strings

400 // "\/" signs slash in words (not affix separator)	545 // "\/" signs slash in words (not affix separator)

401 // "/" at beginning of the line is word character (not affix separator)	546 // "/" at beginning of the line is word character (not affix separator)

402 ap = strchr(ts,'/');	547 ap = strchr(ts,'/');

403 while (ap) {	548 while (ap) {

404 if (ap == ts) {	549 if (ap == ts) {

405 ap++;	550 ap++;

406 continue;	551 continue;

407 } else if (*(ap - 1) != '\\') break;	552 } else if (*(ap - 1) != '\\') break;

408 // replace "\/" with "/"	553 // replace "\/" with "/"

409 for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++);	554 for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++);

410 ap = strchr(ap,'/');	555 ap = strchr(ap,'/');

411 }	556 }

412	557

413 if (ap) {	558 if (ap) {

414 *ap = '\0';	559 *ap = '\0';

415 if (aliasf) {	560 if (aliasf) {

416 int index = atoi(ap + 1);	561 int index = atoi(ap + 1);

417 al = get_aliasf(index, &flags);	562 al = get_aliasf(index, &flags, dict);

418 if (!al) {	563 if (!al) {

419 HUNSPELL_WARNING(stderr, "error - bad flag vector alias: %s\n", ts);	564 HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", dict->getlinenum());

420 *ap = '\0';	565 *ap = '\0';

421 }	566 }

422 } else {	567 } else {

423 al = decode_flags(&flags, ap + 1);	568 al = decode_flags(&flags, ap + 1, dict);

424 flag_qsort(flags, 0, al);	569 flag_qsort(flags, 0, al);

425 }	570 }

426 } else {	571 } else {

427 al = 0;	572 al = 0;

428 ap = NULL;	573 ap = NULL;

429 flags = NULL;	574 flags = NULL;

430 }	575 }

431	576

432 wl = strlen(ts);	577 int captype;

	578 int wbl = strlen(ts);

	579 int wcl = get_clen_and_captype(ts, wbl, &captype);

	580 // add the word and its index plus its capitalized form optionally

	581 if (add_word(ts,wbl,wcl,flags,al,dp, false) ||

	582 » add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) {

	583 » delete dict;

	584 » return 5;

	585 }

	586 }

433	587

434 // add the word and its index	588 delete dict;

435 if (add_word(ts,wl,flags,al,dp)) return 5;

436

437 }

438

439 fclose(rawdict);

440 #endif	589 #endif

441 return 0;	590 return 0;

442 }	591 }

443	592

444

445 // the hash function is a simple load and rotate	593 // the hash function is a simple load and rotate

446 // algorithm borrowed	594 // algorithm borrowed

447	595

448 int HashMgr::hash(const char * word) const	596 int HashMgr::hash(const char * word) const

449 {	597 {

450 #ifdef HUNSPELL_CHROME_CLIENT	598 #ifdef HUNSPELL_CHROME_CLIENT

451 return 0;	599 return 0;

452 #else	600 #else

453 long hv = 0;	601 long hv = 0;

454 for (int i=0; i < 4 && *word != 0; i++)	602 for (int i=0; i < 4 && *word != 0; i++)

455 hv = (hv << 8) | (*word++);	603 hv = (hv << 8) | (*word++);

456 while (*word != 0) {	604 while (*word != 0) {

457 ROTATE(hv,ROTATE_LEN);	605 ROTATE(hv,ROTATE_LEN);

458 hv ^= (*word++);	606 hv ^= (*word++);

459 }	607 }

460 return (unsigned long) hv % tablesize;	608 return (unsigned long) hv % tablesize;

461 #endif	609 #endif

462 }	610 }

463	611

464 int HashMgr::decode_flags(unsigned short ** result, char * flags) {	612 int HashMgr::decode_flags(unsigned short ** result, char * flags) {

465 int len;	613 int len;

466 switch (flag_mode) {	614 switch (flag_mode) {

467 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)	615 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)

468 len = strlen(flags);	616 len = strlen(flags);

469 if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: length of FLAG_LONG flagvector is odd: %s\n", flags);	617 if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: bad flagvector\n");

470 len = len/2;	618 len /= 2;

471 *result = (unsigned short *) malloc(len * sizeof(short));	619 *result = (unsigned short *) malloc(len * sizeof(short));

	620 if (!*result) return -1;

472 for (int i = 0; i < len; i++) {	621 for (int i = 0; i < len; i++) {

473 (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1];	622 (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1];

474 }	623 }

475 break;	624 break;

476 }	625 }

477 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)	626 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)

	627 int i;

478 len = 1;	628 len = 1;

479 char * src = flags;	629 char * src = flags;

480 unsigned short * dest;	630 unsigned short * dest;

481 char * p;	631 char * p;

482 for (p = flags; *p; p++) {	632 for (p = flags; *p; p++) {

483 if (*p == ',') len++;	633 if (*p == ',') len++;

484 }	634 }

485 *result = (unsigned short *) malloc(len * sizeof(short));	635 *result = (unsigned short *) malloc(len * sizeof(short));

	636 if (!*result) return -1;

486 dest = *result;	637 dest = *result;

487 for (p = flags; *p; p++) {	638 for (p = flags; *p; p++) {

488 if (*p == ',') {	639 if (*p == ',') {

489 *dest = (unsigned short) atoi(src);	640 i = atoi(src);

	641 if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);

	642 *dest = (unsigned short) i;

490 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");	643 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");

491 src = p + 1;	644 src = p + 1;

492 dest++;	645 dest++;

493 }	646 }

494 }	647 }

495 *dest = (unsigned short) atoi(src);	648 i = atoi(src);

	649 if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);

	650 *dest = (unsigned short) i;

496 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");	651 if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");

497 break;	652 break;

498 }	653 }

499 case FLAG_UNI: { // UTF-8 characters	654 case FLAG_UNI: { // UTF-8 characters

500 w_char w[MAXDELEN/2];	655 w_char w[BUFSIZE/2];

501 len = u8_u16(w, MAXDELEN/2, flags);	656 len = u8_u16(w, BUFSIZE/2, flags);

502 *result = (unsigned short *) malloc(len * sizeof(short));	657 *result = (unsigned short *) malloc(len * sizeof(short));

	658 if (!*result) return -1;

503 memcpy(*result, w, len * sizeof(short));	659 memcpy(*result, w, len * sizeof(short));

504 break;	660 break;

505 }	661 }

506 default: { // Ispell's one-character flags (erfg -> e r f g)	662 default: { // Ispell's one-character flags (erfg -> e r f g)

507 unsigned short * dest;	663 unsigned short * dest;

508 len = strlen(flags);	664 len = strlen(flags);

509 *result = (unsigned short *) malloc(len * sizeof(short));	665 *result = (unsigned short *) malloc(len * sizeof(short));

	666 if (!*result) return -1;

510 dest = *result;	667 dest = *result;

511 for (unsigned char * p = (unsigned char *) flags; *p; p++) {	668 for (unsigned char * p = (unsigned char *) flags; *p; p++) {

512 *dest = (unsigned short) *p;	669 *dest = (unsigned short) *p;

513 dest++;	670 dest++;

514 }	671 }

515 }	672 }

516 }	673 }

517 return len;	674 return len;

518 }	675 }

519	676

520 unsigned short HashMgr::decode_flag(const char * f) {	677 unsigned short HashMgr::decode_flag(const char * f) {

521 unsigned short s = 0;	678 unsigned short s = 0;

	679 int i;

522 switch (flag_mode) {	680 switch (flag_mode) {

523 case FLAG_LONG:	681 case FLAG_LONG:

524 s = ((unsigned short) f[0] << 8) + (unsigned short) f[1];	682 s = ((unsigned short) f[0] << 8) + (unsigned short) f[1];

525 break;	683 break;

526 case FLAG_NUM:	684 case FLAG_NUM:

527 s = (unsigned short) atoi(f);	685 i = atoi(f);

	686 if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);

	687 s = (unsigned short) i;

528 break;	688 break;

529 case FLAG_UNI:	689 case FLAG_UNI:

530 u8_u16((w_char *) &s, 1, f);	690 u8_u16((w_char *) &s, 1, f);

531 break;	691 break;

532 default:	692 default:

533 s = (unsigned short) *((unsigned char *)f);	693 s = (unsigned short) *((unsigned char *)f);

534 }	694 }

535 if (!s) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");	695 if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");

536 return s;	696 return s;

537 }	697 }

538	698

539 char * HashMgr::encode_flag(unsigned short f) {	699 char * HashMgr::encode_flag(unsigned short f) {

540 unsigned char ch[10];	700 unsigned char ch[10];

541 if (f==0) return mystrdup("(NULL)");	701 if (f==0) return mystrdup("(NULL)");

542 if (flag_mode == FLAG_LONG) {	702 if (flag_mode == FLAG_LONG) {

543 ch[0] = (unsigned char) (f >> 8);	703 ch[0] = (unsigned char) (f >> 8);

544 ch[1] = (unsigned char) (f - ((f >> 8) << 8));	704 ch[1] = (unsigned char) (f - ((f >> 8) << 8));

545 ch[2] = '\0';	705 ch[2] = '\0';

(...skipping 16 matching lines...) Expand all Loading...
562 // Read in the regular commands from the affix file. We care about the FLAG	722 // Read in the regular commands from the affix file. We care about the FLAG

563 // line becuase the AF lines depend on this value, and the IGNORE line.	723 // line becuase the AF lines depend on this value, and the IGNORE line.

564 // The rest of the commands will be read by the affix manager.	724 // The rest of the commands will be read by the affix manager.

565 char line[MAXDELEN+1];	725 char line[MAXDELEN+1];

566 hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator();	726 hunspell::LineIterator iterator = bdict_reader->GetOtherLineIterator();

567 while (iterator.AdvanceAndCopy(line, MAXDELEN)) {	727 while (iterator.AdvanceAndCopy(line, MAXDELEN)) {

568 // Parse in the ignored characters (for example, Arabic optional	728 // Parse in the ignored characters (for example, Arabic optional

569 // diacritics characters.	729 // diacritics characters.

570 if (strncmp(line,"IGNORE",6) == 0) {	730 if (strncmp(line,"IGNORE",6) == 0) {

571 parse_array(line, &ignorechars, &ignorechars_utf16,	731 parse_array(line, &ignorechars, &ignorechars_utf16,

572 &ignorechars_utf16_len, "IGNORE", utf8);	732 &ignorechars_utf16_len, utf8, 0);

573 }	733 }

574 // Retrieve the format of an AF line.	734 // Retrieve the format of an AF line.

575 if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {	735 if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {

576 if (strstr(line, "long")) flag_mode = FLAG_LONG;	736 if (strstr(line, "long")) flag_mode = FLAG_LONG;

577 if (strstr(line, "num")) flag_mode = FLAG_NUM;	737 if (strstr(line, "num")) flag_mode = FLAG_NUM;

578 if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;	738 if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;

579 }	739 }

580 }	740 }

581	741

582 // Read in all the AF lines which tell us the rules for each affix group ID.	742 // Read in all the AF lines which tell us the rules for each affix group ID.

583 iterator = bdict_reader->GetAfLineIterator();	743 iterator = bdict_reader->GetAfLineIterator();

584 while (iterator.AdvanceAndCopy(line, MAXDELEN)) {	744 while (iterator.AdvanceAndCopy(line, MAXDELEN)) {

585 int rv = parse_aliasf(line, &iterator);	745 int rv = parse_aliasf(line, &iterator);

586 if (rv)	746 if (rv)

587 return rv;	747 return rv;

588 }	748 }

589	749

590 return 0;	750 return 0;

591 }	751 }

592 #else	752 #else

593 // read in aff file and set flag mode	753 // read in aff file and set flag mode

594 int HashMgr::load_config(FILE* aff_handle)	754 int HashMgr::load_config(FILE* aff_handle, const char * key)

595 {	755 {

	756 char * line; // io buffers

596 int firstline = 1;	757 int firstline = 1;

597

598 // io buffers

599 char line[MAXDELEN+1];

600	758

601 // open the affix file	759 // open the affix file

602 FILE * afflst;	760 FileMgr * afflst = new FileMgr(affpath, key);

603 afflst = _fdopen(_dup(_fileno(aff_handle)), "r");

604 if (!afflst) {	761 if (!afflst) {

605 HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n");	762 HUNSPELL_WARNING(stderr, "Error - could not open affix description file\n");

606 return 1;	763 return 1;

607 }	764 }

608 fseek(afflst, 0, SEEK_SET);

609	765

610 // read in each line ignoring any that do not	766 // read in each line ignoring any that do not

611 // start with a known line type indicator	767 // start with a known line type indicator

612	768

613 while (fgets(line,MAXDELEN,afflst)) {	769 while ((line = afflst->getline())) {

614 mychomp(line);	770 mychomp(line);

615	771

616 /* remove byte order mark */	772 /* remove byte order mark */

617 if (firstline) {	773 if (firstline) {

618 firstline = 0;	774 firstline = 0;

619 if (strncmp(line,"\xef\xbb\xbf",3) == 0) memmove(line, line+3, strlen(line+3)+1);	775 if (strncmp(line,"\xEF\xBB\xBF",3) == 0) memmove(line, line+3, strlen(line+3)+1);

620 }	776 }

621	777

622 /* parse in the try string */	778 /* parse in the try string */

623 if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {	779 if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {

624 if (flag_mode != FLAG_CHAR) {	780 if (flag_mode != FLAG_CHAR) {

625 HUNSPELL_WARNING(stderr, "error: duplicate FLAG parameter\n");	781 HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of the FLAG affix file parameter\n", afflst->getlinenum());

626 }	782 }

627 if (strstr(line, "long")) flag_mode = FLAG_LONG;	783 if (strstr(line, "long")) flag_mode = FLAG_LONG;

628 if (strstr(line, "num")) flag_mode = FLAG_NUM;	784 if (strstr(line, "num")) flag_mode = FLAG_NUM;

629 if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;	785 if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;

630 if (flag_mode == FLAG_CHAR) {	786 if (flag_mode == FLAG_CHAR) {

631 HUNSPELL_WARNING(stderr, "error: FLAG need `num', `long' or `UTF-8' parameter: %s\n", line);	787 HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", afflst->getlinenum());

632 }	788 }

633 }	789 }

634 if ((strncmp(line,"SET",3) == 0) && isspace(line[3]) && strstr(line, "UTF-8")) utf8 = 1;	790 if (strncmp(line,"FORBIDDENWORD",13) == 0) {

	791 char * st = NULL;

	792 if (parse_string(line, &st, afflst->getlinenum())) {

	793 delete afflst;

	794 return 1;

	795 }

	796 forbiddenword = decode_flag(st);

	797 free(st);

	798 }

	799 if (strncmp(line, "SET", 3) == 0) {

	800 » if (parse_string(line, &enc, afflst->getlinenum())) {

	801 delete afflst;

	802 return 1;

	803 } »

	804 » if (strcmp(enc, "UTF-8") == 0) {

	805 » utf8 = 1;

	806 #ifndef OPENOFFICEORG

	807 #ifndef MOZILLA_CLIENT

	808 » initialize_utf_tbl();

	809 #endif

	810 #endif

	811 » } else csconv = get_current_cs(enc);

	812 » }

	813 if (strncmp(line, "LANG", 4) == 0) {

	814 » if (parse_string(line, &lang, afflst->getlinenum())) {

	815 delete afflst;

	816 return 1;

	817 } »

	818 » langnum = get_lang_num(lang);

	819 » }

635	820

636 /* parse in the ignored characters (for example, Arabic optional diacritics characters */	821 /* parse in the ignored characters (for example, Arabic optional diacritics characters */

637 if (strncmp(line,"IGNORE",6) == 0) {	822 if (strncmp(line,"IGNORE",6) == 0) {

638 if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, "IGNORE", utf8)) {	823 if (parse_array(line, &ignorechars, &ignorechars_utf16,

639 fclose(afflst);	824 &ignorechars_utf16_len, utf8, afflst->getlinenum())) {

	825 delete afflst;

640 return 1;	826 return 1;

641 }	827 }

642 }	828 }

643	829

644 if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) {	830 if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) {

645 if (parse_aliasf(line, afflst)) {	831 if (parse_aliasf(line, afflst)) {

646 fclose(afflst);	832 delete afflst;

647 return 1;	833 return 1;

648 }	834 }

649 }	835 }

650	836

651 #ifdef HUNSPELL_EXPERIMENTAL

652 if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) {	837 if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) {

653 if (parse_aliasm(line, afflst)) {	838 if (parse_aliasm(line, afflst)) {

654 fclose(afflst);	839 delete afflst;

655 return 1;	840 return 1;

656 }	841 }

657 }	842 }

658 #endif	843

659 if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;	844 if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;

660 if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;	845 if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;

661 }	846 }

662 fclose(afflst);	847 if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING);

	848 delete afflst;

663 return 0;	849 return 0;

664 }	850 }

665 #endif // HUNSPELL_CHROME_CLIENT	851 #endif // HUNSPELL_CHROME_CLIENT

666	852

667 /* parse in the ALIAS table */	853 /* parse in the ALIAS table */

668 #ifdef HUNSPELL_CHROME_CLIENT	854 #ifdef HUNSPELL_CHROME_CLIENT

669 int HashMgr::parse_aliasf(char* line, hunspell::LineIterator* iterator)	855 int HashMgr::parse_aliasf(char* line, hunspell::LineIterator* iterator)

670 {	856 {

671 #else	857 #else

672 int HashMgr::parse_aliasf(char * line, FILE * af)	858 int HashMgr::parse_aliasf(char * line, FileMgr * af)

673 {	859 {

674 #endif	860 #endif

675 if (numaliasf != 0) {	861 if (numaliasf != 0) {

676 HUNSPELL_WARNING(stderr, "error: duplicate AF (alias for flag vector) tables used\n");	862 HUNSPELL_WARNING(stderr, "error: multiple table definitions\n");

677 return 1;	863 return 1;

678 }	864 }

679 char * tp = line;	865 char * tp = line;

680 char * piece;	866 char * piece;

681 int i = 0;	867 int i = 0;

682 int np = 0;	868 int np = 0;

683 piece = mystrsep(&tp, 0);	869 piece = mystrsep(&tp, 0);

684 while (piece) {	870 while (piece) {

685 if (*piece != '\0') {	871 if (*piece != '\0') {

686 switch(i) {	872 switch(i) {

687 case 0: { np++; break; }	873 case 0: { np++; break; }

688 case 1: {	874 case 1: {

689 numaliasf = atoi(piece);	875 numaliasf = atoi(piece);

690 if (numaliasf < 1) {	876 if (numaliasf < 1) {

691 numaliasf = 0;	877 numaliasf = 0;

692 aliasf = NULL;	878 aliasf = NULL;

693 aliasflen = NULL;	879 aliasflen = NULL;

694 HUNSPELL_WARNING(stderr, "incorrect number of entries in AF table\n");	880 HUNSPELL_WARNING(stderr, "error: bad entry number\n");

695 free(piece);

696 return 1;	881 return 1;

697 }	882 }

698 aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *));	883 aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *));

699 aliasflen = (unsigned short *) malloc(numaliasf * sizeof(short));	884 aliasflen = (unsigned short *) malloc(numaliasf * sizeof(short));

700 if (!aliasf || !aliasflen) {	885 if (!aliasf || !aliasflen) {

701 numaliasf = 0;	886 numaliasf = 0;

702 if (aliasf) free(aliasf);	887 if (aliasf) free(aliasf);

703 if (aliasflen) free(aliasflen);	888 if (aliasflen) free(aliasflen);

704 aliasf = NULL;	889 aliasf = NULL;

705 aliasflen = NULL;	890 aliasflen = NULL;

706 return 1;	891 return 1;

707 }	892 }

708 np++;	893 np++;

709 break;	894 break;

710 }	895 }

711 default: break;	896 default: break;

712 }	897 }

713 i++;	898 i++;

714 }	899 }

715 free(piece);

716 piece = mystrsep(&tp, 0);	900 piece = mystrsep(&tp, 0);

717 }	901 }

718 if (np != 2) {	902 if (np != 2) {

719 numaliasf = 0;	903 numaliasf = 0;

720 free(aliasf);	904 free(aliasf);

721 free(aliasflen);	905 free(aliasflen);

722 aliasf = NULL;	906 aliasf = NULL;

723 aliasflen = NULL;	907 aliasflen = NULL;

724 HUNSPELL_WARNING(stderr, "error: missing AF table information\n");	908 HUNSPELL_WARNING(stderr, "error: missing data\n");

725 return 1;	909 return 1;

726 }	910 }

727	911

728 /* now parse the numaliasf lines to read in the remainder of the table */	912 /* now parse the numaliasf lines to read in the remainder of the table */

729 char * nl = line;	913 char * nl = line;

730 for (int j=0; j < numaliasf; j++) {	914 for (int j=0; j < numaliasf; j++) {

731 #ifdef HUNSPELL_CHROME_CLIENT	915 #ifdef HUNSPELL_CHROME_CLIENT

732 if (!iterator->AdvanceAndCopy(nl, MAXDELEN))	916 if (!iterator->AdvanceAndCopy(nl, MAXDELEN))

733 return 1;	917 return 1;

734 #else	918 #else

735 if (!fgets(nl,MAXDELEN,af)) return 1;	919 if (!(nl = af->getline())) return 1;

736 #endif	920 #endif

737 mychomp(nl);	921 » mychomp(nl);

738 tp = nl;	922 tp = nl;

739 i = 0;	923 i = 0;

740 aliasf[j] = NULL;	924 aliasf[j] = NULL;

741 aliasflen[j] = 0;	925 aliasflen[j] = 0;

742 piece = mystrsep(&tp, 0);	926 piece = mystrsep(&tp, 0);

743 while (piece) {	927 while (piece) {

744 if (*piece != '\0') {	928 if (*piece != '\0') {

745 switch(i) {	929 switch(i) {

746 case 0: {	930 case 0: {

747 if (strncmp(piece,"AF",2) != 0) {	931 if (strncmp(piece,"AF",2) != 0) {

748 numaliasf = 0;	932 numaliasf = 0;

749 free(aliasf);	933 free(aliasf);

750 free(aliasflen);	934 free(aliasflen);

751 aliasf = NULL;	935 aliasf = NULL;

752 aliasflen = NULL;	936 aliasflen = NULL;

753 HUNSPELL_WARNING(stderr, "error: AF table is corrupt\n");	937 HUNSPELL_WARNING(stderr, "error: table is corrupt\n");

754 free(piece);

755 return 1;	938 return 1;

756 }	939 }

757 break;	940 break;

758 }	941 }

759 case 1: {	942 case 1: {

760 aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece);	943 aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece);

761 flag_qsort(aliasf[j], 0, aliasflen[j]);	944 flag_qsort(aliasf[j], 0, aliasflen[j]);

762 break;	945 break;

763 }	946 }

764 default: break;	947 default: break;

765 }	948 }

766 i++;	949 i++;

767 }	950 }

768 free(piece);

769 piece = mystrsep(&tp, 0);	951 piece = mystrsep(&tp, 0);

770 }	952 }

771 if (!aliasf[j]) {	953 if (!aliasf[j]) {

772 free(aliasf);	954 free(aliasf);

773 free(aliasflen);	955 free(aliasflen);

774 aliasf = NULL;	956 aliasf = NULL;

775 aliasflen = NULL;	957 aliasflen = NULL;

776 numaliasf = 0;	958 numaliasf = 0;

777 HUNSPELL_WARNING(stderr, "error: AF table is corrupt\n");	959 HUNSPELL_WARNING(stderr, "error: table is corrupt\n");

778 return 1;	960 return 1;

779 }	961 }

780 }	962 }

781 return 0;	963 return 0;

782 }	964 }

783	965

784 #ifdef HUNSPELL_CHROME_CLIENT	966 #ifdef HUNSPELL_CHROME_CLIENT

785 hentry* HashMgr::AffixIDsToHentry(char* word,	967 hentry* HashMgr::AffixIDsToHentry(char* word,

786 int* affix_ids,	968 int* affix_ids,

787 int affix_count) const	969 int affix_count) const

(...skipping 15 matching lines...) Expand all Loading...
803	985

804 // We can get a number of prefixes per word. There will normally be only one,	986 // We can get a number of prefixes per word. There will normally be only one,

805 // but if not, there will be a linked list of "hentry"s for the "homonym"s	987 // but if not, there will be a linked list of "hentry"s for the "homonym"s

806 // for the word.	988 // for the word.

807 struct hentry* first_he = NULL;	989 struct hentry* first_he = NULL;

808 struct hentry* prev_he = NULL; // For making linked list.	990 struct hentry* prev_he = NULL; // For making linked list.

809 for (int i = 0; i < affix_count; i++) {	991 for (int i = 0; i < affix_count; i++) {

810 struct hentry* he = new hentry;	992 struct hentry* he = new hentry;

811 if (i == 0)	993 if (i == 0)

812 first_he = he;	994 first_he = he;

813 he->word = word;	995 he->word = *word;

814 he->wlen = word_len;	996 he->blen = word_len;

815 he->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[i],	997 he->alen = (short)const_cast<HashMgr*>(this)->get_aliasf(affix_ids[i],

816 &he->astr);	998 &he->astr);

817 he->next = NULL;	999 he->next = NULL;

818 he->next_homonym = NULL;	1000 he->next_homonym = NULL;

819 if (prev_he)	1001 if (prev_he)

820 prev_he->next_homonym = he;	1002 prev_he->next_homonym = he;

821 prev_he = he;	1003 prev_he = he;

822 }	1004 }

823	1005

824 cache[std_word] = first_he; // Save this word in the cache for later.	1006 cache[std_word] = first_he; // Save this word in the cache for later.

(...skipping 22 matching lines...) Expand all Loading...
847 int HashMgr::get_aliasf(int index, unsigned short ** fvec) {	1029 int HashMgr::get_aliasf(int index, unsigned short ** fvec) {

848 if ((index > 0) && (index <= numaliasf)) {	1030 if ((index > 0) && (index <= numaliasf)) {

849 *fvec = aliasf[index - 1];	1031 *fvec = aliasf[index - 1];

850 return aliasflen[index - 1];	1032 return aliasflen[index - 1];

851 }	1033 }

852 HUNSPELL_WARNING(stderr, "error: bad flag alias index: %d\n", index);	1034 HUNSPELL_WARNING(stderr, "error: bad flag alias index: %d\n", index);

853 *fvec = NULL;	1035 *fvec = NULL;

854 return 0;	1036 return 0;

855 }	1037 }

856	1038

857 #ifdef HUNSPELL_EXPERIMENTAL

858 /* parse morph alias definitions */	1039 /* parse morph alias definitions */

859 int HashMgr::parse_aliasm(char * line, FILE * af)	1040 int HashMgr::parse_aliasm(char * line, FileMgr * af)

860 {	1041 {

861 if (numaliasm != 0) {	1042 if (numaliasm != 0) {

862 HUNSPELL_WARNING(stderr, "error: duplicate AM (aliases for morphological descriptions) tables used\n");	1043 HUNSPELL_WARNING(stderr, "error: multiple table definitions\n");

863 return 1;	1044 return 1;

864 }	1045 }

865 char * tp = line;	1046 char * tp = line;

866 char * piece;	1047 char * piece;

867 int i = 0;	1048 int i = 0;

868 int np = 0;	1049 int np = 0;

869 piece = mystrsep(&tp, 0);	1050 piece = mystrsep(&tp, 0);

870 while (piece) {	1051 while (piece) {

871 if (*piece != '\0') {	1052 if (*piece != '\0') {

872 switch(i) {	1053 switch(i) {

873 case 0: { np++; break; }	1054 case 0: { np++; break; }

874 case 1: {	1055 case 1: {

875 numaliasm = atoi(piece);	1056 numaliasm = atoi(piece);

876 if (numaliasm < 1) {	1057 if (numaliasm < 1) {

877 HUNSPELL_WARNING(stderr, "incorrect number of entries in AM table\n");	1058 HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());

878 free(piece);

879 return 1;	1059 return 1;

880 }	1060 }

881 aliasm = (char **) malloc(numaliasm * sizeof(char *));	1061 aliasm = (char **) malloc(numaliasm * sizeof(char *));

882 if (!aliasm) {	1062 if (!aliasm) {

883 numaliasm = 0;	1063 numaliasm = 0;

884 return 1;	1064 return 1;

885 }	1065 }

886 np++;	1066 np++;

887 break;	1067 break;

888 }	1068 }

889 default: break;	1069 default: break;

890 }	1070 }

891 i++;	1071 i++;

892 }	1072 }

893 free(piece);

894 piece = mystrsep(&tp, 0);	1073 piece = mystrsep(&tp, 0);

895 }	1074 }

896 if (np != 2) {	1075 if (np != 2) {

897 numaliasm = 0;	1076 numaliasm = 0;

898 free(aliasm);	1077 free(aliasm);

899 aliasm = NULL;	1078 aliasm = NULL;

900 HUNSPELL_WARNING(stderr, "error: missing AM alias information\n");	1079 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());

901 return 1;	1080 return 1;

902 }	1081 }

903	1082

904 /* now parse the numaliasm lines to read in the remainder of the table */	1083 /* now parse the numaliasm lines to read in the remainder of the table */

905 char * nl = line;	1084 char * nl = line;

906 for (int j=0; j < numaliasm; j++) {	1085 for (int j=0; j < numaliasm; j++) {

907 if (!fgets(nl,MAXDELEN,af)) return 1;	1086 if (!(nl = af->getline())) return 1;

908 mychomp(nl);	1087 mychomp(nl);

909 tp = nl;	1088 tp = nl;

910 i = 0;	1089 i = 0;

911 aliasm[j] = NULL;	1090 aliasm[j] = NULL;

912 piece = mystrsep(&tp, 0);	1091 piece = mystrsep(&tp, ' ');

913 while (piece) {	1092 while (piece) {

914 if (*piece != '\0') {	1093 if (*piece != '\0') {

915 switch(i) {	1094 switch(i) {

916 case 0: {	1095 case 0: {

917 if (strncmp(piece,"AM",2) != 0) {	1096 if (strncmp(piece,"AM",2) != 0) {

918 HUNSPELL_WARNING(stderr, "error: AM table is corrupt\n");	1097 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());

919 free(piece);

920 numaliasm = 0;	1098 numaliasm = 0;

921 free(aliasm);	1099 free(aliasm);

922 aliasm = NULL;	1100 aliasm = NULL;

923 return 1;	1101 return 1;

924 }	1102 }

925 break;	1103 break;

926 }	1104 }

927 case 1: {	1105 case 1: {

	1106 // add the remaining of the line

	1107 if (*tp) {

	1108 *(tp - 1) = ' ';

	1109 tp = tp + strlen(tp);

	1110 }

928 if (complexprefixes) {	1111 if (complexprefixes) {

929 if (utf8) reverseword_utf(piece);	1112 if (utf8) reverseword_utf(piece);

930 else reverseword(piece);	1113 else reverseword(piece);

931 }	1114 }

932 aliasm[j] = mystrdup(piece);	1115 aliasm[j] = mystrdup(piece);

	1116 if (!aliasm[j]) {

	1117 numaliasm = 0;

	1118 free(aliasm);

	1119 aliasm = NULL;

	1120 return 1;

	1121 }

933 break; }	1122 break; }

934 default: break;	1123 default: break;

935 }	1124 }

936 i++;	1125 i++;

937 }	1126 }

938 free(piece);	1127 piece = mystrsep(&tp, ' ');

939 piece = mystrsep(&tp, 0);

940 }	1128 }

941 if (!aliasm[j]) {	1129 if (!aliasm[j]) {

942 numaliasm = 0;	1130 numaliasm = 0;

943 free(aliasm);	1131 free(aliasm);

944 aliasm = NULL;	1132 aliasm = NULL;

945 HUNSPELL_WARNING(stderr, "error: map table is corrupt\n");	1133 HUNSPELL_WARNING(stderr, "error: table is corrupt\n");

946 return 1;	1134 return 1;

947 }	1135 }

948 }	1136 }

949 return 0;	1137 return 0;

950 }	1138 }

951	1139

952 int HashMgr::is_aliasm() {	1140 int HashMgr::is_aliasm() {

953 return (aliasm != NULL);	1141 return (aliasm != NULL);

954 }	1142 }

955	1143

956 char * HashMgr::get_aliasm(int index) {	1144 char * HashMgr::get_aliasm(int index) {

957 if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1];	1145 if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1];

958 HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);	1146 HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);

959 return NULL;	1147 return NULL;

960 }	1148 }

961 #endif

OLD	NEW

« no previous file with comments | « chrome/third_party/hunspell/src/hunspell/hashmgr.hxx ('k') | chrome/third_party/hunspell/src/hunspell/htypes.hxx » ('j') | no next file with comments »