OLD | NEW |
1 /* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both | 1 /* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both |
2 * licenses follows. | 2 * licenses follows. |
3 */ | 3 */ |
4 | 4 |
5 /* LibHnj - a library for high quality hyphenation and justification | 5 /* LibHnj - a library for high quality hyphenation and justification |
6 * Copyright (C) 1998 Raph Levien, | 6 * Copyright (C) 1998 Raph Levien, |
7 * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), | 7 * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), |
8 * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) | 8 * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) |
9 * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo) | 9 * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo) |
10 * | 10 * |
(...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
235 HashEntry *e; | 235 HashEntry *e; |
236 | 236 |
237 for (i = 0; i < HASH_SIZE; i++) | 237 for (i = 0; i < HASH_SIZE; i++) |
238 for (e = global->entries[i]; e; e = e->next) | 238 for (e = global->entries[i]; e; e = e->next) |
239 if (e->val == state) | 239 if (e->val == state) |
240 return e->key; | 240 return e->key; |
241 return NULL; | 241 return NULL; |
242 } | 242 } |
243 #endif | 243 #endif |
244 | 244 |
| 245 #ifdef HYPHEN_CHROME_CLIENT |
/* Read cursor over a caller-supplied, in-memory dictionary image.  Used as
 * the stream type when HYPHEN_CHROME_CLIENT is defined. */
typedef struct {
  const unsigned char *data;  /* start of the buffer; stored as passed to hnj_fopen and not freed by hnj_fclose */
  size_t offset;              /* index of the next byte hnj_fgets will read */
  size_t size;                /* number of bytes available at data */
} hnj_file;
| 251 |
| 252 static hnj_file * |
| 253 hnj_fopen (const unsigned char *data, size_t size) |
| 254 { |
| 255 hnj_file *f; |
| 256 |
| 257 f = hnj_malloc (sizeof(hnj_file)); |
| 258 if (f == NULL) |
| 259 return NULL; |
| 260 f->offset = 0; |
| 261 f->data = data; |
| 262 f->size = size; |
| 263 return f; |
| 264 } |
| 265 |
| 266 static void |
| 267 hnj_fclose (hnj_file *f) |
| 268 { |
| 269 hnj_free (f); |
| 270 } |
| 271 |
| 272 static char * |
| 273 hnj_fgets (char *s, int size, hnj_file *f) |
| 274 { |
| 275 int i; |
| 276 |
| 277 if (f->offset >= f->size) |
| 278 return NULL; |
| 279 for (i = 0; i < size - 1; i++) { |
| 280 char c; |
| 281 |
| 282 if (f->offset >= f->size) |
| 283 break; |
| 284 c = f->data[f->offset++]; |
| 285 if (c == '\r' || c == '\n') |
| 286 break; |
| 287 s[i] = c; |
| 288 } |
| 289 s[i] = '\0'; |
| 290 return s; |
| 291 } |
| 292 #else |
/* Without HYPHEN_CHROME_CLIENT the dictionary is read through stdio:
 * hnj_file is a plain FILE and the hnj_f* names expand directly to the
 * corresponding stdio calls. */
typedef FILE hnj_file;
#define hnj_fopen(fn, mode) fopen((fn), (mode))
#define hnj_fclose(f) fclose(f)
#define hnj_fgets(s, size, f) fgets((s), (size), (f))
| 297 #endif |
| 298 |
| 299 #ifdef HYPHEN_CHROME_CLIENT |
| 300 HyphenDict * |
| 301 hnj_hyphen_load (const unsigned char *data, size_t size) |
| 302 #else |
245 HyphenDict * | 303 HyphenDict * |
246 hnj_hyphen_load (const char *fn) | 304 hnj_hyphen_load (const char *fn) |
| 305 #endif |
247 { | 306 { |
248 HyphenDict *dict[2]; | 307 HyphenDict *dict[2]; |
249 HashTab *hashtab; | 308 HashTab *hashtab; |
250 FILE *f; | 309 hnj_file *f; |
251 char buf[MAX_CHARS]; | 310 char buf[MAX_CHARS]; |
252 char word[MAX_CHARS]; | 311 char word[MAX_CHARS]; |
253 char pattern[MAX_CHARS]; | 312 char pattern[MAX_CHARS]; |
254 char * repl; | 313 char * repl; |
255 signed char replindex; | 314 signed char replindex; |
256 signed char replcut; | 315 signed char replcut; |
257 int state_num = 0, last_state; | 316 int state_num = 0, last_state; |
258 int i, j, k; | 317 int i, j, k; |
259 char ch; | 318 char ch; |
260 int found; | 319 int found; |
261 HashEntry *e; | 320 HashEntry *e; |
262 int nextlevel = 0; | 321 int nextlevel = 0; |
263 | 322 |
264 f = fopen (fn, "r"); | 323 #ifdef HYPHEN_CHROME_CLIENT |
| 324 f = hnj_fopen (data, size); |
| 325 #else |
| 326 f = hnj_fopen (fn, "r"); |
| 327 #endif |
265 if (f == NULL) | 328 if (f == NULL) |
266 return NULL; | 329 return NULL; |
267 | 330 |
268 // loading one or two dictionaries (separated by NEXTLEVEL keyword) | 331 // loading one or two dictionaries (separated by NEXTLEVEL keyword) |
269 for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { | 332 for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { |
270 hashtab = hnj_hash_new (); | 333 hashtab = hnj_hash_new (); |
271 #ifdef VERBOSE | 334 #ifdef VERBOSE |
272 global = hashtab; | 335 global = hashtab; |
273 #endif | 336 #endif |
274 hnj_hash_insert (hashtab, "", 0); | 337 hnj_hash_insert (hashtab, "", 0); |
275 dict[k] = hnj_malloc (sizeof(HyphenDict)); | 338 dict[k] = hnj_malloc (sizeof(HyphenDict)); |
276 dict[k]->num_states = 1; | 339 dict[k]->num_states = 1; |
277 dict[k]->states = hnj_malloc (sizeof(HyphenState)); | 340 dict[k]->states = hnj_malloc (sizeof(HyphenState)); |
278 dict[k]->states[0].match = NULL; | 341 dict[k]->states[0].match = NULL; |
279 dict[k]->states[0].repl = NULL; | 342 dict[k]->states[0].repl = NULL; |
280 dict[k]->states[0].fallback_state = -1; | 343 dict[k]->states[0].fallback_state = -1; |
281 dict[k]->states[0].num_trans = 0; | 344 dict[k]->states[0].num_trans = 0; |
282 dict[k]->states[0].trans = NULL; | 345 dict[k]->states[0].trans = NULL; |
283 dict[k]->nextlevel = NULL; | 346 dict[k]->nextlevel = NULL; |
284 dict[k]->lhmin = 0; | 347 dict[k]->lhmin = 0; |
285 dict[k]->rhmin = 0; | 348 dict[k]->rhmin = 0; |
286 dict[k]->clhmin = 0; | 349 dict[k]->clhmin = 0; |
287 dict[k]->crhmin = 0; | 350 dict[k]->crhmin = 0; |
288 | 351 |
289 /* read in character set info */ | 352 /* read in character set info */ |
290 if (k == 0) { | 353 if (k == 0) { |
291 for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; | 354 for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; |
292 if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { | 355 if (hnj_fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { |
293 for (i=0;i<MAX_NAME;i++) | 356 for (i=0;i<MAX_NAME;i++) |
294 if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) | 357 if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) |
295 dict[k]->cset[i] = 0; | 358 dict[k]->cset[i] = 0; |
296 } else { | 359 } else { |
297 dict[k]->cset[0] = 0; | 360 dict[k]->cset[0] = 0; |
298 } | 361 } |
299 dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); | 362 dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); |
300 } else { | 363 } else { |
301 strcpy(dict[k]->cset, dict[0]->cset); | 364 strcpy(dict[k]->cset, dict[0]->cset); |
302 dict[k]->utf8 = dict[0]->utf8; | 365 dict[k]->utf8 = dict[0]->utf8; |
303 } | 366 } |
304 | 367 |
305 while (fgets (buf, sizeof(buf), f) != NULL) | 368 while (hnj_fgets (buf, sizeof(buf), f) != NULL) |
306 { | 369 { |
307 if (buf[0] != '%') | 370 if (buf[0] != '%') |
308 { | 371 { |
309 if (strncmp(buf, "NEXTLEVEL", 9) == 0) { | 372 if (strncmp(buf, "NEXTLEVEL", 9) == 0) { |
310 nextlevel = 1; | 373 nextlevel = 1; |
311 break; | 374 break; |
312 } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { | 375 } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { |
313 dict[k]->lhmin = atoi(buf + 13); | 376 dict[k]->lhmin = atoi(buf + 13); |
314 continue; | 377 continue; |
315 } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { | 378 } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
361 i = 0; | 424 i = 0; |
362 if (!repl) { | 425 if (!repl) { |
363 /* Optimize away leading zeroes */ | 426 /* Optimize away leading zeroes */ |
364 for (; pattern[i] == '0'; i++); | 427 for (; pattern[i] == '0'; i++); |
365 } else { | 428 } else { |
366 if (*word == '.') i++; | 429 if (*word == '.') i++; |
367 /* convert UTF-8 char. positions of discretionary hyph. replacements
to 8-bit */ | 430 /* convert UTF-8 char. positions of discretionary hyph. replacements
to 8-bit */ |
368 if (dict[k]->utf8) { | 431 if (dict[k]->utf8) { |
369 int pu = -1; /* unicode character position */ | 432 int pu = -1; /* unicode character position */ |
370 int ps = -1; /* unicode start position (original replinde
x) */ | 433 int ps = -1; /* unicode start position (original replinde
x) */ |
371 int pc = (*word == '.') ? 1: 0; /* 8-bit character position */ | 434 size_t pc = (*word == '.') ? 1: 0; /* 8-bit character position *
/ |
372 for (; pc < (strlen(word) + 1); pc++) { | 435 for (; pc < (strlen(word) + 1); pc++) { |
373 /* beginning of an UTF-8 character (not '10' start bits) */ | 436 /* beginning of an UTF-8 character (not '10' start bits) */ |
374 if ((((unsigned char) word[pc]) >> 6) != 2) pu++; | 437 if ((((unsigned char) word[pc]) >> 6) != 2) pu++; |
375 if ((ps < 0) && (replindex == pu)) { | 438 if ((ps < 0) && (replindex == pu)) { |
376 ps = replindex; | 439 ps = replindex; |
377 replindex = (signed char) pc; | 440 replindex = (signed char) pc; |
378 } | 441 } |
379 if ((ps >= 0) && ((pu - ps) == replcut)) { | 442 if ((ps >= 0) && ((pu - ps) == replcut)) { |
380 replcut = (signed char) (pc - replindex); | 443 replcut = (signed char) (pc - replindex); |
381 break; | 444 break; |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
454 printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch, | 517 printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch, |
455 dict[k]->states[e->val].trans[j].new_state); | 518 dict[k]->states[e->val].trans[j].new_state); |
456 } | 519 } |
457 #endif | 520 #endif |
458 | 521 |
459 #ifndef VERBOSE | 522 #ifndef VERBOSE |
460 hnj_hash_free (hashtab); | 523 hnj_hash_free (hashtab); |
461 #endif | 524 #endif |
462 state_num = 0; | 525 state_num = 0; |
463 } | 526 } |
464 fclose(f); | 527 hnj_fclose(f); |
465 if (k == 2) dict[0]->nextlevel = dict[1]; | 528 if (k == 2) dict[0]->nextlevel = dict[1]; |
466 return dict[0]; | 529 return dict[0]; |
467 } | 530 } |
468 | 531 |
469 void hnj_hyphen_free (HyphenDict *dict) | 532 void hnj_hyphen_free (HyphenDict *dict) |
470 { | 533 { |
471 int state_num; | 534 int state_num; |
472 HyphenState *hstate; | 535 HyphenState *hstate; |
473 | 536 |
474 for (state_num = 0; state_num < dict->num_states; state_num++) | 537 for (state_num = 0; state_num < dict->num_states; state_num++) |
(...skipping 591 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1066 hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, | 1129 hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, |
1067 clhmin, crhmin, 1, 1); | 1130 clhmin, crhmin, 1, 1); |
1068 hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, | 1131 hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, |
1069 rep, pos, cut, (lhmin > 0 ? lhmin : 2)); | 1132 rep, pos, cut, (lhmin > 0 ? lhmin : 2)); |
1070 hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, | 1133 hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, |
1071 rep, pos, cut, (rhmin > 0 ? rhmin : 2)); | 1134 rep, pos, cut, (rhmin > 0 ? rhmin : 2)); |
1072 if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos
, cut); | 1135 if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos
, cut); |
1073 if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut
); | 1136 if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut
); |
1074 return 0; | 1137 return 0; |
1075 } | 1138 } |
OLD | NEW |