| OLD | NEW |
| 1 /* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both | 1 /* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both |
| 2 * licenses follows. | 2 * licenses follows. |
| 3 */ | 3 */ |
| 4 | 4 |
| 5 /* LibHnj - a library for high quality hyphenation and justification | 5 /* LibHnj - a library for high quality hyphenation and justification |
| 6 * Copyright (C) 1998 Raph Levien, | 6 * Copyright (C) 1998 Raph Levien, |
| 7 * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), | 7 * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), |
| 8 * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) | 8 * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) |
| 9 * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo) | 9 * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo) |
| 10 * | 10 * |
| (...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 235 HashEntry *e; | 235 HashEntry *e; |
| 236 | 236 |
| 237 for (i = 0; i < HASH_SIZE; i++) | 237 for (i = 0; i < HASH_SIZE; i++) |
| 238 for (e = global->entries[i]; e; e = e->next) | 238 for (e = global->entries[i]; e; e = e->next) |
| 239 if (e->val == state) | 239 if (e->val == state) |
| 240 return e->key; | 240 return e->key; |
| 241 return NULL; | 241 return NULL; |
| 242 } | 242 } |
| 243 #endif | 243 #endif |
| 244 | 244 |
| 245 #ifdef HYPHEN_CHROME_CLIENT | 245 HyphenDict * |
| 246 typedef struct { | 246 hnj_hyphen_load (const char *fn) |
| 247 const unsigned char *data; | |
| 248 size_t offset; | |
| 249 size_t size; | |
| 250 } hnj_file; | |
| 251 | |
| 252 static hnj_file * | |
| 253 hnj_fopen (const unsigned char *data, size_t size) | |
| 254 { | 247 { |
| 255 hnj_file *f; | 248 HyphenDict *result; |
| 256 | 249 FILE *f; |
| 257 f = hnj_malloc (sizeof(hnj_file)); | 250 f = fopen (fn, "r"); |
| 258 if (f == NULL) | 251 if (f == NULL) |
| 259 return NULL; | 252 return NULL; |
| 260 f->offset = 0; | 253 |
| 261 f->data = data; | 254 result = hnj_hyphen_load_file(f); |
| 262 f->size = size; | 255 |
| 263 return f; | 256 fclose(f); |
| 257 return result; |
| 264 } | 258 } |
| 265 | 259 |
| 266 static void | |
| 267 hnj_fclose (hnj_file *f) | |
| 268 { | |
| 269 hnj_free (f); | |
| 270 } | |
| 271 | |
| 272 static char * | |
| 273 hnj_fgets (char *s, int size, hnj_file *f) | |
| 274 { | |
| 275 int i; | |
| 276 | |
| 277 if (f->offset >= f->size) | |
| 278 return NULL; | |
| 279 for (i = 0; i < size - 1; i++) { | |
| 280 char c; | |
| 281 | |
| 282 if (f->offset >= f->size) | |
| 283 break; | |
| 284 c = f->data[f->offset++]; | |
| 285 if (c == '\r' || c == '\n') | |
| 286 break; | |
| 287 s[i] = c; | |
| 288 } | |
| 289 s[i] = '\0'; | |
| 290 return s; | |
| 291 } | |
| 292 #else | |
| 293 typedef FILE hnj_file; | |
| 294 #define hnj_fopen(fn, mode) fopen((fn), (mode)) | |
| 295 #define hnj_fclose(f) fclose(f) | |
| 296 #define hnj_fgets(s, size, f) fgets((s), (size), (f)) | |
| 297 #endif | |
| 298 | |
| 299 #ifdef HYPHEN_CHROME_CLIENT | |
| 300 HyphenDict * | 260 HyphenDict * |
| 301 hnj_hyphen_load (const unsigned char *data, size_t size) | 261 hnj_hyphen_load_file (FILE *f) |
| 302 #else | |
| 303 HyphenDict * | |
| 304 hnj_hyphen_load (const char *fn) | |
| 305 #endif | |
| 306 { | 262 { |
| 307 HyphenDict *dict[2]; | 263 HyphenDict *dict[2]; |
| 308 HashTab *hashtab; | 264 HashTab *hashtab; |
| 309 hnj_file *f; | |
| 310 char buf[MAX_CHARS]; | 265 char buf[MAX_CHARS]; |
| 311 char word[MAX_CHARS]; | 266 char word[MAX_CHARS]; |
| 312 char pattern[MAX_CHARS]; | 267 char pattern[MAX_CHARS]; |
| 313 char * repl; | 268 char * repl; |
| 314 signed char replindex; | 269 signed char replindex; |
| 315 signed char replcut; | 270 signed char replcut; |
| 316 int state_num = 0, last_state; | 271 int state_num = 0, last_state; |
| 317 int i, j, k; | 272 int i, j, k; |
| 318 char ch; | 273 char ch; |
| 319 int found; | 274 int found; |
| 320 HashEntry *e; | 275 HashEntry *e; |
| 321 int nextlevel = 0; | 276 int nextlevel = 0; |
| 322 | 277 |
| 323 #ifdef HYPHEN_CHROME_CLIENT | |
| 324 f = hnj_fopen (data, size); | |
| 325 #else | |
| 326 f = hnj_fopen (fn, "r"); | |
| 327 #endif | |
| 328 if (f == NULL) | |
| 329 return NULL; | |
| 330 | |
| 331 // loading one or two dictionaries (separated by NEXTLEVEL keyword) | 278 // loading one or two dictionaries (separated by NEXTLEVEL keyword) |
| 332 for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { | 279 for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { |
| 333 hashtab = hnj_hash_new (); | 280 hashtab = hnj_hash_new (); |
| 334 #ifdef VERBOSE | 281 #ifdef VERBOSE |
| 335 global = hashtab; | 282 global = hashtab; |
| 336 #endif | 283 #endif |
| 337 hnj_hash_insert (hashtab, "", 0); | 284 hnj_hash_insert (hashtab, "", 0); |
| 338 dict[k] = hnj_malloc (sizeof(HyphenDict)); | 285 dict[k] = hnj_malloc (sizeof(HyphenDict)); |
| 339 dict[k]->num_states = 1; | 286 dict[k]->num_states = 1; |
| 340 dict[k]->states = hnj_malloc (sizeof(HyphenState)); | 287 dict[k]->states = hnj_malloc (sizeof(HyphenState)); |
| 341 dict[k]->states[0].match = NULL; | 288 dict[k]->states[0].match = NULL; |
| 342 dict[k]->states[0].repl = NULL; | 289 dict[k]->states[0].repl = NULL; |
| 343 dict[k]->states[0].fallback_state = -1; | 290 dict[k]->states[0].fallback_state = -1; |
| 344 dict[k]->states[0].num_trans = 0; | 291 dict[k]->states[0].num_trans = 0; |
| 345 dict[k]->states[0].trans = NULL; | 292 dict[k]->states[0].trans = NULL; |
| 346 dict[k]->nextlevel = NULL; | 293 dict[k]->nextlevel = NULL; |
| 347 dict[k]->lhmin = 0; | 294 dict[k]->lhmin = 0; |
| 348 dict[k]->rhmin = 0; | 295 dict[k]->rhmin = 0; |
| 349 dict[k]->clhmin = 0; | 296 dict[k]->clhmin = 0; |
| 350 dict[k]->crhmin = 0; | 297 dict[k]->crhmin = 0; |
| 351 | 298 |
| 352 /* read in character set info */ | 299 /* read in character set info */ |
| 353 if (k == 0) { | 300 if (k == 0) { |
| 354 for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; | 301 for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; |
| 355 if (hnj_fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { | 302 if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { |
| 356 for (i=0;i<MAX_NAME;i++) | 303 for (i=0;i<MAX_NAME;i++) |
| 357 if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) | 304 if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) |
| 358 dict[k]->cset[i] = 0; | 305 dict[k]->cset[i] = 0; |
| 359 } else { | 306 } else { |
| 360 dict[k]->cset[0] = 0; | 307 dict[k]->cset[0] = 0; |
| 361 } | 308 } |
| 362 dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); | 309 dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); |
| 363 } else { | 310 } else { |
| 364 strcpy(dict[k]->cset, dict[0]->cset); | 311 strcpy(dict[k]->cset, dict[0]->cset); |
| 365 dict[k]->utf8 = dict[0]->utf8; | 312 dict[k]->utf8 = dict[0]->utf8; |
| 366 } | 313 } |
| 367 | 314 |
| 368 while (hnj_fgets (buf, sizeof(buf), f) != NULL) | 315 while (fgets (buf, sizeof(buf), f) != NULL) |
| 369 { | 316 { |
| 370 if (buf[0] != '%') | 317 if (buf[0] != '%') |
| 371 { | 318 { |
| 372 if (strncmp(buf, "NEXTLEVEL", 9) == 0) { | 319 if (strncmp(buf, "NEXTLEVEL", 9) == 0) { |
| 373 nextlevel = 1; | 320 nextlevel = 1; |
| 374 break; | 321 break; |
| 375 } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { | 322 } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { |
| 376 dict[k]->lhmin = atoi(buf + 13); | 323 dict[k]->lhmin = atoi(buf + 13); |
| 377 continue; | 324 continue; |
| 378 } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { | 325 } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 424 i = 0; | 371 i = 0; |
| 425 if (!repl) { | 372 if (!repl) { |
| 426 /* Optimize away leading zeroes */ | 373 /* Optimize away leading zeroes */ |
| 427 for (; pattern[i] == '0'; i++); | 374 for (; pattern[i] == '0'; i++); |
| 428 } else { | 375 } else { |
| 429 if (*word == '.') i++; | 376 if (*word == '.') i++; |
| 430 /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */ | 377 /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */ |
| 431 if (dict[k]->utf8) { | 378 if (dict[k]->utf8) { |
| 432 int pu = -1; /* unicode character position */ | 379 int pu = -1; /* unicode character position */ |
| 433 int ps = -1; /* unicode start position (original replindex) */ | 380 int ps = -1; /* unicode start position (original replindex) */ |
| 434 size_t pc = (*word == '.') ? 1: 0; /* 8-bit character position */ | 381 int pc = (*word == '.') ? 1: 0; /* 8-bit character position */ |
| 435 for (; pc < (strlen(word) + 1); pc++) { | 382 for (; pc < (strlen(word) + 1); pc++) { |
| 436 /* beginning of an UTF-8 character (not '10' start bits) */ | 383 /* beginning of an UTF-8 character (not '10' start bits) */ |
| 437 if ((((unsigned char) word[pc]) >> 6) != 2) pu++; | 384 if ((((unsigned char) word[pc]) >> 6) != 2) pu++; |
| 438 if ((ps < 0) && (replindex == pu)) { | 385 if ((ps < 0) && (replindex == pu)) { |
| 439 ps = replindex; | 386 ps = replindex; |
| 440 replindex = (signed char) pc; | 387 replindex = (signed char) pc; |
| 441 } | 388 } |
| 442 if ((ps >= 0) && ((pu - ps) == replcut)) { | 389 if ((ps >= 0) && ((pu - ps) == replcut)) { |
| 443 replcut = (signed char) (pc - replindex); | 390 replcut = (signed char) (pc - replindex); |
| 444 break; | 391 break; |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 517 printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch, | 464 printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch, |
| 518 dict[k]->states[e->val].trans[j].new_state); | 465 dict[k]->states[e->val].trans[j].new_state); |
| 519 } | 466 } |
| 520 #endif | 467 #endif |
| 521 | 468 |
| 522 #ifndef VERBOSE | 469 #ifndef VERBOSE |
| 523 hnj_hash_free (hashtab); | 470 hnj_hash_free (hashtab); |
| 524 #endif | 471 #endif |
| 525 state_num = 0; | 472 state_num = 0; |
| 526 } | 473 } |
| 527 hnj_fclose(f); | |
| 528 if (k == 2) dict[0]->nextlevel = dict[1]; | 474 if (k == 2) dict[0]->nextlevel = dict[1]; |
| 529 return dict[0]; | 475 return dict[0]; |
| 530 } | 476 } |
| 531 | 477 |
| 532 void hnj_hyphen_free (HyphenDict *dict) | 478 void hnj_hyphen_free (HyphenDict *dict) |
| 533 { | 479 { |
| 534 int state_num; | 480 int state_num; |
| 535 HyphenState *hstate; | 481 HyphenState *hstate; |
| 536 | 482 |
| 537 for (state_num = 0; state_num < dict->num_states; state_num++) | 483 for (state_num = 0; state_num < dict->num_states; state_num++) |
| (...skipping 591 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1129 hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, | 1075 hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, |
| 1130 clhmin, crhmin, 1, 1); | 1076 clhmin, crhmin, 1, 1); |
| 1131 hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, | 1077 hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, |
| 1132 rep, pos, cut, (lhmin > 0 ? lhmin : 2)); | 1078 rep, pos, cut, (lhmin > 0 ? lhmin : 2)); |
| 1133 hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, | 1079 hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, |
| 1134 rep, pos, cut, (rhmin > 0 ? rhmin : 2)); | 1080 rep, pos, cut, (rhmin > 0 ? rhmin : 2)); |
| 1135 if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut); | 1081 if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut); |
| 1136 if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut); | 1082 if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut); |
| 1137 return 0; | 1083 return 0; |
| 1138 } | 1084 } |
| OLD | NEW |