OLD | NEW |
1 /* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both | 1 /* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both |
2 * licenses follows. | 2 * licenses follows. |
3 */ | 3 */ |
4 | 4 |
5 /* LibHnj - a library for high quality hyphenation and justification | 5 /* LibHnj - a library for high quality hyphenation and justification |
6 * Copyright (C) 1998 Raph Levien, | 6 * Copyright (C) 1998 Raph Levien, |
7 * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), | 7 * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), |
8 * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) | 8 * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) |
9 * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo) | 9 * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo) |
10 * | 10 * |
(...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
235 HashEntry *e; | 235 HashEntry *e; |
236 | 236 |
237 for (i = 0; i < HASH_SIZE; i++) | 237 for (i = 0; i < HASH_SIZE; i++) |
238 for (e = global->entries[i]; e; e = e->next) | 238 for (e = global->entries[i]; e; e = e->next) |
239 if (e->val == state) | 239 if (e->val == state) |
240 return e->key; | 240 return e->key; |
241 return NULL; | 241 return NULL; |
242 } | 242 } |
243 #endif | 243 #endif |
244 | 244 |
245 #ifdef HYPHEN_CHROME_CLIENT | 245 HyphenDict * |
246 typedef struct { | 246 hnj_hyphen_load (const char *fn) |
247 const unsigned char *data; | |
248 size_t offset; | |
249 size_t size; | |
250 } hnj_file; | |
251 | |
252 static hnj_file * | |
253 hnj_fopen (const unsigned char *data, size_t size) | |
254 { | 247 { |
255 hnj_file *f; | 248 HyphenDict *result; |
256 | 249 FILE *f; |
257 f = hnj_malloc (sizeof(hnj_file)); | 250 f = fopen (fn, "r"); |
258 if (f == NULL) | 251 if (f == NULL) |
259 return NULL; | 252 return NULL; |
260 f->offset = 0; | 253 |
261 f->data = data; | 254 result = hnj_hyphen_load_file(f); |
262 f->size = size; | 255 |
263 return f; | 256 fclose(f); |
| 257 return result; |
264 } | 258 } |
265 | 259 |
266 static void | |
267 hnj_fclose (hnj_file *f) | |
268 { | |
269 hnj_free (f); | |
270 } | |
271 | |
272 static char * | |
273 hnj_fgets (char *s, int size, hnj_file *f) | |
274 { | |
275 int i; | |
276 | |
277 if (f->offset >= f->size) | |
278 return NULL; | |
279 for (i = 0; i < size - 1; i++) { | |
280 char c; | |
281 | |
282 if (f->offset >= f->size) | |
283 break; | |
284 c = f->data[f->offset++]; | |
285 if (c == '\r' || c == '\n') | |
286 break; | |
287 s[i] = c; | |
288 } | |
289 s[i] = '\0'; | |
290 return s; | |
291 } | |
292 #else | |
293 typedef FILE hnj_file; | |
294 #define hnj_fopen(fn, mode) fopen((fn), (mode)) | |
295 #define hnj_fclose(f) fclose(f) | |
296 #define hnj_fgets(s, size, f) fgets((s), (size), (f)) | |
297 #endif | |
298 | |
299 #ifdef HYPHEN_CHROME_CLIENT | |
300 HyphenDict * | 260 HyphenDict * |
301 hnj_hyphen_load (const unsigned char *data, size_t size) | 261 hnj_hyphen_load_file (FILE *f) |
302 #else | |
303 HyphenDict * | |
304 hnj_hyphen_load (const char *fn) | |
305 #endif | |
306 { | 262 { |
307 HyphenDict *dict[2]; | 263 HyphenDict *dict[2]; |
308 HashTab *hashtab; | 264 HashTab *hashtab; |
309 hnj_file *f; | |
310 char buf[MAX_CHARS]; | 265 char buf[MAX_CHARS]; |
311 char word[MAX_CHARS]; | 266 char word[MAX_CHARS]; |
312 char pattern[MAX_CHARS]; | 267 char pattern[MAX_CHARS]; |
313 char * repl; | 268 char * repl; |
314 signed char replindex; | 269 signed char replindex; |
315 signed char replcut; | 270 signed char replcut; |
316 int state_num = 0, last_state; | 271 int state_num = 0, last_state; |
317 int i, j, k; | 272 int i, j, k; |
318 char ch; | 273 char ch; |
319 int found; | 274 int found; |
320 HashEntry *e; | 275 HashEntry *e; |
321 int nextlevel = 0; | 276 int nextlevel = 0; |
322 | 277 |
323 #ifdef HYPHEN_CHROME_CLIENT | |
324 f = hnj_fopen (data, size); | |
325 #else | |
326 f = hnj_fopen (fn, "r"); | |
327 #endif | |
328 if (f == NULL) | |
329 return NULL; | |
330 | |
331 // loading one or two dictionaries (separated by NEXTLEVEL keyword) | 278 // loading one or two dictionaries (separated by NEXTLEVEL keyword) |
332 for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { | 279 for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { |
333 hashtab = hnj_hash_new (); | 280 hashtab = hnj_hash_new (); |
334 #ifdef VERBOSE | 281 #ifdef VERBOSE |
335 global = hashtab; | 282 global = hashtab; |
336 #endif | 283 #endif |
337 hnj_hash_insert (hashtab, "", 0); | 284 hnj_hash_insert (hashtab, "", 0); |
338 dict[k] = hnj_malloc (sizeof(HyphenDict)); | 285 dict[k] = hnj_malloc (sizeof(HyphenDict)); |
339 dict[k]->num_states = 1; | 286 dict[k]->num_states = 1; |
340 dict[k]->states = hnj_malloc (sizeof(HyphenState)); | 287 dict[k]->states = hnj_malloc (sizeof(HyphenState)); |
341 dict[k]->states[0].match = NULL; | 288 dict[k]->states[0].match = NULL; |
342 dict[k]->states[0].repl = NULL; | 289 dict[k]->states[0].repl = NULL; |
343 dict[k]->states[0].fallback_state = -1; | 290 dict[k]->states[0].fallback_state = -1; |
344 dict[k]->states[0].num_trans = 0; | 291 dict[k]->states[0].num_trans = 0; |
345 dict[k]->states[0].trans = NULL; | 292 dict[k]->states[0].trans = NULL; |
346 dict[k]->nextlevel = NULL; | 293 dict[k]->nextlevel = NULL; |
347 dict[k]->lhmin = 0; | 294 dict[k]->lhmin = 0; |
348 dict[k]->rhmin = 0; | 295 dict[k]->rhmin = 0; |
349 dict[k]->clhmin = 0; | 296 dict[k]->clhmin = 0; |
350 dict[k]->crhmin = 0; | 297 dict[k]->crhmin = 0; |
351 | 298 |
352 /* read in character set info */ | 299 /* read in character set info */ |
353 if (k == 0) { | 300 if (k == 0) { |
354 for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; | 301 for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; |
355 if (hnj_fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { | 302 if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { |
356 for (i=0;i<MAX_NAME;i++) | 303 for (i=0;i<MAX_NAME;i++) |
357 if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) | 304 if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) |
358 dict[k]->cset[i] = 0; | 305 dict[k]->cset[i] = 0; |
359 } else { | 306 } else { |
360 dict[k]->cset[0] = 0; | 307 dict[k]->cset[0] = 0; |
361 } | 308 } |
362 dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); | 309 dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); |
363 } else { | 310 } else { |
364 strcpy(dict[k]->cset, dict[0]->cset); | 311 strcpy(dict[k]->cset, dict[0]->cset); |
365 dict[k]->utf8 = dict[0]->utf8; | 312 dict[k]->utf8 = dict[0]->utf8; |
366 } | 313 } |
367 | 314 |
368 while (hnj_fgets (buf, sizeof(buf), f) != NULL) | 315 while (fgets (buf, sizeof(buf), f) != NULL) |
369 { | 316 { |
370 if (buf[0] != '%') | 317 if (buf[0] != '%') |
371 { | 318 { |
372 if (strncmp(buf, "NEXTLEVEL", 9) == 0) { | 319 if (strncmp(buf, "NEXTLEVEL", 9) == 0) { |
373 nextlevel = 1; | 320 nextlevel = 1; |
374 break; | 321 break; |
375 } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { | 322 } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { |
376 dict[k]->lhmin = atoi(buf + 13); | 323 dict[k]->lhmin = atoi(buf + 13); |
377 continue; | 324 continue; |
378 } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { | 325 } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
424 i = 0; | 371 i = 0; |
425 if (!repl) { | 372 if (!repl) { |
426 /* Optimize away leading zeroes */ | 373 /* Optimize away leading zeroes */ |
427 for (; pattern[i] == '0'; i++); | 374 for (; pattern[i] == '0'; i++); |
428 } else { | 375 } else { |
429 if (*word == '.') i++; | 376 if (*word == '.') i++; |
430 /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */ | 377 /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */ |
431 if (dict[k]->utf8) { | 378 if (dict[k]->utf8) { |
432 int pu = -1; /* unicode character position */ | 379 int pu = -1; /* unicode character position */ |
433 int ps = -1; /* unicode start position (original replindex) */ | 380 int ps = -1; /* unicode start position (original replindex) */ |
434 size_t pc = (*word == '.') ? 1: 0; /* 8-bit character position */ | 381 int pc = (*word == '.') ? 1: 0; /* 8-bit character position */ |
435 for (; pc < (strlen(word) + 1); pc++) { | 382 for (; pc < (strlen(word) + 1); pc++) { |
436 /* beginning of an UTF-8 character (not '10' start bits) */ | 383 /* beginning of an UTF-8 character (not '10' start bits) */ |
437 if ((((unsigned char) word[pc]) >> 6) != 2) pu++; | 384 if ((((unsigned char) word[pc]) >> 6) != 2) pu++; |
438 if ((ps < 0) && (replindex == pu)) { | 385 if ((ps < 0) && (replindex == pu)) { |
439 ps = replindex; | 386 ps = replindex; |
440 replindex = (signed char) pc; | 387 replindex = (signed char) pc; |
441 } | 388 } |
442 if ((ps >= 0) && ((pu - ps) == replcut)) { | 389 if ((ps >= 0) && ((pu - ps) == replcut)) { |
443 replcut = (signed char) (pc - replindex); | 390 replcut = (signed char) (pc - replindex); |
444 break; | 391 break; |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
517 printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch, | 464 printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch, |
518 dict[k]->states[e->val].trans[j].new_state); | 465 dict[k]->states[e->val].trans[j].new_state); |
519 } | 466 } |
520 #endif | 467 #endif |
521 | 468 |
522 #ifndef VERBOSE | 469 #ifndef VERBOSE |
523 hnj_hash_free (hashtab); | 470 hnj_hash_free (hashtab); |
524 #endif | 471 #endif |
525 state_num = 0; | 472 state_num = 0; |
526 } | 473 } |
527 hnj_fclose(f); | |
528 if (k == 2) dict[0]->nextlevel = dict[1]; | 474 if (k == 2) dict[0]->nextlevel = dict[1]; |
529 return dict[0]; | 475 return dict[0]; |
530 } | 476 } |
531 | 477 |
532 void hnj_hyphen_free (HyphenDict *dict) | 478 void hnj_hyphen_free (HyphenDict *dict) |
533 { | 479 { |
534 int state_num; | 480 int state_num; |
535 HyphenState *hstate; | 481 HyphenState *hstate; |
536 | 482 |
537 for (state_num = 0; state_num < dict->num_states; state_num++) | 483 for (state_num = 0; state_num < dict->num_states; state_num++) |
(...skipping 591 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1129 hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, | 1075 hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, |
1130 clhmin, crhmin, 1, 1); | 1076 clhmin, crhmin, 1, 1); |
1131 hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, | 1077 hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, |
1132 rep, pos, cut, (lhmin > 0 ? lhmin : 2)); | 1078 rep, pos, cut, (lhmin > 0 ? lhmin : 2)); |
1133 hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, | 1079 hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, |
1134 rep, pos, cut, (rhmin > 0 ? rhmin : 2)); | 1080 rep, pos, cut, (rhmin > 0 ? rhmin : 2)); |
1135 if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut); | 1081 if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut); |
1136 if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut); | 1082 if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut); |
1137 return 0; | 1083 return 0; |
1138 } | 1084 } |
OLD | NEW |