Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(82)

Side by Side Diff: third_party/hyphen/hyphen.c

Issue 9545017: Adds a hy-phen-ator. (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both 1 /* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both
2 * licenses follows. 2 * licenses follows.
3 */ 3 */
4 4
5 /* LibHnj - a library for high quality hyphenation and justification 5 /* LibHnj - a library for high quality hyphenation and justification
6 * Copyright (C) 1998 Raph Levien, 6 * Copyright (C) 1998 Raph Levien,
7 * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), 7 * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org),
8 * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) 8 * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su)
9 * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo) 9 * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo)
10 * 10 *
(...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after
235 HashEntry *e; 235 HashEntry *e;
236 236
237 for (i = 0; i < HASH_SIZE; i++) 237 for (i = 0; i < HASH_SIZE; i++)
238 for (e = global->entries[i]; e; e = e->next) 238 for (e = global->entries[i]; e; e = e->next)
239 if (e->val == state) 239 if (e->val == state)
240 return e->key; 240 return e->key;
241 return NULL; 241 return NULL;
242 } 242 }
243 #endif 243 #endif
244 244
245 #ifdef HYPHEN_CHROME_CLIENT
246 typedef struct {
247 const unsigned char *data;
248 size_t offset;
249 size_t size;
250 } hnj_file;
251
252 static hnj_file *
253 hnj_fopen (const unsigned char *data, size_t size)
254 {
255 hnj_file *f;
256
257 f = hnj_malloc (sizeof(hnj_file));
258 if (f == NULL)
259 return NULL;
260 f->offset = 0;
261 f->data = data;
262 f->size = size;
263 return f;
264 }
265
266 static void
267 hnj_fclose (hnj_file *f)
268 {
269 hnj_free (f);
270 }
271
272 static char *
273 hnj_fgets (char *s, int size, hnj_file *f)
274 {
275 int i;
276
277 if (f->offset >= f->size)
278 return NULL;
279 for (i = 0; i < size - 1; i++) {
280 char c;
281
282 if (f->offset >= f->size)
283 break;
284 c = f->data[f->offset++];
285 if (c == '\r' || c == '\n')
286 break;
287 s[i] = c;
288 }
289 s[i] = '\0';
290 return s;
291 }
292 #else
293 typedef FILE hnj_file;
294 #define hnj_fopen(fn, mode) fopen((fn), (mode))
295 #define hnj_fclose(f) fclose(f)
296 #define hnj_fgets(s, size, f) fgets((s), (size), (f))
297 #endif
298
299 #ifdef HYPHEN_CHROME_CLIENT
300 HyphenDict *
301 hnj_hyphen_load (const unsigned char *data, size_t size)
302 #else
245 HyphenDict * 303 HyphenDict *
246 hnj_hyphen_load (const char *fn) 304 hnj_hyphen_load (const char *fn)
305 #endif
247 { 306 {
248 HyphenDict *dict[2]; 307 HyphenDict *dict[2];
249 HashTab *hashtab; 308 HashTab *hashtab;
250 FILE *f; 309 hnj_file *f;
251 char buf[MAX_CHARS]; 310 char buf[MAX_CHARS];
252 char word[MAX_CHARS]; 311 char word[MAX_CHARS];
253 char pattern[MAX_CHARS]; 312 char pattern[MAX_CHARS];
254 char * repl; 313 char * repl;
255 signed char replindex; 314 signed char replindex;
256 signed char replcut; 315 signed char replcut;
257 int state_num = 0, last_state; 316 int state_num = 0, last_state;
258 int i, j, k; 317 int i, j, k;
259 char ch; 318 char ch;
260 int found; 319 int found;
261 HashEntry *e; 320 HashEntry *e;
262 int nextlevel = 0; 321 int nextlevel = 0;
263 322
264 f = fopen (fn, "r"); 323 #ifdef HYPHEN_CHROME_CLIENT
324 f = hnj_fopen (data, size);
325 #else
326 f = hnj_fopen (fn, "r");
327 #endif
265 if (f == NULL) 328 if (f == NULL)
266 return NULL; 329 return NULL;
267 330
268 // loading one or two dictionaries (separated by NEXTLEVEL keyword) 331 // loading one or two dictionaries (separated by NEXTLEVEL keyword)
269 for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { 332 for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
270 hashtab = hnj_hash_new (); 333 hashtab = hnj_hash_new ();
271 #ifdef VERBOSE 334 #ifdef VERBOSE
272 global = hashtab; 335 global = hashtab;
273 #endif 336 #endif
274 hnj_hash_insert (hashtab, "", 0); 337 hnj_hash_insert (hashtab, "", 0);
275 dict[k] = hnj_malloc (sizeof(HyphenDict)); 338 dict[k] = hnj_malloc (sizeof(HyphenDict));
276 dict[k]->num_states = 1; 339 dict[k]->num_states = 1;
277 dict[k]->states = hnj_malloc (sizeof(HyphenState)); 340 dict[k]->states = hnj_malloc (sizeof(HyphenState));
278 dict[k]->states[0].match = NULL; 341 dict[k]->states[0].match = NULL;
279 dict[k]->states[0].repl = NULL; 342 dict[k]->states[0].repl = NULL;
280 dict[k]->states[0].fallback_state = -1; 343 dict[k]->states[0].fallback_state = -1;
281 dict[k]->states[0].num_trans = 0; 344 dict[k]->states[0].num_trans = 0;
282 dict[k]->states[0].trans = NULL; 345 dict[k]->states[0].trans = NULL;
283 dict[k]->nextlevel = NULL; 346 dict[k]->nextlevel = NULL;
284 dict[k]->lhmin = 0; 347 dict[k]->lhmin = 0;
285 dict[k]->rhmin = 0; 348 dict[k]->rhmin = 0;
286 dict[k]->clhmin = 0; 349 dict[k]->clhmin = 0;
287 dict[k]->crhmin = 0; 350 dict[k]->crhmin = 0;
288 351
289 /* read in character set info */ 352 /* read in character set info */
290 if (k == 0) { 353 if (k == 0) {
291 for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; 354 for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
292 if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { 355 if (hnj_fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) {
293 for (i=0;i<MAX_NAME;i++) 356 for (i=0;i<MAX_NAME;i++)
294 if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) 357 if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
295 dict[k]->cset[i] = 0; 358 dict[k]->cset[i] = 0;
296 } else { 359 } else {
297 dict[k]->cset[0] = 0; 360 dict[k]->cset[0] = 0;
298 } 361 }
299 dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); 362 dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
300 } else { 363 } else {
301 strcpy(dict[k]->cset, dict[0]->cset); 364 strcpy(dict[k]->cset, dict[0]->cset);
302 dict[k]->utf8 = dict[0]->utf8; 365 dict[k]->utf8 = dict[0]->utf8;
303 } 366 }
304 367
305 while (fgets (buf, sizeof(buf), f) != NULL) 368 while (hnj_fgets (buf, sizeof(buf), f) != NULL)
306 { 369 {
307 if (buf[0] != '%') 370 if (buf[0] != '%')
308 { 371 {
309 if (strncmp(buf, "NEXTLEVEL", 9) == 0) { 372 if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
310 nextlevel = 1; 373 nextlevel = 1;
311 break; 374 break;
312 } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { 375 } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
313 dict[k]->lhmin = atoi(buf + 13); 376 dict[k]->lhmin = atoi(buf + 13);
314 continue; 377 continue;
315 } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { 378 } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
361 i = 0; 424 i = 0;
362 if (!repl) { 425 if (!repl) {
363 /* Optimize away leading zeroes */ 426 /* Optimize away leading zeroes */
364 for (; pattern[i] == '0'; i++); 427 for (; pattern[i] == '0'; i++);
365 } else { 428 } else {
366 if (*word == '.') i++; 429 if (*word == '.') i++;
367 /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */ 430 /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
368 if (dict[k]->utf8) { 431 if (dict[k]->utf8) {
369 int pu = -1; /* unicode character position */ 432 int pu = -1; /* unicode character position */
370 int ps = -1; /* unicode start position (original replindex) */ 433 int ps = -1; /* unicode start position (original replindex) */
371 int pc = (*word == '.') ? 1: 0; /* 8-bit character position */ 434 size_t pc = (*word == '.') ? 1: 0; /* 8-bit character position */
372 for (; pc < (strlen(word) + 1); pc++) { 435 for (; pc < (strlen(word) + 1); pc++) {
373 /* beginning of an UTF-8 character (not '10' start bits) */ 436 /* beginning of an UTF-8 character (not '10' start bits) */
374 if ((((unsigned char) word[pc]) >> 6) != 2) pu++; 437 if ((((unsigned char) word[pc]) >> 6) != 2) pu++;
375 if ((ps < 0) && (replindex == pu)) { 438 if ((ps < 0) && (replindex == pu)) {
376 ps = replindex; 439 ps = replindex;
377 replindex = (signed char) pc; 440 replindex = (signed char) pc;
378 } 441 }
379 if ((ps >= 0) && ((pu - ps) == replcut)) { 442 if ((ps >= 0) && ((pu - ps) == replcut)) {
380 replcut = (signed char) (pc - replindex); 443 replcut = (signed char) (pc - replindex);
381 break; 444 break;
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
454 printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch, 517 printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch,
455 dict[k]->states[e->val].trans[j].new_state); 518 dict[k]->states[e->val].trans[j].new_state);
456 } 519 }
457 #endif 520 #endif
458 521
459 #ifndef VERBOSE 522 #ifndef VERBOSE
460 hnj_hash_free (hashtab); 523 hnj_hash_free (hashtab);
461 #endif 524 #endif
462 state_num = 0; 525 state_num = 0;
463 } 526 }
464 fclose(f); 527 hnj_fclose(f);
465 if (k == 2) dict[0]->nextlevel = dict[1]; 528 if (k == 2) dict[0]->nextlevel = dict[1];
466 return dict[0]; 529 return dict[0];
467 } 530 }
468 531
469 void hnj_hyphen_free (HyphenDict *dict) 532 void hnj_hyphen_free (HyphenDict *dict)
470 { 533 {
471 int state_num; 534 int state_num;
472 HyphenState *hstate; 535 HyphenState *hstate;
473 536
474 for (state_num = 0; state_num < dict->num_states; state_num++) 537 for (state_num = 0; state_num < dict->num_states; state_num++)
(...skipping 591 matching lines...) Expand 10 before | Expand all | Expand 10 after
1066 hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, 1129 hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
1067 clhmin, crhmin, 1, 1); 1130 clhmin, crhmin, 1, 1);
1068 hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, 1131 hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
1069 rep, pos, cut, (lhmin > 0 ? lhmin : 2)); 1132 rep, pos, cut, (lhmin > 0 ? lhmin : 2));
1070 hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, 1133 hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens,
1071 rep, pos, cut, (rhmin > 0 ? rhmin : 2)); 1134 rep, pos, cut, (rhmin > 0 ? rhmin : 2));
1072 if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut); 1135 if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
1073 if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut); 1136 if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
1074 return 0; 1137 return 0;
1075 } 1138 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698