third_party/harfbuzz-ng/src/hb-ot-shape-normalize.cc - Issue 1476763003: Roll HarfBuzz to 1.1.1

Side by Side Diff: third_party/harfbuzz-ng/src/hb-ot-shape-normalize.cc

Issue 1476763003: Roll HarfBuzz to 1.1.1 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: windows line height rebaseline Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright © 2011,2012 Google, Inc.	2 * Copyright © 2011,2012 Google, Inc.

3 *	3 *

4 * This is part of HarfBuzz, a text shaping library.	4 * This is part of HarfBuzz, a text shaping library.

5 *	5 *

6 * Permission is hereby granted, without written agreement and without	6 * Permission is hereby granted, without written agreement and without

7 * license or royalty fees, to use, copy, modify, and distribute this	7 * license or royalty fees, to use, copy, modify, and distribute this

8 * software and its documentation for any purpose, provided that the	8 * software and its documentation for any purpose, provided that the

9 * above copyright notice and the following two paragraphs appear in	9 * above copyright notice and the following two paragraphs appear in

10 * all copies of this software.	10 * all copies of this software.

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
55 * though their NFC may be different.	55 * though their NFC may be different.

56 *	56 *

57 * - When a font has a precomposed character for a sequence but the 'ccmp'	57 * - When a font has a precomposed character for a sequence but the 'ccmp'

58 * feature in the font is not adequate, use the precomposed character	58 * feature in the font is not adequate, use the precomposed character

59 * which typically has better mark positioning.	59 * which typically has better mark positioning.

60 *	60 *

61 * - When a font does not support a combining mark, but supports it precomposed	61 * - When a font does not support a combining mark, but supports it precomposed

62 * with previous base, use that. This needs the itemizer to have this	62 * with previous base, use that. This needs the itemizer to have this

63 * knowledge too. We need to provide assistance to the itemizer.	63 * knowledge too. We need to provide assistance to the itemizer.

64 *	64 *

65 * - When a font does not support a character but supports its decomposition,	65 * - When a font does not support a character but supports its canonical

66 * well, use the decomposition (preferring the canonical decomposition, but	66 * decomposition, well, use the decomposition.

67 * falling back to the compatibility decomposition if necessary). The

68 * compatibility decomposition is really nice to have, for characters like

69 * ellipsis, or various-sized space characters.

70 *	67 *

71 * - The complex shapers can customize the compose and decompose functions to	68 * - The complex shapers can customize the compose and decompose functions to

72 * offload some of their requirements to the normalizer. For example, the	69 * offload some of their requirements to the normalizer. For example, the

73 * Indic shaper may want to disallow recomposing of two matras.	70 * Indic shaper may want to disallow recomposing of two matras.

74 *

75 * - We try compatibility decomposition if decomposing through canonical

76 * decomposition alone failed to find a sequence that the font supports.

77 * We don't try compatibility decomposition recursively during the canonical

78 * decomposition phase. This has minimal impact. There are only a handful

79 * of Greek letter that have canonical decompositions that include characters

80 * with compatibility decomposition. Those can be found using this command:

81 *

82 * egrep "`echo -n ';('; grep ';<' UnicodeData.txt | cut -d';' -f1 | tr '\n' '|'; echo ') '`" UnicodeData.txt

83 */	71 */

84	72

85 static bool	73 static bool

86 decompose_unicode (const hb_ot_shape_normalize_context_t *c,	74 decompose_unicode (const hb_ot_shape_normalize_context_t *c,

87 hb_codepoint_t ab,	75 hb_codepoint_t ab,

88 hb_codepoint_t *a,	76 hb_codepoint_t *a,

89 hb_codepoint_t *b)	77 hb_codepoint_t *b)

90 {	78 {

91 return c->unicode->decompose (ab, a, b);	79 return c->unicode->decompose (ab, a, b);

92 }	80 }

(...skipping 10 matching lines...) Expand all Loading...
103 static inline void	91 static inline void

104 set_glyph (hb_glyph_info_t &info, hb_font_t *font)	92 set_glyph (hb_glyph_info_t &info, hb_font_t *font)

105 {	93 {

106 font->get_glyph (info.codepoint, 0, &info.glyph_index());	94 font->get_glyph (info.codepoint, 0, &info.glyph_index());

107 }	95 }

108	96

109 static inline void	97 static inline void

110 output_char (hb_buffer_t *buffer, hb_codepoint_t unichar, hb_codepoint_t glyph)	98 output_char (hb_buffer_t *buffer, hb_codepoint_t unichar, hb_codepoint_t glyph)

111 {	99 {

112 buffer->cur().glyph_index() = glyph;	100 buffer->cur().glyph_index() = glyph;

113 buffer->output_glyph (unichar);	101 buffer->output_glyph (unichar); /* This is very confusing indeed. */

114 _hb_glyph_info_set_unicode_props (&buffer->prev(), buffer->unicode);	102 _hb_glyph_info_set_unicode_props (&buffer->prev(), buffer);

115 }	103 }

116	104

117 static inline void	105 static inline void

118 next_char (hb_buffer_t *buffer, hb_codepoint_t glyph)	106 next_char (hb_buffer_t *buffer, hb_codepoint_t glyph)

119 {	107 {

120 buffer->cur().glyph_index() = glyph;	108 buffer->cur().glyph_index() = glyph;

121 buffer->next_glyph ();	109 buffer->next_glyph ();

122 }	110 }

123	111

124 static inline void	112 static inline void

(...skipping 46 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
171 return 0;	159 return 0;

172 }	160 }

173	161

174 static inline void	162 static inline void

175 decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shortest)	163 decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shortest)

176 {	164 {

177 hb_buffer_t * const buffer = c->buffer;	165 hb_buffer_t * const buffer = c->buffer;

178 hb_codepoint_t u = buffer->cur().codepoint;	166 hb_codepoint_t u = buffer->cur().codepoint;

179 hb_codepoint_t glyph;	167 hb_codepoint_t glyph;

180	168

181 /* Kind of a cute waterfall here... */

182 if (shortest && c->font->get_glyph (u, 0, &glyph))	169 if (shortest && c->font->get_glyph (u, 0, &glyph))

	170 {

183 next_char (buffer, glyph);	171 next_char (buffer, glyph);

184 else if (decompose (c, shortest, u))	172 return;

	173 }

	174

	175 if (decompose (c, shortest, u))

	176 {

185 skip_char (buffer);	177 skip_char (buffer);

186 else if (!shortest && c->font->get_glyph (u, 0, &glyph))	178 return;

	179 }

	180

	181 if (!shortest && c->font->get_glyph (u, 0, &glyph))

	182 {

187 next_char (buffer, glyph);	183 next_char (buffer, glyph);

188 else	184 return;

189 next_char (buffer, glyph); /* glyph is initialized in earlier branches. */	185 }

	186

	187 if (_hb_glyph_info_is_unicode_space (&buffer->cur()))

	188 {

	189 hb_codepoint_t space_glyph;

	190 hb_unicode_funcs_t::space_t space_type = buffer->unicode->space_fallback_type (u);

	191 if (space_type != hb_unicode_funcs_t::NOT_SPACE && c->font->get_glyph (0x0020u, 0, &space_glyph))

	192 {

	193 _hb_glyph_info_set_unicode_space_fallback_type (&buffer->cur(), space_type);

	194 next_char (buffer, space_glyph);

	195 buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK;

	196 return;

	197 }

	198 }

	199

	200 if (u == 0x2011u)

	201 {

	202 /* U+2011 is the only sensible character that is a no-break version of another character

	203 * and not a space. The space ones are handled already. Handle this lone one. */

	204 hb_codepoint_t other_glyph;

	205 if (c->font->get_glyph (0x2010u, 0, &other_glyph))

	206 {

	207 next_char (buffer, other_glyph);

	208 return;

	209 }

	210 }

	211

	212 next_char (buffer, glyph); /* glyph is initialized in earlier branches. */

190 }	213 }

191	214

192 static inline void	215 static inline void

193 handle_variation_selector_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end, bool short_circuit)	216 handle_variation_selector_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end, bool short_circuit)

194 {	217 {

195 /* TODO Currently if there's a variation-selector we give-up, it's just too hard. */	218 /* TODO Currently if there's a variation-selector we give-up, it's just too hard. */

196 hb_buffer_t * const buffer = c->buffer;	219 hb_buffer_t * const buffer = c->buffer;

197 hb_font_t * const font = c->font;	220 hb_font_t * const font = c->font;

198 for (; buffer->idx < end - 1;) {	221 for (; buffer->idx < end - 1 && !buffer->in_error;) {

199 if (unlikely (buffer->unicode->is_variation_selector (buffer->cur(+1).codepoint))) {	222 if (unlikely (buffer->unicode->is_variation_selector (buffer->cur(+1).codepoint))) {

200 /* The next two lines are some ugly lines... But work. */	223 /* The next two lines are some ugly lines... But work. */

201 if (font->get_glyph (buffer->cur().codepoint, buffer->cur(+1).codepoint, &buffer->cur().glyph_index()))	224 if (font->get_glyph (buffer->cur().codepoint, buffer->cur(+1).codepoint, &buffer->cur().glyph_index()))

202 {	225 {

203 buffer->replace_glyphs (2, 1, &buffer->cur().codepoint);	226 buffer->replace_glyphs (2, 1, &buffer->cur().codepoint);

204 }	227 }

205 else	228 else

206 {	229 {

207 /* Just pass on the two characters separately, let GSUB do its magic. */	230 /* Just pass on the two characters separately, let GSUB do its magic. */

208 set_glyph (buffer->cur(), font);	231 set_glyph (buffer->cur(), font);

(...skipping 15 matching lines...) Expand all Loading...
224 if (likely (buffer->idx < end)) {	247 if (likely (buffer->idx < end)) {

225 set_glyph (buffer->cur(), font);	248 set_glyph (buffer->cur(), font);

226 buffer->next_glyph ();	249 buffer->next_glyph ();

227 }	250 }

228 }	251 }

229	252

230 static inline void	253 static inline void

231 decompose_multi_char_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end, bool short_circuit)	254 decompose_multi_char_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end, bool short_circuit)

232 {	255 {

233 hb_buffer_t * const buffer = c->buffer;	256 hb_buffer_t * const buffer = c->buffer;

234 for (unsigned int i = buffer->idx; i < end; i++)	257 for (unsigned int i = buffer->idx; i < end && !buffer->in_error; i++)

235 if (unlikely (buffer->unicode->is_variation_selector (buffer->info[i].codepoint))) {	258 if (unlikely (buffer->unicode->is_variation_selector (buffer->info[i].codepoint))) {

236 handle_variation_selector_cluster (c, end, short_circuit);	259 handle_variation_selector_cluster (c, end, short_circuit);

237 return;	260 return;

238 }	261 }

239	262

240 while (buffer->idx < end)	263 while (buffer->idx < end && !buffer->in_error)

241 decompose_current_character (c, short_circuit);	264 decompose_current_character (c, short_circuit);

242 }	265 }

243	266

244 static inline void	267 static inline void

245 decompose_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end, bool might_short_circuit, bool always_short_circuit)	268 decompose_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end, bool might_short_circuit, bool always_short_circuit)

246 {	269 {

247 if (likely (c->buffer->idx + 1 == end))	270 if (likely (c->buffer->idx + 1 == end))

248 decompose_current_character (c, might_short_circuit);	271 decompose_current_character (c, might_short_circuit);

249 else	272 else

250 decompose_multi_char_cluster (c, end, always_short_circuit);	273 decompose_multi_char_cluster (c, end, always_short_circuit);

(...skipping 39 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
290 * separate rounds: decompose, reorder, recompose (if desired). Currently	313 * separate rounds: decompose, reorder, recompose (if desired). Currently

291 * this makes two buffer swaps. We can make it faster by moving the last	314 * this makes two buffer swaps. We can make it faster by moving the last

292 * two rounds into the inner loop for the first round, but it's more readable	315 * two rounds into the inner loop for the first round, but it's more readable

293 * this way. */	316 * this way. */

294	317

295	318

296 /* First round, decompose */	319 /* First round, decompose */

297	320

298 buffer->clear_output ();	321 buffer->clear_output ();

299 count = buffer->len;	322 count = buffer->len;

300 for (buffer->idx = 0; buffer->idx < count;)	323 for (buffer->idx = 0; buffer->idx < count && !buffer->in_error;)

301 {	324 {

302 unsigned int end;	325 unsigned int end;

303 for (end = buffer->idx + 1; end < count; end++)	326 for (end = buffer->idx + 1; end < count; end++)

304 if (likely (!HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->info[end]))))	327 if (likely (!HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->info[end]))))

305 break;	328 break;

306	329

307 decompose_cluster (&c, end, might_short_circuit, always_short_circuit);	330 decompose_cluster (&c, end, might_short_circuit, always_short_circuit);

308 }	331 }

309 buffer->swap_buffers ();	332 buffer->swap_buffers ();

310	333

(...skipping 29 matching lines...) Expand all Loading...
340	363

341 /* Third round, recompose */	364 /* Third round, recompose */

342	365

343 /* As noted in the comment earlier, we don't try to combine	366 /* As noted in the comment earlier, we don't try to combine

344 * ccc=0 chars with their previous Starter. */	367 * ccc=0 chars with their previous Starter. */

345	368

346 buffer->clear_output ();	369 buffer->clear_output ();

347 count = buffer->len;	370 count = buffer->len;

348 unsigned int starter = 0;	371 unsigned int starter = 0;

349 buffer->next_glyph ();	372 buffer->next_glyph ();

350 while (buffer->idx < count)	373 while (buffer->idx < count && !buffer->in_error)

351 {	374 {

352 hb_codepoint_t composed, glyph;	375 hb_codepoint_t composed, glyph;

353 if (/* We don't try to compose a non-mark character with it's preceding starter.	376 if (/* We don't try to compose a non-mark character with it's preceding starter.

354 * This is both an optimization to avoid trying to compose every two neighboring	377 * This is both an optimization to avoid trying to compose every two neighboring

355 * glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul	378 * glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul

356 * fonts are not designed to mix-and-match pre-composed syllables and Jamo. */	379 * fonts are not designed to mix-and-match pre-composed syllables and Jamo. */

357 HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->cur())) &&	380 HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->cur())) &&

358 /* If there's anything between the starter and this char, they should have CCC	381 /* If there's anything between the starter and this char, they should have CCC

359 * smaller than this character's. */	382 * smaller than this character's. */

360 (starter == buffer->out_len - 1 ||	383 (starter == buffer->out_len - 1 ||

361 _hb_glyph_info_get_modified_combining_class (&buffer->prev()) < _hb_glyph_info_get_modified_combining_class (&buffer->cur())) &&	384 _hb_glyph_info_get_modified_combining_class (&buffer->prev()) < _hb_glyph_info_get_modified_combining_class (&buffer->cur())) &&

362 /* And compose. */	385 /* And compose. */

363 c.compose (&c,	386 c.compose (&c,

364 buffer->out_info[starter].codepoint,	387 buffer->out_info[starter].codepoint,

365 buffer->cur().codepoint,	388 buffer->cur().codepoint,

366 &composed) &&	389 &composed) &&

367 /* And the font has glyph for the composite. */	390 /* And the font has glyph for the composite. */

368 font->get_glyph (composed, 0, &glyph))	391 font->get_glyph (composed, 0, &glyph))

369 {	392 {

370 /* Composes. */	393 /* Composes. */

371 buffer->next_glyph (); /* Copy to out-buffer. */	394 buffer->next_glyph (); /* Copy to out-buffer. */

372 if (unlikely (buffer->in_error))	395 if (unlikely (buffer->in_error))

373 return;	396 return;

374 buffer->merge_out_clusters (starter, buffer->out_len);	397 buffer->merge_out_clusters (starter, buffer->out_len);

375 buffer->out_len--; /* Remove the second composable. */	398 buffer->out_len--; /* Remove the second composable. */

376 /* Modify starter and carry on. */	399 /* Modify starter and carry on. */

377 buffer->out_info[starter].codepoint = composed;	400 buffer->out_info[starter].codepoint = composed;

378 buffer->out_info[starter].glyph_index() = glyph;	401 buffer->out_info[starter].glyph_index() = glyph;

379 _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer->unicode);	402 _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer);

380	403

381 continue;	404 continue;

382 }	405 }

383	406

384 /* Blocked, or doesn't compose. */	407 /* Blocked, or doesn't compose. */

385 buffer->next_glyph ();	408 buffer->next_glyph ();

386	409

387 if (_hb_glyph_info_get_modified_combining_class (&buffer->prev()) == 0)	410 if (_hb_glyph_info_get_modified_combining_class (&buffer->prev()) == 0)

388 starter = buffer->out_len - 1;	411 starter = buffer->out_len - 1;

389 }	412 }

390 buffer->swap_buffers ();	413 buffer->swap_buffers ();

391	414

392 }	415 }

OLD	NEW

« no previous file with comments | « third_party/harfbuzz-ng/src/hb-ot-shape-fallback-private.hh ('k') | third_party/harfbuzz-ng/src/hb-private.hh » ('j') | no next file with comments »