Index: third_party/harfbuzz-ng/src/hb-ot-shape-normalize.cc |
diff --git a/third_party/harfbuzz-ng/src/hb-ot-shape-normalize.cc b/third_party/harfbuzz-ng/src/hb-ot-shape-normalize.cc |
index 4b70a9d1a4936cb571e7546b33f8f9225627f7cf..c553f7288fcf1c0de452029ea56cec1e92c45124 100644 |
--- a/third_party/harfbuzz-ng/src/hb-ot-shape-normalize.cc |
+++ b/third_party/harfbuzz-ng/src/hb-ot-shape-normalize.cc |
@@ -62,24 +62,12 @@ |
* with previous base, use that. This needs the itemizer to have this |
* knowledge too. We need to provide assistance to the itemizer. |
* |
- * - When a font does not support a character but supports its decomposition, |
- * well, use the decomposition (preferring the canonical decomposition, but |
- * falling back to the compatibility decomposition if necessary). The |
- * compatibility decomposition is really nice to have, for characters like |
- * ellipsis, or various-sized space characters. |
+ * - When a font does not support a character but does support its canonical |
+ * decomposition, use the decomposition instead. |
* |
* - The complex shapers can customize the compose and decompose functions to |
* offload some of their requirements to the normalizer. For example, the |
* Indic shaper may want to disallow recomposing of two matras. |
- * |
- * - We try compatibility decomposition if decomposing through canonical |
- * decomposition alone failed to find a sequence that the font supports. |
- * We don't try compatibility decomposition recursively during the canonical |
- * decomposition phase. This has minimal impact. There are only a handful |
- * of Greek letter that have canonical decompositions that include characters |
- * with compatibility decomposition. Those can be found using this command: |
- * |
- * egrep "`echo -n ';('; grep ';<' UnicodeData.txt | cut -d';' -f1 | tr '\n' '|'; echo ') '`" UnicodeData.txt |
*/ |
static bool |
@@ -110,8 +98,8 @@ static inline void |
output_char (hb_buffer_t *buffer, hb_codepoint_t unichar, hb_codepoint_t glyph) |
{ |
buffer->cur().glyph_index() = glyph; |
- buffer->output_glyph (unichar); |
- _hb_glyph_info_set_unicode_props (&buffer->prev(), buffer->unicode); |
+ buffer->output_glyph (unichar); /* Confusing but intentional: output_glyph() is handed the Unicode value here; the glyph id was already stored on cur() above. */ |
+ _hb_glyph_info_set_unicode_props (&buffer->prev(), buffer); |
} |
static inline void |
@@ -178,15 +166,50 @@ decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shor |
hb_codepoint_t u = buffer->cur().codepoint; |
hb_codepoint_t glyph; |
- /* Kind of a cute waterfall here... */ |
if (shortest && c->font->get_glyph (u, 0, &glyph)) |
+ { |
next_char (buffer, glyph); |
- else if (decompose (c, shortest, u)) |
+ return; |
+ } |
+ |
+ if (decompose (c, shortest, u)) |
+ { |
skip_char (buffer); |
- else if (!shortest && c->font->get_glyph (u, 0, &glyph)) |
+ return; |
+ } |
+ |
+ if (!shortest && c->font->get_glyph (u, 0, &glyph)) |
+ { |
next_char (buffer, glyph); |
- else |
- next_char (buffer, glyph); /* glyph is initialized in earlier branches. */ |
+ return; |
+ } |
+ |
+ if (_hb_glyph_info_is_unicode_space (&buffer->cur())) |
+ { |
+ hb_codepoint_t space_glyph; |
+ hb_unicode_funcs_t::space_t space_type = buffer->unicode->space_fallback_type (u); |
+ if (space_type != hb_unicode_funcs_t::NOT_SPACE && c->font->get_glyph (0x0020u, 0, &space_glyph)) |
+ { |
+ _hb_glyph_info_set_unicode_space_fallback_type (&buffer->cur(), space_type); |
+ next_char (buffer, space_glyph); |
+ buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK; |
+ return; |
+ } |
+ } |
+ |
+ if (u == 0x2011u) |
+ { |
+ /* U+2011 is the only sensible character that is a no-break version of another character |
+ * and not a space. The space ones are handled already. Handle this lone one. */ |
+ hb_codepoint_t other_glyph; |
+ if (c->font->get_glyph (0x2010u, 0, &other_glyph)) |
+ { |
+ next_char (buffer, other_glyph); |
+ return; |
+ } |
+ } |
+ |
+ next_char (buffer, glyph); /* glyph is initialized in earlier branches. */ |
} |
static inline void |
@@ -195,7 +218,7 @@ handle_variation_selector_cluster (const hb_ot_shape_normalize_context_t *c, uns |
/* TODO Currently if there's a variation-selector we give-up, it's just too hard. */ |
hb_buffer_t * const buffer = c->buffer; |
hb_font_t * const font = c->font; |
- for (; buffer->idx < end - 1;) { |
+ for (; buffer->idx < end - 1 && !buffer->in_error;) { |
if (unlikely (buffer->unicode->is_variation_selector (buffer->cur(+1).codepoint))) { |
/* The next two lines are some ugly lines... But work. */ |
if (font->get_glyph (buffer->cur().codepoint, buffer->cur(+1).codepoint, &buffer->cur().glyph_index())) |
@@ -231,13 +254,13 @@ static inline void |
decompose_multi_char_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end, bool short_circuit) |
{ |
hb_buffer_t * const buffer = c->buffer; |
- for (unsigned int i = buffer->idx; i < end; i++) |
+ for (unsigned int i = buffer->idx; i < end && !buffer->in_error; i++) /* Look for a variation selector in [idx, end); stop if the buffer is in error. */ |
if (unlikely (buffer->unicode->is_variation_selector (buffer->info[i].codepoint))) { |
handle_variation_selector_cluster (c, end, short_circuit); |
return; |
} |
- while (buffer->idx < end) |
+ while (buffer->idx < end && !buffer->in_error) /* No selector found: decompose the cluster one character per call. */ |
decompose_current_character (c, short_circuit); |
} |
@@ -297,7 +320,7 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, |
buffer->clear_output (); |
count = buffer->len; |
- for (buffer->idx = 0; buffer->idx < count;) |
+ for (buffer->idx = 0; buffer->idx < count && !buffer->in_error;) |
{ |
unsigned int end; |
for (end = buffer->idx + 1; end < count; end++) |
@@ -347,7 +370,7 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, |
count = buffer->len; |
unsigned int starter = 0; |
buffer->next_glyph (); |
- while (buffer->idx < count) |
+ while (buffer->idx < count && !buffer->in_error) |
{ |
hb_codepoint_t composed, glyph; |
if (/* We don't try to compose a non-mark character with it's preceding starter. |
@@ -376,7 +399,7 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, |
/* Modify starter and carry on. */ |
buffer->out_info[starter].codepoint = composed; |
buffer->out_info[starter].glyph_index() = glyph; |
- _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer->unicode); |
+ _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer); |
continue; |
} |