third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic.cc - Issue 70193010: Update harfbuzz-ng to 0.9.24

Unified Diff: third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic.cc

Issue 70193010: Update harfbuzz-ng to 0.9.24 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« no previous file with comments | « third_party/harfbuzz-ng/src/hb-ot-shape-complex-arabic-fallback.hh ('k') | third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic-machine.hh » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic.cc

diff --git a/third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic.cc b/third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic.cc

index eb1e0be9defec9fe889967910650a7cbf8c04f4a..9edefe305dc8e8a04553034466460f1e01db46af 100644

--- a/third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic.cc

+++ b/third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic.cc

@@ -128,14 +128,6 @@ static const hb_codepoint_t ra_chars[] = {

0x179A, /* Khmer */ /* No Reph, Visual Repha */

};

-static inline indic_position_t

-consonant_position (hb_codepoint_t u)

- if ((u & ~0x007F) == 0x1780)

- return POS_BELOW_C; /* In Khmer coeng model, post and below forms should not be reordered. */

- return POS_BASE_C; /* Will recategorize later based on font lookups. */

static inline bool

is_ra (hb_codepoint_t u)

{

@@ -149,7 +141,7 @@ static inline bool

is_one_of (const hb_glyph_info_t &info, unsigned int flags)

{

/* If it ligated, all bets are off. */

- if (is_a_ligature (info)) return false;

+ if (_hb_glyph_info_ligated (&info)) return false;

return !!(FLAG (info.indic_category()) & flags);

}

@@ -160,12 +152,14 @@ is_joiner (const hb_glyph_info_t &info)

return is_one_of (info, JOINER_FLAGS);

}

+#define MEDIAL_FLAGS (FLAG (OT_CM) | FLAG (OT_CM2))

/* Note:

* We treat Vowels and placeholders as if they were consonants. This is safe because Vowels

* cannot happen in a consonant syllable. The plus side however is, we can call the

* consonant syllable logic from the vowel syllable function and get it all right! */

static inline bool

is_consonant (const hb_glyph_info_t &info)

{

@@ -194,15 +188,15 @@ set_indic_properties (hb_glyph_info_t &info)

/* The spec says U+0952 is OT_A. However, testing shows that Uniscribe

- * treats U+0951..U+0952 all as OT_VD.

+ * treats U+0951..U+0954 all behave similarly.

* TESTS:

* U+092E,U+0947,U+0952

* U+092E,U+0952,U+0947

* U+092E,U+0947,U+0951

* U+092E,U+0951,U+0947

- * */

+ */

if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0951, 0x0954)))

- cat = OT_VD;

+ cat = OT_A;

if (unlikely (u == 0x17D1))

cat = OT_X;

@@ -220,7 +214,10 @@ set_indic_properties (hb_glyph_info_t &info)

else if (unlikely (u == 0x200C)) cat = OT_ZWNJ;

else if (unlikely (u == 0x200D)) cat = OT_ZWJ;

else if (unlikely (u == 0x25CC)) cat = OT_DOTTEDCIRCLE;

- else if (unlikely (u == 0x0A71)) cat = OT_SM; /* GURMUKHI ADDAK. More like consonant medial. like 0A75. */

+ else if (unlikely (u == 0x0A71)) cat = OT_SM; /* GURMUKHI ADDAK. Move it to the end. */

+ else if (unlikely (u == 0xA982)) cat = OT_SM; /* Javanese repha. */

+ else if (unlikely (u == 0xA9BE)) cat = OT_CM2; /* Javanese medial ya. */

+ else if (unlikely (u == 0xA9BD)) { cat = OT_M; pos = POS_POST_C; } /* Javanese vocalic r. */

if (cat == OT_Repha) {

/* There are two kinds of characters marked as Repha:

@@ -241,7 +238,7 @@ set_indic_properties (hb_glyph_info_t &info)

if ((FLAG (cat) & CONSONANT_FLAGS))

{

- pos = consonant_position (u);

+ pos = POS_BASE_C;

if (is_ra (u))

cat = OT_Ra;

}

@@ -249,7 +246,7 @@ set_indic_properties (hb_glyph_info_t &info)

{

pos = matra_position (u, pos);

}

- else if (cat == OT_SM || cat == OT_VD)

+ else if ((FLAG (cat) & (FLAG (OT_SM) | FLAG (OT_VD) | FLAG (OT_A) | FLAG (OT_Avag))))

{

pos = POS_SMVD;

}

@@ -277,16 +274,16 @@ set_indic_properties (hb_glyph_info_t &info)

enum base_position_t {

BASE_POS_FIRST,

+ BASE_POS_LAST_SINHALA,

BASE_POS_LAST

};

enum reph_position_t {

- REPH_POS_DEFAULT = POS_BEFORE_POST,

REPH_POS_AFTER_MAIN = POS_AFTER_MAIN,

REPH_POS_BEFORE_SUB = POS_BEFORE_SUB,

REPH_POS_AFTER_SUB = POS_AFTER_SUB,

REPH_POS_BEFORE_POST = POS_BEFORE_POST,

- REPH_POS_AFTER_POST = POS_AFTER_POST

+ REPH_POS_AFTER_POST = POS_AFTER_POST,

+ REPH_POS_DONT_CARE = POS_RA_TO_BECOME_REPH

};

enum reph_mode_t {

REPH_MODE_IMPLICIT, /* Reph formed out of initial Ra,H sequence. */

@@ -294,6 +291,15 @@ enum reph_mode_t {

REPH_MODE_VIS_REPHA, /* Encoded Repha character, no reordering needed. */

REPH_MODE_LOG_REPHA /* Encoded Repha character, needs reordering. */

};

+enum blwf_mode_t {

+ BLWF_MODE_PRE_AND_POST, /* Below-forms feature applied to pre-base and post-base. */

+ BLWF_MODE_POST_ONLY /* Below-forms feature applied to post-base only. */

+};

+enum pref_len_t {

+ PREF_LEN_1 = 1,

+ PREF_LEN_2 = 2,

+ PREF_LEN_DONT_CARE = PREF_LEN_2

+};

struct indic_config_t

{

hb_script_t script;

@@ -302,23 +308,27 @@ struct indic_config_t

base_position_t base_pos;

reph_position_t reph_pos;

reph_mode_t reph_mode;

+ blwf_mode_t blwf_mode;

+ pref_len_t pref_len;

};

static const indic_config_t indic_configs[] =

{

/* Default. Should be first. */

- {HB_SCRIPT_INVALID, false, 0,BASE_POS_LAST, REPH_POS_DEFAULT, REPH_MODE_IMPLICIT},

- {HB_SCRIPT_DEVANAGARI,true, 0x094D,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT},

- {HB_SCRIPT_BENGALI, true, 0x09CD,BASE_POS_LAST, REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT},

- {HB_SCRIPT_GURMUKHI, true, 0x0A4D,BASE_POS_LAST, REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT},

- {HB_SCRIPT_GUJARATI, true, 0x0ACD,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT},

- {HB_SCRIPT_ORIYA, true, 0x0B4D,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT},

- {HB_SCRIPT_TAMIL, true, 0x0BCD,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT},

- {HB_SCRIPT_TELUGU, true, 0x0C4D,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT},

- {HB_SCRIPT_KANNADA, true, 0x0CCD,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT},

- {HB_SCRIPT_MALAYALAM, true, 0x0D4D,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA},

- {HB_SCRIPT_SINHALA, false,0x0DCA,BASE_POS_FIRST,REPH_POS_AFTER_MAIN, REPH_MODE_EXPLICIT},

- {HB_SCRIPT_KHMER, false,0x17D2,BASE_POS_FIRST,REPH_POS_DEFAULT, REPH_MODE_VIS_REPHA},

+ {HB_SCRIPT_INVALID, false, 0,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_1},

+ {HB_SCRIPT_DEVANAGARI,true, 0x094D,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_DONT_CARE},

+ {HB_SCRIPT_BENGALI, true, 0x09CD,BASE_POS_LAST, REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_DONT_CARE},

+ {HB_SCRIPT_GURMUKHI, true, 0x0A4D,BASE_POS_LAST, REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_DONT_CARE},

+ {HB_SCRIPT_GUJARATI, true, 0x0ACD,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_DONT_CARE},

+ {HB_SCRIPT_ORIYA, true, 0x0B4D,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_DONT_CARE},

+ {HB_SCRIPT_TAMIL, true, 0x0BCD,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_2},

+ {HB_SCRIPT_TELUGU, true, 0x0C4D,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY, PREF_LEN_2},

+ {HB_SCRIPT_KANNADA, true, 0x0CCD,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY, PREF_LEN_2},

+ {HB_SCRIPT_MALAYALAM, true, 0x0D4D,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST, PREF_LEN_2},

+ {HB_SCRIPT_SINHALA, false,0x0DCA,BASE_POS_LAST_SINHALA,

+ REPH_POS_AFTER_MAIN, REPH_MODE_EXPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_DONT_CARE},

+ {HB_SCRIPT_KHMER, false,0x17D2,BASE_POS_FIRST,REPH_POS_DONT_CARE, REPH_MODE_VIS_REPHA,BLWF_MODE_PRE_AND_POST, PREF_LEN_2},

+ {HB_SCRIPT_JAVANESE, false,0xA9C0,BASE_POS_FIRST,REPH_POS_DONT_CARE, REPH_MODE_VIS_REPHA,BLWF_MODE_PRE_AND_POST, PREF_LEN_1},

};

@@ -345,15 +355,17 @@ indic_features[] =

{HB_TAG('r','k','r','f'), F_GLOBAL},

{HB_TAG('p','r','e','f'), F_NONE},

{HB_TAG('b','l','w','f'), F_NONE},

- {HB_TAG('h','a','l','f'), F_NONE},

{HB_TAG('a','b','v','f'), F_NONE},

+ {HB_TAG('h','a','l','f'), F_NONE},

{HB_TAG('p','s','t','f'), F_NONE},

- {HB_TAG('c','f','a','r'), F_NONE},

{HB_TAG('v','a','t','u'), F_GLOBAL},

{HB_TAG('c','j','c','t'), F_GLOBAL},

+ {HB_TAG('c','f','a','r'), F_NONE},

* Other features.

* These features are applied all at once, after final_reordering.

+ * Default Bengali font in Windows for example has intermixed

+ * lookups for init,pres,abvs,blws features.

{HB_TAG('i','n','i','t'), F_NONE},

{HB_TAG('p','r','e','s'), F_GLOBAL},

@@ -377,12 +389,12 @@ enum {

_RKRF,

PREF,

BLWF,

- HALF,

ABVF,

+ HALF,

PSTF,

- CFAR,

_VATU,

_CJCT,

+ CFAR,

INIT,

_PRES,

@@ -410,6 +422,10 @@ static void

final_reordering (const hb_ot_shape_plan_t *plan,

hb_font_t *font,

hb_buffer_t *buffer);

+static void

+clear_syllables (const hb_ot_shape_plan_t *plan,

+ hb_font_t *font,

+ hb_buffer_t *buffer);

static void

collect_features_indic (hb_ot_shape_planner_t *plan)

@@ -435,14 +451,26 @@ collect_features_indic (hb_ot_shape_planner_t *plan)

for (; i < INDIC_NUM_FEATURES; i++) {

map->add_feature (indic_features[i].tag, 1, indic_features[i].flags | F_MANUAL_ZWJ);

}

+ map->add_global_bool_feature (HB_TAG('c','a','l','t'));

+ map->add_global_bool_feature (HB_TAG('c','l','i','g'));

+ map->add_gsub_pause (clear_syllables);

}

static void

override_features_indic (hb_ot_shape_planner_t *plan)

{

- /* Uniscribe does not apply 'kern'. */

+ /* Uniscribe does not apply 'kern' in Khmer. */

if (hb_options ().uniscribe_bug_compatible)

- plan->map.add_feature (HB_TAG('k','e','r','n'), 0, F_GLOBAL);

+ {

+ switch ((hb_tag_t) plan->props.script)

+ {

+ case HB_SCRIPT_KHMER:

+ plan->map.add_feature (HB_TAG('k','e','r','n'), 0, F_GLOBAL);

+ break;

+ }

plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL);

}

@@ -450,8 +478,9 @@ override_features_indic (hb_ot_shape_planner_t *plan)

struct would_substitute_feature_t

{

- inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag)

+ inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)

{

+ zero_context = zero_context_;

map->get_stage_lookups (0/*GSUB*/,

map->get_feature_stage (0/*GSUB*/, feature_tag),

&lookups, &count);

@@ -459,7 +488,6 @@ struct would_substitute_feature_t

inline bool would_substitute (const hb_codepoint_t *glyphs,

unsigned int glyphs_count,

- bool zero_context,

hb_face_t *face) const

{

for (unsigned int i = 0; i < count; i++)

@@ -471,6 +499,7 @@ struct would_substitute_feature_t

private:

const hb_ot_map_t::lookup_map_t *lookups;

unsigned int count;

+ bool zero_context;

};

struct indic_shape_plan_t

@@ -526,10 +555,13 @@ data_create_indic (const hb_ot_shape_plan_t *plan)

indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FF) != '2');

indic_plan->virama_glyph = (hb_codepoint_t) -1;

- indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'));

- indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'));

- indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'));

- indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'));

+ /* Use zero-context would_substitute() matching for new-spec of the main

+ * Indic scripts, but not for old-spec or scripts with one spec only. */

+ bool zero_context = indic_plan->config->has_old_spec || !indic_plan->is_old_spec;

+ indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'), zero_context);

+ indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), zero_context);

+ indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'), zero_context);

+ indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'), zero_context);

for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++)

indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ?

@@ -546,7 +578,8 @@ data_destroy_indic (void *data)

static indic_position_t

consonant_position_from_face (const indic_shape_plan_t *indic_plan,

- const hb_codepoint_t glyphs[2],

+ const hb_codepoint_t consonant,

+ const hb_codepoint_t virama,

hb_face_t *face)

{

/* For old-spec, the order of glyphs is Consonant,Virama,

@@ -559,16 +592,19 @@ consonant_position_from_face (const indic_shape_plan_t *indic_plan,

* 930,94D in 'blwf', not the expected 94D,930 (with new-spec

* table). As such, we simply match both sequences. Seems

* to work. */

- bool zero_context = indic_plan->is_old_spec ? false : true;

- hb_codepoint_t glyphs_r[2] = {glyphs[1], glyphs[0]};

- if (indic_plan->pref.would_substitute (glyphs , 2, zero_context, face) ||

- indic_plan->pref.would_substitute (glyphs_r, 2, zero_context, face))

- return POS_POST_C;

- if (indic_plan->blwf.would_substitute (glyphs , 2, zero_context, face) ||

- indic_plan->blwf.would_substitute (glyphs_r, 2, zero_context, face))

+ hb_codepoint_t glyphs[3] = {virama, consonant, virama};

+ if (indic_plan->blwf.would_substitute (glyphs , 2, face) ||

+ indic_plan->blwf.would_substitute (glyphs+1, 2, face))

return POS_BELOW_C;

- if (indic_plan->pstf.would_substitute (glyphs , 2, zero_context, face) ||

- indic_plan->pstf.would_substitute (glyphs_r, 2, zero_context, face))

+ if (indic_plan->pstf.would_substitute (glyphs , 2, face) ||

+ indic_plan->pstf.would_substitute (glyphs+1, 2, face))

+ return POS_POST_C;

+ unsigned int pref_len = indic_plan->config->pref_len;

+ if ((pref_len == PREF_LEN_2 &&

+ (indic_plan->pref.would_substitute (glyphs , 2, face) ||

+ indic_plan->pref.would_substitute (glyphs+1, 2, face)))

+ || (pref_len == PREF_LEN_1 &&

+ indic_plan->pref.would_substitute (glyphs+1, 1, face)))

return POS_POST_C;

return POS_BASE_C;

}

@@ -578,6 +614,7 @@ enum syllable_type_t {

consonant_syllable,

vowel_syllable,

standalone_cluster,

+ avagraha_cluster,

broken_cluster,

non_indic_cluster,

};

@@ -627,15 +664,18 @@ update_consonant_positions (const hb_ot_shape_plan_t *plan,

{

const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;

- hb_codepoint_t glyphs[2];

- if (indic_plan->get_virama_glyph (font, &glyphs[0]))

+ if (indic_plan->config->base_pos != BASE_POS_LAST)

+ return;

+ hb_codepoint_t virama;

+ if (indic_plan->get_virama_glyph (font, &virama))

{

hb_face_t *face = font->face;

unsigned int count = buffer->len;

for (unsigned int i = 0; i < count; i++)

if (buffer->info[i].indic_position() == POS_BASE_C) {

- glyphs[1] = buffer->info[i].codepoint;

- buffer->info[i].indic_position() = consonant_position_from_face (indic_plan, glyphs, face);

+ hb_codepoint_t consonant = buffer->info[i].codepoint;

+ buffer->info[i].indic_position() = consonant_position_from_face (indic_plan, consonant, virama, face);

}

@@ -676,7 +716,8 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,

* and has more than one consonant, Ra is excluded from candidates for

* base consonants. */

unsigned int limit = start;

- if (indic_plan->mask_array[RPHF] &&

+ if (indic_plan->config->reph_pos != REPH_POS_DONT_CARE &&

+ indic_plan->mask_array[RPHF] &&

start + 3 <= end &&

(

(indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + 2])) ||

@@ -685,7 +726,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,

{

/* See if it matches the 'rphf' feature. */

hb_codepoint_t glyphs[2] = {info[start].codepoint, info[start + 1].codepoint};

- if (indic_plan->rphf.would_substitute (glyphs, ARRAY_LENGTH (glyphs), true, face))

+ if (indic_plan->rphf.would_substitute (glyphs, ARRAY_LENGTH (glyphs), face))

{

limit += 2;

while (limit < end && is_joiner (info[limit]))

@@ -757,9 +798,12 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,

}

break;

- case BASE_POS_FIRST:

+ case BASE_POS_LAST_SINHALA:

{

- /* In scripts without half forms (eg. Khmer), the first consonant is always the base. */

+ /* Sinhala base positioning is slightly different from main Indic, in that:

+ * 1. It's ZWJ behavior is different,

+ * 2. We don't need to look into the font for consonant positions.

+ */

if (!has_reph)

base = limit;

@@ -767,7 +811,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,

/* Find the last base consonant that is not blocked by ZWJ. If there is

* a ZWJ right before a base consonant, that would request a subjoined form. */

for (unsigned int i = limit; i < end; i++)

- if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C)

+ if (is_consonant (info[i]))

{

if (limit < i && info[i - 1].indic_category() == OT_ZWJ)

break;

@@ -777,7 +821,23 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,

/* Mark all subsequent consonants as below. */

for (unsigned int i = base + 1; i < end; i++)

- if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C)

+ if (is_consonant (info[i]))

+ info[i].indic_position() = POS_BELOW_C;

+ }

+ break;

+ case BASE_POS_FIRST:

+ {

+ /* The first consonant is always the base. */

+ assert (indic_plan->config->reph_mode == REPH_MODE_VIS_REPHA);

+ assert (!has_reph);

+ base = start;

+ /* Mark all subsequent consonants as below. */

+ for (unsigned int i = base + 1; i < end; i++)

+ if (is_consonant (info[i]))

info[i].indic_position() = POS_BELOW_C;

}

break;

@@ -876,7 +936,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,

indic_position_t last_pos = POS_START;

for (unsigned int i = start; i < end; i++)

{

- if ((FLAG (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | HALANT_OR_COENG_FLAGS)))

+ if ((FLAG (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | HALANT_OR_COENG_FLAGS)))

{

info[i].indic_position() = last_pos;

if (unlikely (info[i].indic_category() == OT_H &&

@@ -902,33 +962,68 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,

}

- /* Re-attach ZWJ, ZWNJ, and halant to next char, for after-base consonants. */

+ /* For post-base consonants let them own anything before them

+ * since the last consonant or matra. */

{

- unsigned int last_halant = end;

+ unsigned int last = base;

for (unsigned int i = base + 1; i < end; i++)

- if (is_halant_or_coeng (info[i]))

- last_halant = i;

- else if (is_consonant (info[i])) {

- for (unsigned int j = last_halant; j < i; j++)

- if (info[j].indic_position() != POS_SMVD)

+ if (is_consonant (info[i]))

+ {

+ for (unsigned int j = last + 1; j < i; j++)

+ if (info[j].indic_position() < POS_SMVD)

info[j].indic_position() = info[i].indic_position();

- }

+ last = i;

+ } else if (info[i].indic_category() == OT_M)

+ last = i;

}

{

- /* Things are out-of-control for post base positions, they may shuffle

- * around like crazy, so merge clusters. For pre-base stuff, we handle

- * cluster issues in final reordering. */

- buffer->merge_clusters (base, end);

+ /* Use syllable() for sort accounting temporarily. */

+ unsigned int syllable = info[start].syllable();

+ for (unsigned int i = start; i < end; i++)

+ info[i].syllable() = i - start;

/* Sit tight, rock 'n roll! */

hb_bubble_sort (info + start, end - start, compare_indic_order);

/* Find base again */

base = end;

for (unsigned int i = start; i < end; i++)

- if (info[i].indic_position() == POS_BASE_C) {

- base = i;

+ if (info[i].indic_position() == POS_BASE_C)

+ {

+ base = i;

break;

}

+ /* Things are out-of-control for post base positions, they may shuffle

+ * around like crazy. In old-spec mode, we move halants around, so in

+ * that case merge all clusters after base. Otherwise, check the sort

+ * order and merge as needed.

+ * For pre-base stuff, we handle cluster issues in final reordering. */

+ if (indic_plan->is_old_spec || end - base > 127)

+ buffer->merge_clusters (base, end);

+ else

+ {

+ /* Note! syllable() is a one-byte field. */

+ for (unsigned int i = base; i < end; i++)

+ if (info[i].syllable() != 255)

+ {

+ unsigned int max = i;

+ unsigned int j = start + info[i].syllable();

+ while (j != i)

+ {

+ max = MAX (max, j);

+ unsigned int next = start + info[j].syllable();

+ info[j].syllable() = 255; /* So we don't process j later again. */

+ j = next;

+ }

+ if (i != max)

+ buffer->merge_clusters (i, max + 1);

+ }

+ /* Put syllable back in. */

+ for (unsigned int i = start; i < end; i++)

+ info[i].syllable() = syllable;

}

/* Setup masks now */

@@ -942,6 +1037,9 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,

/* Pre-base */

mask = indic_plan->mask_array[HALF];

+ if (!indic_plan->is_old_spec &&

+ indic_plan->config->blwf_mode == BLWF_MODE_PRE_AND_POST)

+ mask |= indic_plan->mask_array[BLWF];

for (unsigned int i = start; i < base; i++)

info[i].mask |= mask;

/* Base */

@@ -986,15 +1084,19 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,

}

- if (indic_plan->mask_array[PREF] && base + 2 < end)

+ unsigned int pref_len = indic_plan->config->pref_len;

+ if (indic_plan->mask_array[PREF] && base + pref_len < end)

{

+ assert (1 <= pref_len && pref_len <= 2);

/* Find a Halant,Ra sequence and mark it for pre-base reordering processing. */

- for (unsigned int i = base + 1; i + 1 < end; i++) {

- hb_codepoint_t glyphs[2] = {info[i].codepoint, info[i + 1].codepoint};

- if (indic_plan->pref.would_substitute (glyphs, ARRAY_LENGTH (glyphs), true, face))

+ for (unsigned int i = base + 1; i + pref_len - 1 < end; i++) {

+ hb_codepoint_t glyphs[2];

+ for (unsigned int j = 0; j < pref_len; j++)

+ glyphs[j] = info[i + j].codepoint;

+ if (indic_plan->pref.would_substitute (glyphs, pref_len, face))

{

- info[i++].mask |= indic_plan->mask_array[PREF];

+ for (unsigned int j = 0; j < pref_len; j++)

+ info[i++].mask |= indic_plan->mask_array[PREF];

/* Mark the subsequent stuff with 'cfar'. Used in Khmer.

* Read the feature spec.

@@ -1002,8 +1104,9 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,

* U+1784,U+17D2,U+179A,U+17D2,U+1782

* U+1784,U+17D2,U+1782,U+17D2,U+179A

- for (; i < end; i++)

- info[i].mask |= indic_plan->mask_array[CFAR];

+ if (indic_plan->mask_array[CFAR])

+ for (; i < end; i++)

+ info[i].mask |= indic_plan->mask_array[CFAR];

break;

}

@@ -1074,6 +1177,16 @@ initial_reordering_broken_cluster (const hb_ot_shape_plan_t *plan,

}

static void

+initial_reordering_avagraha_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,

+ hb_face_t *face HB_UNUSED,

+ hb_buffer_t *buffer HB_UNUSED,

+ unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)

+ /* Nothing to do right now. If we ever switch to using the output

+ * buffer in the reordering process, we'd need to next_glyph() here. */

+static void

initial_reordering_non_indic_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,

hb_face_t *face HB_UNUSED,

hb_buffer_t *buffer HB_UNUSED,

@@ -1095,6 +1208,7 @@ initial_reordering_syllable (const hb_ot_shape_plan_t *plan,

case consonant_syllable: initial_reordering_consonant_syllable (plan, face, buffer, start, end); return;

case vowel_syllable: initial_reordering_vowel_syllable (plan, face, buffer, start, end); return;

case standalone_cluster: initial_reordering_standalone_cluster (plan, face, buffer, start, end); return;

+ case avagraha_cluster: initial_reordering_avagraha_cluster (plan, face, buffer, start, end); return;

case broken_cluster: initial_reordering_broken_cluster (plan, face, buffer, start, end); return;

case non_indic_cluster: initial_reordering_non_indic_cluster (plan, face, buffer, start, end); return;

}

@@ -1264,9 +1378,9 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,

info[new_pos] = tmp;

if (old_pos < base && base <= new_pos) /* Shouldn't actually happen. */

base--;

+ buffer->merge_clusters (new_pos, MIN (end, base + 1));

new_pos--;

}

- buffer->merge_clusters (new_pos, MIN (end, base + 1));

} else {

for (unsigned int i = start; i < base; i++)

if (info[i].indic_position () == POS_PRE_M) {

@@ -1286,17 +1400,24 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,

* before post-base consonant forms, and after post-base consonant forms.

- /* If there's anything after the Ra that has the REPH pos, it ought to be halant.

- * Which means that the font has failed to ligate the Reph. In which case, we

- * shouldn't move. */

+ /* Two cases:

+ *

+ * - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then

+ * we should only move it if the sequence ligated to the repha form.

+ *

+ * - If repha is encoded separately and in the logical position, we should only

+ * move it if it did NOT ligate. If it ligated, it's probably the font trying

+ * to make it work without the reordering.

+ */

if (start + 1 < end &&

info[start].indic_position() == POS_RA_TO_BECOME_REPH &&

- info[start + 1].indic_position() != POS_RA_TO_BECOME_REPH)

+ ((info[start].indic_category() == OT_Repha) ^

+ _hb_glyph_info_ligated (&info[start])))

{

unsigned int new_reph_pos;

reph_position_t reph_pos = indic_plan->config->reph_pos;

- /* XXX Figure out old behavior too */

+ assert (reph_pos != REPH_POS_DONT_CARE);

/* 1. If reph should be positioned after post-base consonant forms,

* proceed to step 5.

@@ -1338,7 +1459,6 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,

if (reph_pos == REPH_POS_AFTER_MAIN)

{

new_reph_pos = base;

- /* XXX Skip potential pre-base reordering Ra. */

while (new_reph_pos + 1 < end && info[new_reph_pos + 1].indic_position() <= POS_AFTER_MAIN)

new_reph_pos++;

if (new_reph_pos < end)

@@ -1411,8 +1531,7 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,

reph_move:

{

- /* Yay, one big cluster! Merge before moving. */

- buffer->merge_clusters (start, end);

+ buffer->merge_clusters (start, new_reph_pos + 1);

/* Move */

hb_glyph_info_t reph = info[start];

@@ -1432,6 +1551,7 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,

if (indic_plan->mask_array[PREF] && base + 1 < end) /* Otherwise there can't be any pre-base reordering Ra. */

{

+ unsigned int pref_len = indic_plan->config->pref_len;

for (unsigned int i = base + 1; i < end; i++)

if ((info[i].mask & indic_plan->mask_array[PREF]) != 0)

{

@@ -1439,7 +1559,13 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,

* of the <pref> feature. (Note that a font may shape a Ra consonant with

* the feature generally but block it in certain contexts.)

- if (i + 1 == end || (info[i + 1].mask & indic_plan->mask_array[PREF]) == 0)

+ /* Note: We just check that something got substituted. We don't check that

+ * the <pref> feature actually did it...

+ *

+ * If pref len is longer than one, then only reorder if it ligated. If

+ * pref len is one, only reorder if it didn't ligate with other things. */

+ if (_hb_glyph_info_substituted (&info[i]) &&

+ ((pref_len == 1) ^ _hb_glyph_info_ligated (&info[i])))

{

* 2. Try to find a target position the same way as for pre-base matra.

@@ -1460,7 +1586,7 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,

!(is_one_of (info[new_pos - 1], FLAG(OT_M) | HALANT_OR_COENG_FLAGS)))

new_pos--;

- /* In Khmer coeng model, a V,Ra can go *after* matras. If it goes after a

+ /* In Khmer coeng model, a H,Ra can go *after* matras. If it goes after a

* split matra, it should be reordered to *before* the left part of such matra. */

if (new_pos > start && info[new_pos - 1].indic_category() == OT_M)

{

@@ -1510,11 +1636,20 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,

if (hb_options ().uniscribe_bug_compatible)

{

- /* Uniscribe merges the entire cluster.

- * This means, half forms are submerged into the main consonants cluster.

- * This is unnecessary, and makes cursor positioning harder, but that's what

- * Uniscribe does. */

- buffer->merge_clusters (start, end);

+ switch ((hb_tag_t) plan->props.script)

+ {

+ case HB_SCRIPT_TAMIL:

+ case HB_SCRIPT_SINHALA:

+ break;

+ default:

+ /* Uniscribe merges the entire cluster... Except for Tamil & Sinhala.

+ * This means, half forms are submerged into the main consonants cluster.

+ * This is unnecessary, and makes cursor positioning harder, but that's what

+ * Uniscribe does. */

+ buffer->merge_clusters (start, end);

+ break;

+ }

}

@@ -1538,15 +1673,23 @@ final_reordering (const hb_ot_shape_plan_t *plan,

}

final_reordering_syllable (plan, buffer, last, count);

- /* Zero syllables now... */

- for (unsigned int i = 0; i < count; i++)

- info[i].syllable() = 0;

HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category);

HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position);

}

+static void

+clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,

+ hb_font_t *font HB_UNUSED,

+ hb_buffer_t *buffer)

+ hb_glyph_info_t *info = buffer->info;

+ unsigned int count = buffer->len;

+ for (unsigned int i = 0; i < count; i++)

+ info[i].syllable() = 0;

static hb_ot_shape_normalization_mode_t

normalization_preference_indic (const hb_segment_properties_t *props HB_UNUSED)

{

@@ -1626,7 +1769,7 @@ decompose_indic (const hb_ot_shape_normalize_context_t *c,

if (hb_options ().uniscribe_bug_compatible ||

(c->font->get_glyph (ab, 0, &glyph) &&

- indic_plan->pstf.would_substitute (&glyph, 1, true, c->font->face)))

+ indic_plan->pstf.would_substitute (&glyph, 1, c->font->face)))

{

/* Ok, safe to use Uniscribe-style decomposition. */

*a = 0x0DD9;