third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic.cc - Issue 1580513002: Roll HarfBuzz to 1.1.3

Side by Side Diff: third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic.cc

Issue 1580513002: Roll HarfBuzz to 1.1.3 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: build fix Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « third_party/harfbuzz-ng/src/hb-ot-shape-complex-hebrew.cc ('k') | third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic-private.hh » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright © 2011,2012 Google, Inc.	2 * Copyright © 2011,2012 Google, Inc.

3 *	3 *

4 * This is part of HarfBuzz, a text shaping library.	4 * This is part of HarfBuzz, a text shaping library.

5 *	5 *

6 * Permission is hereby granted, without written agreement and without	6 * Permission is hereby granted, without written agreement and without

7 * license or royalty fees, to use, copy, modify, and distribute this	7 * license or royalty fees, to use, copy, modify, and distribute this

8 * software and its documentation for any purpose, provided that the	8 * software and its documentation for any purpose, provided that the

9 * above copyright notice and the following two paragraphs appear in	9 * above copyright notice and the following two paragraphs appear in

10 * all copies of this software.	10 * all copies of this software.

(...skipping 158 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
169 hb_codepoint_t u = info.codepoint;	169 hb_codepoint_t u = info.codepoint;

170 unsigned int type = hb_indic_get_categories (u);	170 unsigned int type = hb_indic_get_categories (u);

171 indic_category_t cat = (indic_category_t) (type & 0x7Fu);	171 indic_category_t cat = (indic_category_t) (type & 0x7Fu);

172 indic_position_t pos = (indic_position_t) (type >> 8);	172 indic_position_t pos = (indic_position_t) (type >> 8);

173	173

174	174

175 /*	175 /*

176 * Re-assign category	176 * Re-assign category

177 */	177 */

178	178

179

180 /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe

181 * treats a whole bunch of characters similarly.

182 * TESTS: For example, for U+0951:

183 * U+092E,U+0947,U+0952

184 * U+092E,U+0952,U+0947

185 * U+092E,U+0947,U+0951

186 * U+092E,U+0951,U+0947

187 * U+092E,U+0951,U+0952

188 * U+092E,U+0952,U+0951

189 */

190 if (unlikely (hb_in_ranges (u, 0x0951u, 0x0952u,

191 0x1CD0u, 0x1CD2u,

192 0x1CD4u, 0x1CE1u) ||

193 u == 0x1CF4u))

194 cat = OT_A;

195 /* The following act more like the Bindus. */	179 /* The following act more like the Bindus. */

196 else if (unlikely (hb_in_range (u, 0x0953u, 0x0954u)))	180 if (unlikely (hb_in_range (u, 0x0953u, 0x0954u)))

197 cat = OT_SM;	181 cat = OT_SM;

198 /* The following act like consonants. */	182 /* The following act like consonants. */

199 else if (unlikely (hb_in_ranges (u, 0x0A72u, 0x0A73u,	183 else if (unlikely (hb_in_ranges (u, 0x0A72u, 0x0A73u,

200 0x1CF5u, 0x1CF6u)))	184 0x1CF5u, 0x1CF6u)))

201 cat = OT_C;	185 cat = OT_C;

202 /* TODO: The following should only be allowed after a Visarga.	186 /* TODO: The following should only be allowed after a Visarga.

203 * For now, just treat them like regular tone marks. */	187 * For now, just treat them like regular tone marks. */

204 else if (unlikely (hb_in_range (u, 0x1CE2u, 0x1CE8u)))	188 else if (unlikely (hb_in_range (u, 0x1CE2u, 0x1CE8u)))

205 cat = OT_A;	189 cat = OT_A;

206 /* TODO: The following should only be allowed after some of	190 /* TODO: The following should only be allowed after some of

207 * the nasalization marks, maybe only for U+1CE9..U+1CF1.	191 * the nasalization marks, maybe only for U+1CE9..U+1CF1.

208 * For now, just treat them like tone marks. */	192 * For now, just treat them like tone marks. */

209 else if (unlikely (u == 0x1CEDu))	193 else if (unlikely (u == 0x1CEDu))

210 cat = OT_A;	194 cat = OT_A;

211 /* The following take marks in standalone clusters, similar to Avagraha. */	195 /* The following take marks in standalone clusters, similar to Avagraha. */

212 else if (unlikely (hb_in_ranges (u, 0xA8F2u, 0xA8F7u,	196 else if (unlikely (hb_in_ranges (u, 0xA8F2u, 0xA8F7u,

213 0x1CE9u, 0x1CECu,	197 0x1CE9u, 0x1CECu,

214 0x1CEEu, 0x1CF1u)))	198 0x1CEEu, 0x1CF1u)))

215 {	199 {

216 cat = OT_Symbol;	200 cat = OT_Symbol;

217 ASSERT_STATIC ((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol);	201 ASSERT_STATIC ((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol);

218 }	202 }

219 else if (unlikely (hb_in_range (u, 0x17CDu, 0x17D1u) ||	203 else if (unlikely (u == 0x17DDu)) /* https://github.com/roozbehp/unicode-data/issues/2 */

220 » » u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */

221 {	204 {

222 /* These are like Top Matras. */

223 cat = OT_M;	205 cat = OT_M;

224 pos = POS_ABOVE_C;	206 pos = POS_ABOVE_C;

225 }	207 }

226 else if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */	208 else if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */

227 else if (unlikely (u == 0x17D2u)) cat = OT_Coeng; /* Khmer coeng */

228 else if (unlikely (hb_in_range (u, 0x2010u, 0x2011u)))	209 else if (unlikely (hb_in_range (u, 0x2010u, 0x2011u)))

229 cat = OT_PLACEHOLDER;	210 cat = OT_PLACEHOLDER;

230 else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE;	211 else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE;

231 else if (unlikely (u == 0xA982u)) cat = OT_SM; /* Javanese repha. */	212 else if (unlikely (u == 0xA982u)) cat = OT_SM; /* Javanese repha. */

232 else if (unlikely (u == 0xA9BEu)) cat = OT_CM2; /* Javanese medial ya. */	213 else if (unlikely (u == 0xA9BEu)) cat = OT_CM2; /* Javanese medial ya. */

233 else if (unlikely (u == 0xA9BDu)) { cat = OT_M; pos = POS_POST_C; } /* Javanese vocalic r. */	214 else if (unlikely (u == 0xA9BDu)) { cat = OT_M; pos = POS_POST_C; } /* Javanese vocalic r. */

234	215

235	216

236 /*	217 /*

237 * Re-assign position.	218 * Re-assign position.

(...skipping 312 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
550 for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++)	531 for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++)

551 if (plan->props.script == indic_configs[i].script) {	532 if (plan->props.script == indic_configs[i].script) {

552 indic_plan->config = &indic_configs[i];	533 indic_plan->config = &indic_configs[i];

553 break;	534 break;

554 }	535 }

555	536

556 indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2');	537 indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2');

557 indic_plan->virama_glyph = (hb_codepoint_t) -1;	538 indic_plan->virama_glyph = (hb_codepoint_t) -1;

558	539

559 /* Use zero-context would_substitute() matching for new-spec of the main	540 /* Use zero-context would_substitute() matching for new-spec of the main

560 * Indic scripts, and scripts with one spec only, but not for old-specs. */	541 * Indic scripts, and scripts with one spec only, but not for old-specs.

561 bool zero_context = !indic_plan->is_old_spec;	542 * The new-spec for all dual-spec scripts says zero-context matching happens.

	543 *

	544 * However, testing with Malayalam shows that old and new spec both allow

	545 * context. Testing with Bengali new-spec however shows that it doesn't.

	546 * So, the heuristic here is the way it is. It should only be changed,

	547 * as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE.

	548 */

	549 bool zero_context = !indic_plan->is_old_spec && plan->props.script != HB_SCRIPT_MALAYALAM;

562 indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'), zero_context);	550 indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'), zero_context);

563 indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), zero_context);	551 indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), zero_context);

564 indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'), zero_context);	552 indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'), zero_context);

565 indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'), zero_context);	553 indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'), zero_context);

566	554

567 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++)	555 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++)

568 indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ?	556 indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ?

569 0 : plan->map.get_1_mask (indic_features[i].tag );	557 0 : plan->map.get_1_mask (indic_features[i].tag );

570	558

571 return indic_plan;	559 return indic_plan;

(...skipping 769 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1341 base = i;	1329 base = i;

1342 while (base < end && is_halant_or_coeng (info[base]))	1330 while (base < end && is_halant_or_coeng (info[base]))

1343 base++;	1331 base++;

1344 info[base].indic_position() = POS_BASE_C;	1332 info[base].indic_position() = POS_BASE_C;

1345	1333

1346 try_pref = false;	1334 try_pref = false;

1347 }	1335 }

1348 break;	1336 break;

1349 }	1337 }

1350 }	1338 }

	1339 /* For Malayalam, skip over unformed below- (but NOT post-) forms. */

	1340 if (buffer->props.script == HB_SCRIPT_MALAYALAM)

	1341 {

	1342 for (unsigned int i = base + 1; i < end; i++)

	1343 {

	1344 while (i < end && is_joiner (info[i]))

	1345 i++;

	1346 if (i == end || !is_halant_or_coeng (info[i]))

	1347 break;

	1348 i++; /* Skip halant. */

	1349 while (i < end && is_joiner (info[i]))

	1350 i++;

	1351 if (i < end && is_consonant (info[i]) && info[i].indic_position() == POS_BELOW_C)

	1352 {

	1353 base = i;

	1354 info[base].indic_position() = POS_BASE_C;

	1355 }

	1356 }

	1357 }

1351	1358

1352 if (start < base && info[base].indic_position() > POS_BASE_C)	1359 if (start < base && info[base].indic_position() > POS_BASE_C)

1353 base--;	1360 base--;

1354 break;	1361 break;

1355 }	1362 }

1356 if (base == end && start < base &&	1363 if (base == end && start < base &&

1357 is_one_of (info[base - 1], FLAG (OT_ZWJ)))	1364 is_one_of (info[base - 1], FLAG (OT_ZWJ)))

1358 base--;	1365 base--;

1359 if (base < end)	1366 if (base < end)

1360 while (start < base &&	1367 while (start < base &&

(...skipping 438 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1799 (c->font->get_glyph (ab, 0, &glyph) &&	1806 (c->font->get_glyph (ab, 0, &glyph) &&

1800 indic_plan->pstf.would_substitute (&glyph, 1, c->font->face)))	1807 indic_plan->pstf.would_substitute (&glyph, 1, c->font->face)))

1801 {	1808 {

1802 /* Ok, safe to use Uniscribe-style decomposition. */	1809 /* Ok, safe to use Uniscribe-style decomposition. */

1803 *a = 0x0DD9u;	1810 *a = 0x0DD9u;

1804 *b = ab;	1811 *b = ab;

1805 return true;	1812 return true;

1806 }	1813 }

1807 }	1814 }

1808	1815

1809 return c->unicode->decompose (ab, a, b);	1816 return (bool) c->unicode->decompose (ab, a, b);

1810 }	1817 }

1811	1818

1812 static bool	1819 static bool

1813 compose_indic (const hb_ot_shape_normalize_context_t *c,	1820 compose_indic (const hb_ot_shape_normalize_context_t *c,

1814 hb_codepoint_t a,	1821 hb_codepoint_t a,

1815 hb_codepoint_t b,	1822 hb_codepoint_t b,

1816 hb_codepoint_t *ab)	1823 hb_codepoint_t *ab)

1817 {	1824 {

1818 /* Avoid recomposing split matras. */	1825 /* Avoid recomposing split matras. */

1819 if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))	1826 if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))

1820 return false;	1827 return false;

1821	1828

1822 /* Composition-exclusion exceptions that we want to recompose. */	1829 /* Composition-exclusion exceptions that we want to recompose. */

1823 if (a == 0x09AFu && b == 0x09BCu) { *ab = 0x09DFu; return true; }	1830 if (a == 0x09AFu && b == 0x09BCu) { *ab = 0x09DFu; return true; }

1824	1831

1825 return c->unicode->compose (a, b, ab);	1832 return (bool) c->unicode->compose (a, b, ab);

1826 }	1833 }

1827	1834

1828	1835

1829 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =	1836 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =

1830 {	1837 {

1831 "indic",	1838 "indic",

1832 collect_features_indic,	1839 collect_features_indic,

1833 override_features_indic,	1840 override_features_indic,

1834 data_create_indic,	1841 data_create_indic,

1835 data_destroy_indic,	1842 data_destroy_indic,

1836 NULL, /* preprocess_text */	1843 NULL, /* preprocess_text */

1837 NULL, /* postprocess_glyphs */	1844 NULL, /* postprocess_glyphs */

1838 HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,	1845 HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,

1839 decompose_indic,	1846 decompose_indic,

1840 compose_indic,	1847 compose_indic,

1841 setup_masks_indic,	1848 setup_masks_indic,

1842 HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,	1849 HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,

1843 false, /* fallback_position */	1850 false, /* fallback_position */

1844 };	1851 };

OLD	NEW