| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright © 2011,2012 Google, Inc. | 2 * Copyright © 2011,2012 Google, Inc. |
| 3 * | 3 * |
| 4 * This is part of HarfBuzz, a text shaping library. | 4 * This is part of HarfBuzz, a text shaping library. |
| 5 * | 5 * |
| 6 * Permission is hereby granted, without written agreement and without | 6 * Permission is hereby granted, without written agreement and without |
| 7 * license or royalty fees, to use, copy, modify, and distribute this | 7 * license or royalty fees, to use, copy, modify, and distribute this |
| 8 * software and its documentation for any purpose, provided that the | 8 * software and its documentation for any purpose, provided that the |
| 9 * above copyright notice and the following two paragraphs appear in | 9 * above copyright notice and the following two paragraphs appear in |
| 10 * all copies of this software. | 10 * all copies of this software. |
| (...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 169 hb_codepoint_t u = info.codepoint; | 169 hb_codepoint_t u = info.codepoint; |
| 170 unsigned int type = hb_indic_get_categories (u); | 170 unsigned int type = hb_indic_get_categories (u); |
| 171 indic_category_t cat = (indic_category_t) (type & 0x7Fu); | 171 indic_category_t cat = (indic_category_t) (type & 0x7Fu); |
| 172 indic_position_t pos = (indic_position_t) (type >> 8); | 172 indic_position_t pos = (indic_position_t) (type >> 8); |
| 173 | 173 |
| 174 | 174 |
| 175 /* | 175 /* |
| 176 * Re-assign category | 176 * Re-assign category |
| 177 */ | 177 */ |
| 178 | 178 |
| 179 | |
| 180 /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe | |
| 181 * treats a whole bunch of characters similarly. | |
| 182 * TESTS: For example, for U+0951: | |
| 183 * U+092E,U+0947,U+0952 | |
| 184 * U+092E,U+0952,U+0947 | |
| 185 * U+092E,U+0947,U+0951 | |
| 186 * U+092E,U+0951,U+0947 | |
| 187 * U+092E,U+0951,U+0952 | |
| 188 * U+092E,U+0952,U+0951 | |
| 189 */ | |
| 190 if (unlikely (hb_in_ranges (u, 0x0951u, 0x0952u, | |
| 191 0x1CD0u, 0x1CD2u, | |
| 192 0x1CD4u, 0x1CE1u) || | |
| 193 u == 0x1CF4u)) | |
| 194 cat = OT_A; | |
| 195 /* The following act more like the Bindus. */ | 179 /* The following act more like the Bindus. */ |
| 196 else if (unlikely (hb_in_range (u, 0x0953u, 0x0954u))) | 180 if (unlikely (hb_in_range (u, 0x0953u, 0x0954u))) |
| 197 cat = OT_SM; | 181 cat = OT_SM; |
| 198 /* The following act like consonants. */ | 182 /* The following act like consonants. */ |
| 199 else if (unlikely (hb_in_ranges (u, 0x0A72u, 0x0A73u, | 183 else if (unlikely (hb_in_ranges (u, 0x0A72u, 0x0A73u, |
| 200 0x1CF5u, 0x1CF6u))) | 184 0x1CF5u, 0x1CF6u))) |
| 201 cat = OT_C; | 185 cat = OT_C; |
| 202 /* TODO: The following should only be allowed after a Visarga. | 186 /* TODO: The following should only be allowed after a Visarga. |
| 203 * For now, just treat them like regular tone marks. */ | 187 * For now, just treat them like regular tone marks. */ |
| 204 else if (unlikely (hb_in_range (u, 0x1CE2u, 0x1CE8u))) | 188 else if (unlikely (hb_in_range (u, 0x1CE2u, 0x1CE8u))) |
| 205 cat = OT_A; | 189 cat = OT_A; |
| 206 /* TODO: The following should only be allowed after some of | 190 /* TODO: The following should only be allowed after some of |
| 207 * the nasalization marks, maybe only for U+1CE9..U+1CF1. | 191 * the nasalization marks, maybe only for U+1CE9..U+1CF1. |
| 208 * For now, just treat them like tone marks. */ | 192 * For now, just treat them like tone marks. */ |
| 209 else if (unlikely (u == 0x1CEDu)) | 193 else if (unlikely (u == 0x1CEDu)) |
| 210 cat = OT_A; | 194 cat = OT_A; |
| 211 /* The following take marks in standalone clusters, similar to Avagraha. */ | 195 /* The following take marks in standalone clusters, similar to Avagraha. */ |
| 212 else if (unlikely (hb_in_ranges (u, 0xA8F2u, 0xA8F7u, | 196 else if (unlikely (hb_in_ranges (u, 0xA8F2u, 0xA8F7u, |
| 213 0x1CE9u, 0x1CECu, | 197 0x1CE9u, 0x1CECu, |
| 214 0x1CEEu, 0x1CF1u))) | 198 0x1CEEu, 0x1CF1u))) |
| 215 { | 199 { |
| 216 cat = OT_Symbol; | 200 cat = OT_Symbol; |
| 217 ASSERT_STATIC ((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol); | 201 ASSERT_STATIC ((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol); |
| 218 } | 202 } |
| 219 else if (unlikely (hb_in_range (u, 0x17CDu, 0x17D1u) || | 203 else if (unlikely (u == 0x17DDu)) /* https://github.com/roozbehp/unicode-data/issues/2 */ |
| 220 » » u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */ | |
| 221 { | 204 { |
| 222 /* These are like Top Matras. */ | |
| 223 cat = OT_M; | 205 cat = OT_M; |
| 224 pos = POS_ABOVE_C; | 206 pos = POS_ABOVE_C; |
| 225 } | 207 } |
| 226 else if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */ | 208 else if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */ |
| 227 else if (unlikely (u == 0x17D2u)) cat = OT_Coeng; /* Khmer coeng */ | |
| 228 else if (unlikely (hb_in_range (u, 0x2010u, 0x2011u))) | 209 else if (unlikely (hb_in_range (u, 0x2010u, 0x2011u))) |
| 229 cat = OT_PLACEHOLDER; | 210 cat = OT_PLACEHOLDER; |
| 230 else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; | 211 else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; |
| 231 else if (unlikely (u == 0xA982u)) cat = OT_SM; /* Javanese repha. */ | 212 else if (unlikely (u == 0xA982u)) cat = OT_SM; /* Javanese repha. */ |
| 232 else if (unlikely (u == 0xA9BEu)) cat = OT_CM2; /* Javanese medial ya. */ | 213 else if (unlikely (u == 0xA9BEu)) cat = OT_CM2; /* Javanese medial ya. */ |
| 233 else if (unlikely (u == 0xA9BDu)) { cat = OT_M; pos = POS_POST_C; } /* Javanese vocalic r. */ | 214 else if (unlikely (u == 0xA9BDu)) { cat = OT_M; pos = POS_POST_C; } /* Javanese vocalic r. */ |
| 234 | 215 |
| 235 | 216 |
| 236 /* | 217 /* |
| 237 * Re-assign position. | 218 * Re-assign position. |
| (...skipping 312 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 550 for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++) | 531 for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++) |
| 551 if (plan->props.script == indic_configs[i].script) { | 532 if (plan->props.script == indic_configs[i].script) { |
| 552 indic_plan->config = &indic_configs[i]; | 533 indic_plan->config = &indic_configs[i]; |
| 553 break; | 534 break; |
| 554 } | 535 } |
| 555 | 536 |
| 556 indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2'); | 537 indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2'); |
| 557 indic_plan->virama_glyph = (hb_codepoint_t) -1; | 538 indic_plan->virama_glyph = (hb_codepoint_t) -1; |
| 558 | 539 |
| 559 /* Use zero-context would_substitute() matching for new-spec of the main | 540 /* Use zero-context would_substitute() matching for new-spec of the main |
| 560 * Indic scripts, and scripts with one spec only, but not for old-specs. */ | 541 * Indic scripts, and scripts with one spec only, but not for old-specs. |
| 561 bool zero_context = !indic_plan->is_old_spec; | 542 * The new-spec for all dual-spec scripts says zero-context matching happens. |
| 543 * |
| 544 * However, testing with Malayalam shows that old and new spec both allow |
| 545 * context. Testing with Bengali new-spec however shows that it doesn't. |
| 546 * So, the heuristic here is the way it is. It should *only* be changed, |
| 547 * as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE. |
| 548 */ |
| 549 bool zero_context = !indic_plan->is_old_spec && plan->props.script != HB_SCRIPT_MALAYALAM; |
| 562 indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'), zero_context); | 550 indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'), zero_context); |
| 563 indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), zero_context); | 551 indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), zero_context); |
| 564 indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'), zero_context); | 552 indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'), zero_context); |
| 565 indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'), zero_context); | 553 indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'), zero_context); |
| 566 | 554 |
| 567 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++) | 555 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++) |
| 568 indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ? | 556 indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ? |
| 569 0 : plan->map.get_1_mask (indic_features[i].tag); | 557 0 : plan->map.get_1_mask (indic_features[i].tag); |
| 570 | 558 |
| 571 return indic_plan; | 559 return indic_plan; |
| (...skipping 769 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1341 base = i; | 1329 base = i; |
| 1342 while (base < end && is_halant_or_coeng (info[base])) | 1330 while (base < end && is_halant_or_coeng (info[base])) |
| 1343 base++; | 1331 base++; |
| 1344 info[base].indic_position() = POS_BASE_C; | 1332 info[base].indic_position() = POS_BASE_C; |
| 1345 | 1333 |
| 1346 try_pref = false; | 1334 try_pref = false; |
| 1347 } | 1335 } |
| 1348 break; | 1336 break; |
| 1349 } | 1337 } |
| 1350 } | 1338 } |
| 1339 /* For Malayalam, skip over unformed below- (but NOT post-) forms. */ |
| 1340 if (buffer->props.script == HB_SCRIPT_MALAYALAM) |
| 1341 { |
| 1342 for (unsigned int i = base + 1; i < end; i++) |
| 1343 { |
| 1344 while (i < end && is_joiner (info[i])) |
| 1345 i++; |
| 1346 if (i == end || !is_halant_or_coeng (info[i])) |
| 1347 break; |
| 1348 i++; /* Skip halant. */ |
| 1349 while (i < end && is_joiner (info[i])) |
| 1350 i++; |
| 1351 if (i < end && is_consonant (info[i]) && info[i].indic_position() == POS_BELOW_C) |
| 1352 { |
| 1353 base = i; |
| 1354 info[base].indic_position() = POS_BASE_C; |
| 1355 } |
| 1356 } |
| 1357 } |
| 1351 | 1358 |
| 1352 if (start < base && info[base].indic_position() > POS_BASE_C) | 1359 if (start < base && info[base].indic_position() > POS_BASE_C) |
| 1353 base--; | 1360 base--; |
| 1354 break; | 1361 break; |
| 1355 } | 1362 } |
| 1356 if (base == end && start < base && | 1363 if (base == end && start < base && |
| 1357 is_one_of (info[base - 1], FLAG (OT_ZWJ))) | 1364 is_one_of (info[base - 1], FLAG (OT_ZWJ))) |
| 1358 base--; | 1365 base--; |
| 1359 if (base < end) | 1366 if (base < end) |
| 1360 while (start < base && | 1367 while (start < base && |
| (...skipping 438 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1799 (c->font->get_glyph (ab, 0, &glyph) && | 1806 (c->font->get_glyph (ab, 0, &glyph) && |
| 1800 indic_plan->pstf.would_substitute (&glyph, 1, c->font->face))) | 1807 indic_plan->pstf.would_substitute (&glyph, 1, c->font->face))) |
| 1801 { | 1808 { |
| 1802 /* Ok, safe to use Uniscribe-style decomposition. */ | 1809 /* Ok, safe to use Uniscribe-style decomposition. */ |
| 1803 *a = 0x0DD9u; | 1810 *a = 0x0DD9u; |
| 1804 *b = ab; | 1811 *b = ab; |
| 1805 return true; | 1812 return true; |
| 1806 } | 1813 } |
| 1807 } | 1814 } |
| 1808 | 1815 |
| 1809 return c->unicode->decompose (ab, a, b); | 1816 return (bool) c->unicode->decompose (ab, a, b); |
| 1810 } | 1817 } |
| 1811 | 1818 |
| 1812 static bool | 1819 static bool |
| 1813 compose_indic (const hb_ot_shape_normalize_context_t *c, | 1820 compose_indic (const hb_ot_shape_normalize_context_t *c, |
| 1814 hb_codepoint_t a, | 1821 hb_codepoint_t a, |
| 1815 hb_codepoint_t b, | 1822 hb_codepoint_t b, |
| 1816 hb_codepoint_t *ab) | 1823 hb_codepoint_t *ab) |
| 1817 { | 1824 { |
| 1818 /* Avoid recomposing split matras. */ | 1825 /* Avoid recomposing split matras. */ |
| 1819 if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) | 1826 if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) |
| 1820 return false; | 1827 return false; |
| 1821 | 1828 |
| 1822 /* Composition-exclusion exceptions that we want to recompose. */ | 1829 /* Composition-exclusion exceptions that we want to recompose. */ |
| 1823 if (a == 0x09AFu && b == 0x09BCu) { *ab = 0x09DFu; return true; } | 1830 if (a == 0x09AFu && b == 0x09BCu) { *ab = 0x09DFu; return true; } |
| 1824 | 1831 |
| 1825 return c->unicode->compose (a, b, ab); | 1832 return (bool) c->unicode->compose (a, b, ab); |
| 1826 } | 1833 } |
| 1827 | 1834 |
| 1828 | 1835 |
| 1829 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic = | 1836 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic = |
| 1830 { | 1837 { |
| 1831 "indic", | 1838 "indic", |
| 1832 collect_features_indic, | 1839 collect_features_indic, |
| 1833 override_features_indic, | 1840 override_features_indic, |
| 1834 data_create_indic, | 1841 data_create_indic, |
| 1835 data_destroy_indic, | 1842 data_destroy_indic, |
| 1836 NULL, /* preprocess_text */ | 1843 NULL, /* preprocess_text */ |
| 1837 NULL, /* postprocess_glyphs */ | 1844 NULL, /* postprocess_glyphs */ |
| 1838 HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, | 1845 HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, |
| 1839 decompose_indic, | 1846 decompose_indic, |
| 1840 compose_indic, | 1847 compose_indic, |
| 1841 setup_masks_indic, | 1848 setup_masks_indic, |
| 1842 HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, | 1849 HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, |
| 1843 false, /* fallback_position */ | 1850 false, /* fallback_position */ |
| 1844 }; | 1851 }; |
| OLD | NEW |