Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(214)

Side by Side Diff: third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic.cc

Issue 1580513002: Roll HarfBuzz to 1.1.3 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: build fix Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright © 2011,2012 Google, Inc. 2 * Copyright © 2011,2012 Google, Inc.
3 * 3 *
4 * This is part of HarfBuzz, a text shaping library. 4 * This is part of HarfBuzz, a text shaping library.
5 * 5 *
6 * Permission is hereby granted, without written agreement and without 6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this 7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the 8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in 9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software. 10 * all copies of this software.
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after
169 hb_codepoint_t u = info.codepoint; 169 hb_codepoint_t u = info.codepoint;
170 unsigned int type = hb_indic_get_categories (u); 170 unsigned int type = hb_indic_get_categories (u);
171 indic_category_t cat = (indic_category_t) (type & 0x7Fu); 171 indic_category_t cat = (indic_category_t) (type & 0x7Fu);
172 indic_position_t pos = (indic_position_t) (type >> 8); 172 indic_position_t pos = (indic_position_t) (type >> 8);
173 173
174 174
175 /* 175 /*
176 * Re-assign category 176 * Re-assign category
177 */ 177 */
178 178
179
180 /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe
181 * treats a whole bunch of characters similarly.
182 * TESTS: For example, for U+0951:
183 * U+092E,U+0947,U+0952
184 * U+092E,U+0952,U+0947
185 * U+092E,U+0947,U+0951
186 * U+092E,U+0951,U+0947
187 * U+092E,U+0951,U+0952
188 * U+092E,U+0952,U+0951
189 */
190 if (unlikely (hb_in_ranges (u, 0x0951u, 0x0952u,
191 0x1CD0u, 0x1CD2u,
192 0x1CD4u, 0x1CE1u) ||
193 u == 0x1CF4u))
194 cat = OT_A;
195 /* The following act more like the Bindus. */ 179 /* The following act more like the Bindus. */
196 else if (unlikely (hb_in_range (u, 0x0953u, 0x0954u))) 180 if (unlikely (hb_in_range (u, 0x0953u, 0x0954u)))
197 cat = OT_SM; 181 cat = OT_SM;
198 /* The following act like consonants. */ 182 /* The following act like consonants. */
199 else if (unlikely (hb_in_ranges (u, 0x0A72u, 0x0A73u, 183 else if (unlikely (hb_in_ranges (u, 0x0A72u, 0x0A73u,
200 0x1CF5u, 0x1CF6u))) 184 0x1CF5u, 0x1CF6u)))
201 cat = OT_C; 185 cat = OT_C;
202 /* TODO: The following should only be allowed after a Visarga. 186 /* TODO: The following should only be allowed after a Visarga.
203 * For now, just treat them like regular tone marks. */ 187 * For now, just treat them like regular tone marks. */
204 else if (unlikely (hb_in_range (u, 0x1CE2u, 0x1CE8u))) 188 else if (unlikely (hb_in_range (u, 0x1CE2u, 0x1CE8u)))
205 cat = OT_A; 189 cat = OT_A;
206 /* TODO: The following should only be allowed after some of 190 /* TODO: The following should only be allowed after some of
207 * the nasalization marks, maybe only for U+1CE9..U+1CF1. 191 * the nasalization marks, maybe only for U+1CE9..U+1CF1.
208 * For now, just treat them like tone marks. */ 192 * For now, just treat them like tone marks. */
209 else if (unlikely (u == 0x1CEDu)) 193 else if (unlikely (u == 0x1CEDu))
210 cat = OT_A; 194 cat = OT_A;
211 /* The following take marks in standalone clusters, similar to Avagraha. */ 195 /* The following take marks in standalone clusters, similar to Avagraha. */
212 else if (unlikely (hb_in_ranges (u, 0xA8F2u, 0xA8F7u, 196 else if (unlikely (hb_in_ranges (u, 0xA8F2u, 0xA8F7u,
213 0x1CE9u, 0x1CECu, 197 0x1CE9u, 0x1CECu,
214 0x1CEEu, 0x1CF1u))) 198 0x1CEEu, 0x1CF1u)))
215 { 199 {
216 cat = OT_Symbol; 200 cat = OT_Symbol;
217 ASSERT_STATIC ((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol); 201 ASSERT_STATIC ((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol);
218 } 202 }
219 else if (unlikely (hb_in_range (u, 0x17CDu, 0x17D1u) || 203 else if (unlikely (u == 0x17DDu)) /* https://github.com/roozbehp/unicode-data/issues/2 */
220 » » u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */
221 { 204 {
222 /* These are like Top Matras. */
223 cat = OT_M; 205 cat = OT_M;
224 pos = POS_ABOVE_C; 206 pos = POS_ABOVE_C;
225 } 207 }
226 else if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */ 208 else if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */
227 else if (unlikely (u == 0x17D2u)) cat = OT_Coeng; /* Khmer coeng */
228 else if (unlikely (hb_in_range (u, 0x2010u, 0x2011u))) 209 else if (unlikely (hb_in_range (u, 0x2010u, 0x2011u)))
229 cat = OT_PLACEHOLDER; 210 cat = OT_PLACEHOLDER;
230 else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; 211 else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE;
231 else if (unlikely (u == 0xA982u)) cat = OT_SM; /* Javanese repha. */ 212 else if (unlikely (u == 0xA982u)) cat = OT_SM; /* Javanese repha. */
232 else if (unlikely (u == 0xA9BEu)) cat = OT_CM2; /* Javanese medial ya. */ 213 else if (unlikely (u == 0xA9BEu)) cat = OT_CM2; /* Javanese medial ya. */
233 else if (unlikely (u == 0xA9BDu)) { cat = OT_M; pos = POS_POST_C; } /* Javanese vocalic r. */ 214 else if (unlikely (u == 0xA9BDu)) { cat = OT_M; pos = POS_POST_C; } /* Javanese vocalic r. */
234 215
235 216
236 /* 217 /*
237 * Re-assign position. 218 * Re-assign position.
(...skipping 312 matching lines...) Expand 10 before | Expand all | Expand 10 after
550 for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++) 531 for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++)
551 if (plan->props.script == indic_configs[i].script) { 532 if (plan->props.script == indic_configs[i].script) {
552 indic_plan->config = &indic_configs[i]; 533 indic_plan->config = &indic_configs[i];
553 break; 534 break;
554 } 535 }
555 536
556 indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2'); 537 indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2');
557 indic_plan->virama_glyph = (hb_codepoint_t) -1; 538 indic_plan->virama_glyph = (hb_codepoint_t) -1;
558 539
559 /* Use zero-context would_substitute() matching for new-spec of the main 540 /* Use zero-context would_substitute() matching for new-spec of the main
560 * Indic scripts, and scripts with one spec only, but not for old-specs. */ 541 * Indic scripts, and scripts with one spec only, but not for old-specs.
561 bool zero_context = !indic_plan->is_old_spec; 542 * The new-spec for all dual-spec scripts says zero-context matching happens.
543 *
544 * However, testing with Malayalam shows that old and new spec both allow
545 * context. Testing with Bengali new-spec however shows that it doesn't.
546 * So, the heuristic here is the way it is. It should *only* be changed,
547 * as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE.
548 */
549 bool zero_context = !indic_plan->is_old_spec && plan->props.script != HB_SCRIPT_MALAYALAM;
562 indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'), zero_context); 550 indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'), zero_context);
563 indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), zero_context); 551 indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), zero_context);
564 indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'), zero_context); 552 indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'), zero_context);
565 indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'), zero_context); 553 indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'), zero_context);
566 554
567 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++) 555 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++)
568 indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ? 556 indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ?
569 0 : plan->map.get_1_mask (indic_features[i].tag); 557 0 : plan->map.get_1_mask (indic_features[i].tag);
570 558
571 return indic_plan; 559 return indic_plan;
(...skipping 769 matching lines...) Expand 10 before | Expand all | Expand 10 after
1341 base = i; 1329 base = i;
1342 while (base < end && is_halant_or_coeng (info[base])) 1330 while (base < end && is_halant_or_coeng (info[base]))
1343 base++; 1331 base++;
1344 info[base].indic_position() = POS_BASE_C; 1332 info[base].indic_position() = POS_BASE_C;
1345 1333
1346 try_pref = false; 1334 try_pref = false;
1347 } 1335 }
1348 break; 1336 break;
1349 } 1337 }
1350 } 1338 }
1339 /* For Malayalam, skip over unformed below- (but NOT post-) forms. */
1340 if (buffer->props.script == HB_SCRIPT_MALAYALAM)
1341 {
1342 for (unsigned int i = base + 1; i < end; i++)
1343 {
1344 while (i < end && is_joiner (info[i]))
1345 i++;
1346 if (i == end || !is_halant_or_coeng (info[i]))
1347 break;
1348 i++; /* Skip halant. */
1349 while (i < end && is_joiner (info[i]))
1350 i++;
1351 if (i < end && is_consonant (info[i]) && info[i].indic_position() == POS_BELOW_C)
1352 {
1353 base = i;
1354 info[base].indic_position() = POS_BASE_C;
1355 }
1356 }
1357 }
1351 1358
1352 if (start < base && info[base].indic_position() > POS_BASE_C) 1359 if (start < base && info[base].indic_position() > POS_BASE_C)
1353 base--; 1360 base--;
1354 break; 1361 break;
1355 } 1362 }
1356 if (base == end && start < base && 1363 if (base == end && start < base &&
1357 is_one_of (info[base - 1], FLAG (OT_ZWJ))) 1364 is_one_of (info[base - 1], FLAG (OT_ZWJ)))
1358 base--; 1365 base--;
1359 if (base < end) 1366 if (base < end)
1360 while (start < base && 1367 while (start < base &&
(...skipping 438 matching lines...) Expand 10 before | Expand all | Expand 10 after
1799 (c->font->get_glyph (ab, 0, &glyph) && 1806 (c->font->get_glyph (ab, 0, &glyph) &&
1800 indic_plan->pstf.would_substitute (&glyph, 1, c->font->face))) 1807 indic_plan->pstf.would_substitute (&glyph, 1, c->font->face)))
1801 { 1808 {
1802 /* Ok, safe to use Uniscribe-style decomposition. */ 1809 /* Ok, safe to use Uniscribe-style decomposition. */
1803 *a = 0x0DD9u; 1810 *a = 0x0DD9u;
1804 *b = ab; 1811 *b = ab;
1805 return true; 1812 return true;
1806 } 1813 }
1807 } 1814 }
1808 1815
1809 return c->unicode->decompose (ab, a, b); 1816 return (bool) c->unicode->decompose (ab, a, b);
1810 } 1817 }
1811 1818
1812 static bool 1819 static bool
1813 compose_indic (const hb_ot_shape_normalize_context_t *c, 1820 compose_indic (const hb_ot_shape_normalize_context_t *c,
1814 hb_codepoint_t a, 1821 hb_codepoint_t a,
1815 hb_codepoint_t b, 1822 hb_codepoint_t b,
1816 hb_codepoint_t *ab) 1823 hb_codepoint_t *ab)
1817 { 1824 {
1818 /* Avoid recomposing split matras. */ 1825 /* Avoid recomposing split matras. */
1819 if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) 1826 if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
1820 return false; 1827 return false;
1821 1828
1822 /* Composition-exclusion exceptions that we want to recompose. */ 1829 /* Composition-exclusion exceptions that we want to recompose. */
1823 if (a == 0x09AFu && b == 0x09BCu) { *ab = 0x09DFu; return true; } 1830 if (a == 0x09AFu && b == 0x09BCu) { *ab = 0x09DFu; return true; }
1824 1831
1825 return c->unicode->compose (a, b, ab); 1832 return (bool) c->unicode->compose (a, b, ab);
1826 } 1833 }
1827 1834
1828 1835
1829 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic = 1836 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
1830 { 1837 {
1831 "indic", 1838 "indic",
1832 collect_features_indic, 1839 collect_features_indic,
1833 override_features_indic, 1840 override_features_indic,
1834 data_create_indic, 1841 data_create_indic,
1835 data_destroy_indic, 1842 data_destroy_indic,
1836 NULL, /* preprocess_text */ 1843 NULL, /* preprocess_text */
1837 NULL, /* postprocess_glyphs */ 1844 NULL, /* postprocess_glyphs */
1838 HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, 1845 HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
1839 decompose_indic, 1846 decompose_indic,
1840 compose_indic, 1847 compose_indic,
1841 setup_masks_indic, 1848 setup_masks_indic,
1842 HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, 1849 HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
1843 false, /* fallback_position */ 1850 false, /* fallback_position */
1844 }; 1851 };
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698