| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright © 2012 Google, Inc. | 2 * Copyright © 2012 Google, Inc. |
| 3 * | 3 * |
| 4 * This is part of HarfBuzz, a text shaping library. | 4 * This is part of HarfBuzz, a text shaping library. |
| 5 * | 5 * |
| 6 * Permission is hereby granted, without written agreement and without | 6 * Permission is hereby granted, without written agreement and without |
| 7 * license or royalty fees, to use, copy, modify, and distribute this | 7 * license or royalty fees, to use, copy, modify, and distribute this |
| 8 * software and its documentation for any purpose, provided that the | 8 * software and its documentation for any purpose, provided that the |
| 9 * above copyright notice and the following two paragraphs appear in | 9 * above copyright notice and the following two paragraphs appear in |
| 10 * all copies of this software. | 10 * all copies of this software. |
| (...skipping 16 matching lines...) Expand all Loading... |
| 27 #ifndef HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH | 27 #ifndef HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH |
| 28 #define HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH | 28 #define HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH |
| 29 | 29 |
| 30 #include "hb-private.hh" | 30 #include "hb-private.hh" |
| 31 | 31 |
| 32 | 32 |
| 33 #include "hb-ot-shape-complex-private.hh" | 33 #include "hb-ot-shape-complex-private.hh" |
| 34 #include "hb-ot-shape-private.hh" /* XXX Remove */ | 34 #include "hb-ot-shape-private.hh" /* XXX Remove */ |
| 35 | 35 |
| 36 | 36 |
| 37 /* buffer var allocations */ | |
| 38 #define indic_category() complex_var_u8_0() /* indic_category_t */ | |
| 39 #define indic_position() complex_var_u8_1() /* indic_matra_category_t */ | |
| 40 | |
| 41 | |
| 42 #define INDIC_TABLE_ELEMENT_TYPE uint16_t | 37 #define INDIC_TABLE_ELEMENT_TYPE uint16_t |
| 43 | 38 |
| 44 /* Categories used in the OpenType spec: | 39 /* Categories used in the OpenType spec: |
| 45 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx | 40 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx |
| 46 */ | 41 */ |
| 47 /* Note: This enum is duplicated in the -machine.rl source file. | 42 /* Note: This enum is duplicated in the -machine.rl source file. |
| 48 * Not sure how to avoid duplication. */ | 43 * Not sure how to avoid duplication. */ |
| 49 enum indic_category_t { | 44 enum indic_category_t { |
| 50 OT_X = 0, | 45 OT_X = 0, |
| 51 OT_C, | 46 OT_C, |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 95 }; | 90 }; |
| 96 | 91 |
| 97 /* Categories used in IndicSyllabicCategory.txt from UCD. */ | 92 /* Categories used in IndicSyllabicCategory.txt from UCD. */ |
| 98 enum indic_syllabic_category_t { | 93 enum indic_syllabic_category_t { |
| 99 INDIC_SYLLABIC_CATEGORY_OTHER = OT_X, | 94 INDIC_SYLLABIC_CATEGORY_OTHER = OT_X, |
| 100 | 95 |
| 101 INDIC_SYLLABIC_CATEGORY_AVAGRAHA = OT_X, | 96 INDIC_SYLLABIC_CATEGORY_AVAGRAHA = OT_X, |
| 102 INDIC_SYLLABIC_CATEGORY_BINDU = OT_SM, | 97 INDIC_SYLLABIC_CATEGORY_BINDU = OT_SM, |
| 103 INDIC_SYLLABIC_CATEGORY_CONSONANT = OT_C, | 98 INDIC_SYLLABIC_CATEGORY_CONSONANT = OT_C, |
| 104 INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD = OT_C, | 99 INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD = OT_C, |
| 105 INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL» = OT_C, | 100 INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL» = OT_CM, |
| 106 INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER = OT_C, | 101 INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER = OT_C, |
| 107 INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_CM, | 102 INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_CM, |
| 108 INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_NBSP, | 103 INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_NBSP, |
| 109 INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_C, | 104 INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_C, |
| 110 INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_Repha, | 105 INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_Repha, |
| 111 INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X, | 106 INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X, |
| 112 INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N, | 107 INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N, |
| 113 INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS, | 108 INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS, |
| 114 INDIC_SYLLABIC_CATEGORY_TONE_LETTER = OT_X, | 109 INDIC_SYLLABIC_CATEGORY_TONE_LETTER = OT_X, |
| 115 INDIC_SYLLABIC_CATEGORY_TONE_MARK = OT_N, | 110 INDIC_SYLLABIC_CATEGORY_TONE_MARK = OT_N, |
| (...skipping 27 matching lines...) Expand all Loading... |
| 143 INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT = POS_PRE_M | 138 INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT = POS_PRE_M |
| 144 }; | 139 }; |
| 145 | 140 |
| 146 /* Note: We use ASSERT_STATIC_EXPR_ZERO() instead of ASSERT_STATIC_EXPR() and the comma operation | 141 /* Note: We use ASSERT_STATIC_EXPR_ZERO() instead of ASSERT_STATIC_EXPR() and the comma operation |
| 147 * because gcc fails to optimize the latter and fills the table in at runtime. */ | 142 * because gcc fails to optimize the latter and fills the table in at runtime. */ |
| 148 #define INDIC_COMBINE_CATEGORIES(S,M) \ | 143 #define INDIC_COMBINE_CATEGORIES(S,M) \ |
| 149 (ASSERT_STATIC_EXPR_ZERO (M == INDIC_MATRA_CATEGORY_NOT_APPLICABLE || (S == IN
DIC_SYLLABIC_CATEGORY_VIRAMA || S == INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT)) +
\ | 144 (ASSERT_STATIC_EXPR_ZERO (M == INDIC_MATRA_CATEGORY_NOT_APPLICABLE || (S == IN
DIC_SYLLABIC_CATEGORY_VIRAMA || S == INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT)) +
\ |
| 150 ASSERT_STATIC_EXPR_ZERO (S < 255 && M < 255) + \ | 145 ASSERT_STATIC_EXPR_ZERO (S < 255 && M < 255) + \ |
| 151 ((M << 8) | S)) | 146 ((M << 8) | S)) |
| 152 | 147 |
| 153 | 148 HB_INTERNAL INDIC_TABLE_ELEMENT_TYPE |
| 154 #include "hb-ot-shape-complex-indic-table.hh" | 149 hb_indic_get_categories (hb_codepoint_t u); |
| 155 | |
| 156 | |
| 157 #define IN_HALF_BLOCK(u, Base) (((u) & ~0x7F) == (Base)) | |
| 158 | |
| 159 #define IS_DEVA(u) (IN_HALF_BLOCK (u, 0x0900)) | |
| 160 #define IS_BENG(u) (IN_HALF_BLOCK (u, 0x0980)) | |
| 161 #define IS_GURU(u) (IN_HALF_BLOCK (u, 0x0A00)) | |
| 162 #define IS_GUJR(u) (IN_HALF_BLOCK (u, 0x0A80)) | |
| 163 #define IS_ORYA(u) (IN_HALF_BLOCK (u, 0x0B00)) | |
| 164 #define IS_TAML(u) (IN_HALF_BLOCK (u, 0x0B80)) | |
| 165 #define IS_TELU(u) (IN_HALF_BLOCK (u, 0x0C00)) | |
| 166 #define IS_KNDA(u) (IN_HALF_BLOCK (u, 0x0C80)) | |
| 167 #define IS_MLYM(u) (IN_HALF_BLOCK (u, 0x0D00)) | |
| 168 #define IS_SINH(u) (IN_HALF_BLOCK (u, 0x0D80)) | |
| 169 #define IS_KHMR(u) (IN_HALF_BLOCK (u, 0x1780)) | |
| 170 | |
| 171 | |
| 172 #define MATRA_POS_LEFT(u)» POS_PRE_M | |
| 173 #define MATRA_POS_RIGHT(u)» ( \ | |
| 174 » » » » IS_DEVA(u) ? POS_AFTER_SUB : \ | |
| 175 » » » » IS_BENG(u) ? POS_AFTER_POST : \ | |
| 176 » » » » IS_GURU(u) ? POS_AFTER_POST : \ | |
| 177 » » » » IS_GUJR(u) ? POS_AFTER_POST : \ | |
| 178 » » » » IS_ORYA(u) ? POS_AFTER_POST : \ | |
| 179 » » » » IS_TAML(u) ? POS_AFTER_POST : \ | |
| 180 » » » » IS_TELU(u) ? (u <= 0x0C42 ? POS_BEFORE_SUB : P
OS_AFTER_SUB) : \ | |
| 181 » » » » IS_KNDA(u) ? (u < 0x0CC3 || u > 0xCD6 ? POS_BE
FORE_SUB : POS_AFTER_SUB) : \ | |
| 182 » » » » IS_MLYM(u) ? POS_AFTER_POST : \ | |
| 183 » » » » IS_SINH(u) ? POS_AFTER_SUB : \ | |
| 184 » » » » IS_KHMR(u) ? POS_AFTER_POST : \ | |
| 185 » » » » /*default*/ POS_AFTER_SUB \ | |
| 186 » » » » ) | |
| 187 #define MATRA_POS_TOP(u)» ( /* BENG and MLYM don't have top matras. */ \ | |
| 188 » » » » IS_DEVA(u) ? POS_AFTER_SUB : \ | |
| 189 » » » » IS_GURU(u) ? POS_AFTER_POST : /* Deviate from
spec */ \ | |
| 190 » » » » IS_GUJR(u) ? POS_AFTER_SUB : \ | |
| 191 » » » » IS_ORYA(u) ? POS_AFTER_MAIN : \ | |
| 192 » » » » IS_TAML(u) ? POS_AFTER_SUB : \ | |
| 193 » » » » IS_TELU(u) ? POS_BEFORE_SUB : \ | |
| 194 » » » » IS_KNDA(u) ? POS_BEFORE_SUB : \ | |
| 195 » » » » IS_SINH(u) ? POS_AFTER_SUB : \ | |
| 196 » » » » IS_KHMR(u) ? POS_AFTER_POST : \ | |
| 197 » » » » /*default*/ POS_AFTER_SUB \ | |
| 198 » » » » ) | |
| 199 #define MATRA_POS_BOTTOM(u)» ( \ | |
| 200 » » » » IS_DEVA(u) ? POS_AFTER_SUB : \ | |
| 201 » » » » IS_BENG(u) ? POS_AFTER_SUB : \ | |
| 202 » » » » IS_GURU(u) ? POS_AFTER_POST : \ | |
| 203 » » » » IS_GUJR(u) ? POS_AFTER_POST : \ | |
| 204 » » » » IS_ORYA(u) ? POS_AFTER_SUB : \ | |
| 205 » » » » IS_TAML(u) ? POS_AFTER_POST : \ | |
| 206 » » » » IS_TELU(u) ? POS_BEFORE_SUB : \ | |
| 207 » » » » IS_KNDA(u) ? POS_BEFORE_SUB : \ | |
| 208 » » » » IS_MLYM(u) ? POS_AFTER_POST : \ | |
| 209 » » » » IS_SINH(u) ? POS_AFTER_SUB : \ | |
| 210 » » » » IS_KHMR(u) ? POS_AFTER_POST : \ | |
| 211 » » » » /*default*/ POS_AFTER_SUB \ | |
| 212 » » » » ) | |
| 213 | |
| 214 | |
| 215 static inline indic_position_t | |
| 216 matra_position (hb_codepoint_t u, indic_position_t side) | |
| 217 { | |
| 218 switch ((int) side) | |
| 219 { | |
| 220 case POS_PRE_C:» return MATRA_POS_LEFT (u); | |
| 221 case POS_POST_C:» return MATRA_POS_RIGHT (u); | |
| 222 case POS_ABOVE_C:» return MATRA_POS_TOP (u); | |
| 223 case POS_BELOW_C:» return MATRA_POS_BOTTOM (u); | |
| 224 }; | |
| 225 return side; | |
| 226 } | |
| 227 | |
| 228 | |
| 229 | |
| 230 /* XXX | |
| 231 * This is a hack for now. We should move this data into the main Indic table. | |
| 232 * Or completely remove it and just check in the tables. | |
| 233 */ | |
| 234 static const hb_codepoint_t ra_chars[] = { | |
| 235 0x0930, /* Devanagari */ | |
| 236 0x09B0, /* Bengali */ | |
| 237 0x09F0, /* Bengali */ | |
| 238 0x0A30, /* Gurmukhi */» /* No Reph */ | |
| 239 0x0AB0, /* Gujarati */ | |
| 240 0x0B30, /* Oriya */ | |
| 241 0x0BB0, /* Tamil */» » /* No Reph */ | |
| 242 0x0C30, /* Telugu */» » /* Reph formed only with ZWJ */ | |
| 243 0x0CB0, /* Kannada */ | |
| 244 0x0D30, /* Malayalam */» /* No Reph, Logical Repha */ | |
| 245 | |
| 246 0x0DBB, /* Sinhala */»» /* Reph formed only with ZWJ */ | |
| 247 | |
| 248 0x179A, /* Khmer */» » /* No Reph, Visual Repha */ | |
| 249 }; | |
| 250 | |
| 251 static inline indic_position_t | |
| 252 consonant_position (hb_codepoint_t u) | |
| 253 { | |
| 254 if ((u & ~0x007F) == 0x1780) | |
| 255 return POS_BELOW_C; /* In Khmer coeng model, post and below forms should not
be reordered. */ | |
| 256 return POS_BASE_C; /* Will recategorize later based on font lookups. */ | |
| 257 } | |
| 258 | |
| 259 static inline bool | |
| 260 is_ra (hb_codepoint_t u) | |
| 261 { | |
| 262 for (unsigned int i = 0; i < ARRAY_LENGTH (ra_chars); i++) | |
| 263 if (u == ra_chars[i]) | |
| 264 return true; | |
| 265 return false; | |
| 266 } | |
| 267 | |
| 268 | |
| 269 static inline bool | |
| 270 is_one_of (const hb_glyph_info_t &info, unsigned int flags) | |
| 271 { | |
| 272 /* If it ligated, all bets are off. */ | |
| 273 if (is_a_ligature (info)) return false; | |
| 274 return !!(FLAG (info.indic_category()) & flags); | |
| 275 } | |
| 276 | |
| 277 #define JOINER_FLAGS (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ)) | |
| 278 static inline bool | |
| 279 is_joiner (const hb_glyph_info_t &info) | |
| 280 { | |
| 281 return is_one_of (info, JOINER_FLAGS); | |
| 282 } | |
| 283 | |
| 284 /* Note: | |
| 285 * | |
| 286 * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels | |
| 287 * cannot happen in a consonant syllable. The plus side however is, we can call the | |
| 288 * consonant syllable logic from the vowel syllable function and get it all right! */ | |
| 289 #define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_CM) | FLAG (OT_Ra) | FLAG (OT_V)
| FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE)) | |
| 290 static inline bool | |
| 291 is_consonant (const hb_glyph_info_t &info) | |
| 292 { | |
| 293 return is_one_of (info, CONSONANT_FLAGS); | |
| 294 } | |
| 295 | |
| 296 #define HALANT_OR_COENG_FLAGS (FLAG (OT_H) | FLAG (OT_Coeng)) | |
| 297 static inline bool | |
| 298 is_halant_or_coeng (const hb_glyph_info_t &info) | |
| 299 { | |
| 300 return is_one_of (info, HALANT_OR_COENG_FLAGS); | |
| 301 } | |
| 302 | |
| 303 static inline void | |
| 304 set_indic_properties (hb_glyph_info_t &info) | |
| 305 { | |
| 306 hb_codepoint_t u = info.codepoint; | |
| 307 unsigned int type = get_indic_categories (u); | |
| 308 indic_category_t cat = (indic_category_t) (type & 0x7F); | |
| 309 indic_position_t pos = (indic_position_t) (type >> 8); | |
| 310 | |
| 311 | |
| 312 /* | |
| 313 * Re-assign category | |
| 314 */ | |
| 315 | |
| 316 | |
| 317 /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe | |
| 318 * treats U+0951..U+0952 all as OT_VD. | |
| 319 * TESTS: | |
| 320 * U+092E,U+0947,U+0952 | |
| 321 * U+092E,U+0952,U+0947 | |
| 322 * U+092E,U+0947,U+0951 | |
| 323 * U+092E,U+0951,U+0947 | |
| 324 * */ | |
| 325 if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0951, 0x0954))) | |
| 326 cat = OT_VD; | |
| 327 | |
| 328 if (unlikely (u == 0x17D1)) | |
| 329 cat = OT_X; | |
| 330 if (cat == OT_X && | |
| 331 unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CB, 0x17D3))) /* Khmer Vario
us signs */ | |
| 332 { | |
| 333 /* These are like Top Matras. */ | |
| 334 cat = OT_M; | |
| 335 pos = POS_ABOVE_C; | |
| 336 } | |
| 337 if (u == 0x17C6) /* Khmer Bindu doesn't like to be repositioned. */ | |
| 338 cat = OT_N; | |
| 339 | |
| 340 if (unlikely (u == 0x17D2)) cat = OT_Coeng; /* Khmer coeng */ | |
| 341 else if (unlikely (u == 0x200C)) cat = OT_ZWNJ; | |
| 342 else if (unlikely (u == 0x200D)) cat = OT_ZWJ; | |
| 343 else if (unlikely (u == 0x25CC)) cat = OT_DOTTEDCIRCLE; | |
| 344 else if (unlikely (u == 0x0A71)) cat = OT_SM; /* GURMUKHI ADDAK. More like co
nsonant medial. like 0A75. */ | |
| 345 | |
| 346 if (cat == OT_Repha) { | |
| 347 /* There are two kinds of characters marked as Repha: | |
| 348 * - The ones that are GenCat=Mn are already positioned visually, ie. after
base. (eg. Khmer) | |
| 349 * - The ones that are GenCat=Lo is encoded logically, ie. beginning of syll
able. (eg. Malayalam) | |
| 350 * | |
| 351 * We recategorize the first kind to look like a Nukta and attached to the b
ase directly. | |
| 352 */ | |
| 353 if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGO
RY_NON_SPACING_MARK) | |
| 354 cat = OT_N; | |
| 355 } | |
| 356 | |
| 357 | |
| 358 | |
| 359 /* | |
| 360 * Re-assign position. | |
| 361 */ | |
| 362 | |
| 363 if ((FLAG (cat) & CONSONANT_FLAGS)) | |
| 364 { | |
| 365 pos = consonant_position (u); | |
| 366 if (is_ra (u)) | |
| 367 cat = OT_Ra; | |
| 368 } | |
| 369 else if (cat == OT_M) | |
| 370 { | |
| 371 pos = matra_position (u, pos); | |
| 372 } | |
| 373 else if (cat == OT_SM || cat == OT_VD) | |
| 374 { | |
| 375 pos = POS_SMVD; | |
| 376 } | |
| 377 | |
| 378 if (unlikely (u == 0x0B01)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub
in the spec. */ | |
| 379 | |
| 380 | |
| 381 | |
| 382 info.indic_category() = cat; | |
| 383 info.indic_position() = pos; | |
| 384 } | |
| 385 | |
| 386 | |
| 387 | 150 |
| 388 #endif /* HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH */ | 151 #endif /* HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH */ |
| OLD | NEW |