OLD | NEW |
1 /* | 1 /* |
2 * Copyright © 2012 Google, Inc. | 2 * Copyright © 2012 Google, Inc. |
3 * | 3 * |
4 * This is part of HarfBuzz, a text shaping library. | 4 * This is part of HarfBuzz, a text shaping library. |
5 * | 5 * |
6 * Permission is hereby granted, without written agreement and without | 6 * Permission is hereby granted, without written agreement and without |
7 * license or royalty fees, to use, copy, modify, and distribute this | 7 * license or royalty fees, to use, copy, modify, and distribute this |
8 * software and its documentation for any purpose, provided that the | 8 * software and its documentation for any purpose, provided that the |
9 * above copyright notice and the following two paragraphs appear in | 9 * above copyright notice and the following two paragraphs appear in |
10 * all copies of this software. | 10 * all copies of this software. |
(...skipping 16 matching lines...) Expand all Loading... |
27 #ifndef HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH | 27 #ifndef HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH |
28 #define HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH | 28 #define HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH |
29 | 29 |
30 #include "hb-private.hh" | 30 #include "hb-private.hh" |
31 | 31 |
32 | 32 |
33 #include "hb-ot-shape-complex-private.hh" | 33 #include "hb-ot-shape-complex-private.hh" |
34 #include "hb-ot-shape-private.hh" /* XXX Remove */ | 34 #include "hb-ot-shape-private.hh" /* XXX Remove */ |
35 | 35 |
36 | 36 |
37 /* buffer var allocations */ | |
38 #define indic_category() complex_var_u8_0() /* indic_category_t */ | |
39 #define indic_position() complex_var_u8_1() /* indic_matra_category_t */ | |
40 | |
41 | |
42 #define INDIC_TABLE_ELEMENT_TYPE uint16_t | 37 #define INDIC_TABLE_ELEMENT_TYPE uint16_t |
43 | 38 |
44 /* Categories used in the OpenType spec: | 39 /* Categories used in the OpenType spec: |
45 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx | 40 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx |
46 */ | 41 */ |
47 /* Note: This enum is duplicated in the -machine.rl source file. | 42 /* Note: This enum is duplicated in the -machine.rl source file. |
48 * Not sure how to avoid duplication. */ | 43 * Not sure how to avoid duplication. */ |
49 enum indic_category_t { | 44 enum indic_category_t { |
50 OT_X = 0, | 45 OT_X = 0, |
51 OT_C, | 46 OT_C, |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
95 }; | 90 }; |
96 | 91 |
97 /* Categories used in IndicSyllabicCategory.txt from UCD. */ | 92 /* Categories used in IndicSyllabicCategory.txt from UCD. */ |
98 enum indic_syllabic_category_t { | 93 enum indic_syllabic_category_t { |
99 INDIC_SYLLABIC_CATEGORY_OTHER = OT_X, | 94 INDIC_SYLLABIC_CATEGORY_OTHER = OT_X, |
100 | 95 |
101 INDIC_SYLLABIC_CATEGORY_AVAGRAHA = OT_X, | 96 INDIC_SYLLABIC_CATEGORY_AVAGRAHA = OT_X, |
102 INDIC_SYLLABIC_CATEGORY_BINDU = OT_SM, | 97 INDIC_SYLLABIC_CATEGORY_BINDU = OT_SM, |
103 INDIC_SYLLABIC_CATEGORY_CONSONANT = OT_C, | 98 INDIC_SYLLABIC_CATEGORY_CONSONANT = OT_C, |
104 INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD = OT_C, | 99 INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD = OT_C, |
105 INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL» = OT_C, | 100 INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL» = OT_CM, |
106 INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER = OT_C, | 101 INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER = OT_C, |
107 INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_CM, | 102 INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_CM, |
108 INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_NBSP, | 103 INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_NBSP, |
109 INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_C, | 104 INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_C, |
110 INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_Repha, | 105 INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_Repha, |
111 INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X, | 106 INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X, |
112 INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N, | 107 INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N, |
113 INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS, | 108 INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS, |
114 INDIC_SYLLABIC_CATEGORY_TONE_LETTER = OT_X, | 109 INDIC_SYLLABIC_CATEGORY_TONE_LETTER = OT_X, |
115 INDIC_SYLLABIC_CATEGORY_TONE_MARK = OT_N, | 110 INDIC_SYLLABIC_CATEGORY_TONE_MARK = OT_N, |
(...skipping 27 matching lines...) Expand all Loading... |
143 INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT = POS_PRE_M | 138 INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT = POS_PRE_M |
144 }; | 139 }; |
145 | 140 |
146 /* Note: We use ASSERT_STATIC_EXPR_ZERO() instead of ASSERT_STATIC_EXPR() and th
e comma operation | 141 /* Note: We use ASSERT_STATIC_EXPR_ZERO() instead of ASSERT_STATIC_EXPR() and th
e comma operation |
147 * because gcc fails to optimize the latter and fills the table in at runtime. *
/ | 142 * because gcc fails to optimize the latter and fills the table in at runtime. *
/ |
148 #define INDIC_COMBINE_CATEGORIES(S,M) \ | 143 #define INDIC_COMBINE_CATEGORIES(S,M) \ |
149 (ASSERT_STATIC_EXPR_ZERO (M == INDIC_MATRA_CATEGORY_NOT_APPLICABLE || (S == IN
DIC_SYLLABIC_CATEGORY_VIRAMA || S == INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT)) +
\ | 144 (ASSERT_STATIC_EXPR_ZERO (M == INDIC_MATRA_CATEGORY_NOT_APPLICABLE || (S == IN
DIC_SYLLABIC_CATEGORY_VIRAMA || S == INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT)) +
\ |
150 ASSERT_STATIC_EXPR_ZERO (S < 255 && M < 255) + \ | 145 ASSERT_STATIC_EXPR_ZERO (S < 255 && M < 255) + \ |
151 ((M << 8) | S)) | 146 ((M << 8) | S)) |
152 | 147 |
153 | 148 HB_INTERNAL INDIC_TABLE_ELEMENT_TYPE |
154 #include "hb-ot-shape-complex-indic-table.hh" | 149 hb_indic_get_categories (hb_codepoint_t u); |
155 | |
156 | |
157 #define IN_HALF_BLOCK(u, Base) (((u) & ~0x7F) == (Base)) | |
158 | |
159 #define IS_DEVA(u) (IN_HALF_BLOCK (u, 0x0900)) | |
160 #define IS_BENG(u) (IN_HALF_BLOCK (u, 0x0980)) | |
161 #define IS_GURU(u) (IN_HALF_BLOCK (u, 0x0A00)) | |
162 #define IS_GUJR(u) (IN_HALF_BLOCK (u, 0x0A80)) | |
163 #define IS_ORYA(u) (IN_HALF_BLOCK (u, 0x0B00)) | |
164 #define IS_TAML(u) (IN_HALF_BLOCK (u, 0x0B80)) | |
165 #define IS_TELU(u) (IN_HALF_BLOCK (u, 0x0C00)) | |
166 #define IS_KNDA(u) (IN_HALF_BLOCK (u, 0x0C80)) | |
167 #define IS_MLYM(u) (IN_HALF_BLOCK (u, 0x0D00)) | |
168 #define IS_SINH(u) (IN_HALF_BLOCK (u, 0x0D80)) | |
169 #define IS_KHMR(u) (IN_HALF_BLOCK (u, 0x1780)) | |
170 | |
171 | |
172 #define MATRA_POS_LEFT(u)» POS_PRE_M | |
173 #define MATRA_POS_RIGHT(u)» ( \ | |
174 » » » » IS_DEVA(u) ? POS_AFTER_SUB : \ | |
175 » » » » IS_BENG(u) ? POS_AFTER_POST : \ | |
176 » » » » IS_GURU(u) ? POS_AFTER_POST : \ | |
177 » » » » IS_GUJR(u) ? POS_AFTER_POST : \ | |
178 » » » » IS_ORYA(u) ? POS_AFTER_POST : \ | |
179 » » » » IS_TAML(u) ? POS_AFTER_POST : \ | |
180 » » » » IS_TELU(u) ? (u <= 0x0C42 ? POS_BEFORE_SUB : P
OS_AFTER_SUB) : \ | |
181 » » » » IS_KNDA(u) ? (u < 0x0CC3 || u > 0xCD6 ? POS_BE
FORE_SUB : POS_AFTER_SUB) : \ | |
182 » » » » IS_MLYM(u) ? POS_AFTER_POST : \ | |
183 » » » » IS_SINH(u) ? POS_AFTER_SUB : \ | |
184 » » » » IS_KHMR(u) ? POS_AFTER_POST : \ | |
185 » » » » /*default*/ POS_AFTER_SUB \ | |
186 » » » » ) | |
187 #define MATRA_POS_TOP(u)» ( /* BENG and MLYM don't have top matras. */ \ | |
188 » » » » IS_DEVA(u) ? POS_AFTER_SUB : \ | |
189 » » » » IS_GURU(u) ? POS_AFTER_POST : /* Deviate from
spec */ \ | |
190 » » » » IS_GUJR(u) ? POS_AFTER_SUB : \ | |
191 » » » » IS_ORYA(u) ? POS_AFTER_MAIN : \ | |
192 » » » » IS_TAML(u) ? POS_AFTER_SUB : \ | |
193 » » » » IS_TELU(u) ? POS_BEFORE_SUB : \ | |
194 » » » » IS_KNDA(u) ? POS_BEFORE_SUB : \ | |
195 » » » » IS_SINH(u) ? POS_AFTER_SUB : \ | |
196 » » » » IS_KHMR(u) ? POS_AFTER_POST : \ | |
197 » » » » /*default*/ POS_AFTER_SUB \ | |
198 » » » » ) | |
199 #define MATRA_POS_BOTTOM(u)» ( \ | |
200 » » » » IS_DEVA(u) ? POS_AFTER_SUB : \ | |
201 » » » » IS_BENG(u) ? POS_AFTER_SUB : \ | |
202 » » » » IS_GURU(u) ? POS_AFTER_POST : \ | |
203 » » » » IS_GUJR(u) ? POS_AFTER_POST : \ | |
204 » » » » IS_ORYA(u) ? POS_AFTER_SUB : \ | |
205 » » » » IS_TAML(u) ? POS_AFTER_POST : \ | |
206 » » » » IS_TELU(u) ? POS_BEFORE_SUB : \ | |
207 » » » » IS_KNDA(u) ? POS_BEFORE_SUB : \ | |
208 » » » » IS_MLYM(u) ? POS_AFTER_POST : \ | |
209 » » » » IS_SINH(u) ? POS_AFTER_SUB : \ | |
210 » » » » IS_KHMR(u) ? POS_AFTER_POST : \ | |
211 » » » » /*default*/ POS_AFTER_SUB \ | |
212 » » » » ) | |
213 | |
214 | |
215 static inline indic_position_t | |
216 matra_position (hb_codepoint_t u, indic_position_t side) | |
217 { | |
218 switch ((int) side) | |
219 { | |
220 case POS_PRE_C:» return MATRA_POS_LEFT (u); | |
221 case POS_POST_C:» return MATRA_POS_RIGHT (u); | |
222 case POS_ABOVE_C:» return MATRA_POS_TOP (u); | |
223 case POS_BELOW_C:» return MATRA_POS_BOTTOM (u); | |
224 }; | |
225 return side; | |
226 } | |
227 | |
228 | |
229 | |
230 /* XXX | |
231 * This is a hack for now. We should move this data into the main Indic table. | |
232 * Or completely remove it and just check in the tables. | |
233 */ | |
234 static const hb_codepoint_t ra_chars[] = { | |
235 0x0930, /* Devanagari */ | |
236 0x09B0, /* Bengali */ | |
237 0x09F0, /* Bengali */ | |
238 0x0A30, /* Gurmukhi */» /* No Reph */ | |
239 0x0AB0, /* Gujarati */ | |
240 0x0B30, /* Oriya */ | |
241 0x0BB0, /* Tamil */» » /* No Reph */ | |
242 0x0C30, /* Telugu */» » /* Reph formed only with ZWJ */ | |
243 0x0CB0, /* Kannada */ | |
244 0x0D30, /* Malayalam */» /* No Reph, Logical Repha */ | |
245 | |
246 0x0DBB, /* Sinhala */»» /* Reph formed only with ZWJ */ | |
247 | |
248 0x179A, /* Khmer */» » /* No Reph, Visual Repha */ | |
249 }; | |
250 | |
251 static inline indic_position_t | |
252 consonant_position (hb_codepoint_t u) | |
253 { | |
254 if ((u & ~0x007F) == 0x1780) | |
255 return POS_BELOW_C; /* In Khmer coeng model, post and below forms should not
be reordered. */ | |
256 return POS_BASE_C; /* Will recategorize later based on font lookups. */ | |
257 } | |
258 | |
259 static inline bool | |
260 is_ra (hb_codepoint_t u) | |
261 { | |
262 for (unsigned int i = 0; i < ARRAY_LENGTH (ra_chars); i++) | |
263 if (u == ra_chars[i]) | |
264 return true; | |
265 return false; | |
266 } | |
267 | |
268 | |
269 static inline bool | |
270 is_one_of (const hb_glyph_info_t &info, unsigned int flags) | |
271 { | |
272 /* If it ligated, all bets are off. */ | |
273 if (is_a_ligature (info)) return false; | |
274 return !!(FLAG (info.indic_category()) & flags); | |
275 } | |
276 | |
277 #define JOINER_FLAGS (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ)) | |
278 static inline bool | |
279 is_joiner (const hb_glyph_info_t &info) | |
280 { | |
281 return is_one_of (info, JOINER_FLAGS); | |
282 } | |
283 | |
284 /* Note: | |
285 * | |
286 * We treat Vowels and placeholders as if they were consonants. This is safe be
cause Vowels | |
287 * cannot happen in a consonant syllable. The plus side however is, we can call
the | |
288 * consonant syllable logic from the vowel syllable function and get it all righ
t! */ | |
289 #define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_CM) | FLAG (OT_Ra) | FLAG (OT_V)
| FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE)) | |
290 static inline bool | |
291 is_consonant (const hb_glyph_info_t &info) | |
292 { | |
293 return is_one_of (info, CONSONANT_FLAGS); | |
294 } | |
295 | |
296 #define HALANT_OR_COENG_FLAGS (FLAG (OT_H) | FLAG (OT_Coeng)) | |
297 static inline bool | |
298 is_halant_or_coeng (const hb_glyph_info_t &info) | |
299 { | |
300 return is_one_of (info, HALANT_OR_COENG_FLAGS); | |
301 } | |
302 | |
303 static inline void | |
304 set_indic_properties (hb_glyph_info_t &info) | |
305 { | |
306 hb_codepoint_t u = info.codepoint; | |
307 unsigned int type = get_indic_categories (u); | |
308 indic_category_t cat = (indic_category_t) (type & 0x7F); | |
309 indic_position_t pos = (indic_position_t) (type >> 8); | |
310 | |
311 | |
312 /* | |
313 * Re-assign category | |
314 */ | |
315 | |
316 | |
317 /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe | |
318 * treats U+0951..U+0952 all as OT_VD. | |
319 * TESTS: | |
320 * U+092E,U+0947,U+0952 | |
321 * U+092E,U+0952,U+0947 | |
322 * U+092E,U+0947,U+0951 | |
323 * U+092E,U+0951,U+0947 | |
324 * */ | |
325 if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0951, 0x0954))) | |
326 cat = OT_VD; | |
327 | |
328 if (unlikely (u == 0x17D1)) | |
329 cat = OT_X; | |
330 if (cat == OT_X && | |
331 unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CB, 0x17D3))) /* Khmer Vario
us signs */ | |
332 { | |
333 /* These are like Top Matras. */ | |
334 cat = OT_M; | |
335 pos = POS_ABOVE_C; | |
336 } | |
337 if (u == 0x17C6) /* Khmer Bindu doesn't like to be repositioned. */ | |
338 cat = OT_N; | |
339 | |
340 if (unlikely (u == 0x17D2)) cat = OT_Coeng; /* Khmer coeng */ | |
341 else if (unlikely (u == 0x200C)) cat = OT_ZWNJ; | |
342 else if (unlikely (u == 0x200D)) cat = OT_ZWJ; | |
343 else if (unlikely (u == 0x25CC)) cat = OT_DOTTEDCIRCLE; | |
344 else if (unlikely (u == 0x0A71)) cat = OT_SM; /* GURMUKHI ADDAK. More like co
nsonant medial. like 0A75. */ | |
345 | |
346 if (cat == OT_Repha) { | |
347 /* There are two kinds of characters marked as Repha: | |
348 * - The ones that are GenCat=Mn are already positioned visually, ie. after
base. (eg. Khmer) | |
349 * - The ones that are GenCat=Lo are encoded logically, ie. beginning of syll
able. (eg. Malayalam) | |
350 * | |
351 * We recategorize the first kind to look like a Nukta and attached to the b
ase directly. | |
352 */ | |
353 if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGO
RY_NON_SPACING_MARK) | |
354 cat = OT_N; | |
355 } | |
356 | |
357 | |
358 | |
359 /* | |
360 * Re-assign position. | |
361 */ | |
362 | |
363 if ((FLAG (cat) & CONSONANT_FLAGS)) | |
364 { | |
365 pos = consonant_position (u); | |
366 if (is_ra (u)) | |
367 cat = OT_Ra; | |
368 } | |
369 else if (cat == OT_M) | |
370 { | |
371 pos = matra_position (u, pos); | |
372 } | |
373 else if (cat == OT_SM || cat == OT_VD) | |
374 { | |
375 pos = POS_SMVD; | |
376 } | |
377 | |
378 if (unlikely (u == 0x0B01)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub
in the spec. */ | |
379 | |
380 | |
381 | |
382 info.indic_category() = cat; | |
383 info.indic_position() = pos; | |
384 } | |
385 | |
386 | |
387 | 150 |
388 #endif /* HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH */ | 151 #endif /* HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH */ |
OLD | NEW |