OLD | NEW |
1 /* | 1 /* |
2 * Copyright © 2009 Red Hat, Inc. | 2 * Copyright © 2009 Red Hat, Inc. |
3 * Copyright © 2011 Codethink Limited | 3 * Copyright © 2011 Codethink Limited |
4 * Copyright © 2010,2011,2012 Google, Inc. | 4 * Copyright © 2010,2011,2012 Google, Inc. |
5 * | 5 * |
6 * This is part of HarfBuzz, a text shaping library. | 6 * This is part of HarfBuzz, a text shaping library. |
7 * | 7 * |
8 * Permission is hereby granted, without written agreement and without | 8 * Permission is hereby granted, without written agreement and without |
9 * license or royalty fees, to use, copy, modify, and distribute this | 9 * license or royalty fees, to use, copy, modify, and distribute this |
10 * software and its documentation for any purpose, provided that the | 10 * software and its documentation for any purpose, provided that the |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
95 unsigned int ret = func.decompose_compatibility (this, u, decomposed, user_d
ata.decompose_compatibility); | 95 unsigned int ret = func.decompose_compatibility (this, u, decomposed, user_d
ata.decompose_compatibility); |
96 if (ret == 1 && u == decomposed[0]) { | 96 if (ret == 1 && u == decomposed[0]) { |
97 decomposed[0] = 0; | 97 decomposed[0] = 0; |
98 return 0; | 98 return 0; |
99 } | 99 } |
100 decomposed[ret] = 0; | 100 decomposed[ret] = 0; |
101 return ret; | 101 return ret; |
102 } | 102 } |
103 | 103 |
104 | 104 |
105 unsigned int | 105 inline unsigned int |
106 modified_combining_class (hb_codepoint_t unicode) | 106 modified_combining_class (hb_codepoint_t unicode) |
107 { | 107 { |
108 /* XXX This hack belongs to the Myanmar shaper. */ | 108 /* XXX This hack belongs to the Myanmar shaper. */ |
109 if (unlikely (unicode == 0x1037)) unicode = 0x103A; | 109 if (unlikely (unicode == 0x1037u)) unicode = 0x103Au; |
110 | 110 |
111 /* XXX This hack belongs to the SEA shaper (for Tai Tham): | 111 /* XXX This hack belongs to the SEA shaper (for Tai Tham): |
112 * Reorder SAKOT to ensure it comes after any tone marks. */ | 112 * Reorder SAKOT to ensure it comes after any tone marks. */ |
113 if (unlikely (unicode == 0x1A60)) return 254; | 113 if (unlikely (unicode == 0x1A60u)) return 254; |
| 114 |
| 115 /* XXX This hack belongs to the Tibetan shaper: |
| 116 * Reorder PADMA to ensure it comes after any vowel marks. */ |
| 117 if (unlikely (unicode == 0x0FC6u)) return 254; |
114 | 118 |
115 return _hb_modified_combining_class[combining_class (unicode)]; | 119 return _hb_modified_combining_class[combining_class (unicode)]; |
116 } | 120 } |
117 | 121 |
118 inline hb_bool_t | 122 static inline hb_bool_t |
119 is_variation_selector (hb_codepoint_t unicode) | 123 is_variation_selector (hb_codepoint_t unicode) |
120 { | 124 { |
121 return unlikely (hb_in_ranges<hb_codepoint_t> (unicode, | 125 /* U+180B..180D MONGOLIAN FREE VARIATION SELECTORs are handled in the |
122 » » » » » » 0x180B, 0x180D, /* MONGOLIAN
FREE VARIATION SELECTOR ONE..THREE */ | 126 * Arabic shaper. No need to match them here. */ |
123 » » » » » » 0xFE00, 0xFE0F, /* VARIATION
SELECTOR-1..16 */ | 127 return unlikely (hb_in_ranges (unicode, |
124 » » » » » » 0xE0100, 0xE01EF)); /* VARIA
TION SELECTOR-17..256 */ | 128 » » » » 0xFE00u, 0xFE0Fu, /* VARIATION SELECTOR-1..16
*/ |
| 129 » » » » 0xE0100u, 0xE01EFu)); /* VARIATION SELECTOR-
17..256 */ |
125 } | 130 } |
126 | 131 |
127 /* Default_Ignorable codepoints: | 132 /* Default_Ignorable codepoints: |
128 * | 133 * |
129 * Note that as of Oct 2012 (Unicode 6.2), U+180E MONGOLIAN VOWEL SEPARATOR | |
130 * is NOT Default_Ignorable, but it really behaves in a way that it should | |
131 * be. That has been reported to the Unicode Technical Committee for | |
132 * consideration. As such, we include it here, since Uniscribe removes it. | |
133 * It *is* in Unicode 6.3 however. U+061C ARABIC LETTER MARK from Unicode | |
134 * 6.3 is also added manually. The new Unicode 6.3 bidi formatting | |
135 * characters are encoded in a block that was Default_Ignorable already. | |
136 * | |
137 * Note: While U+115F, U+1160, U+3164 and U+FFA0 are Default_Ignorable, | 134 * Note: While U+115F, U+1160, U+3164 and U+FFA0 are Default_Ignorable, |
138 * we do NOT want to hide them, as the way Uniscribe has implemented them | 135 * we do NOT want to hide them, as the way Uniscribe has implemented them |
139 * is with regular spacing glyphs, and that's the way fonts are made to work. | 136 * is with regular spacing glyphs, and that's the way fonts are made to work. |
140 * As such, we make exceptions for those four. | 137 * As such, we make exceptions for those four. |
141 * | 138 * |
142 * Gathered from: | 139 * Unicode 7.0: |
143 * http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:DI:]&abb=on&ucd=on&
esc=on | 140 * $ grep '; Default_Ignorable_Code_Point ' DerivedCoreProperties.txt | sed 's
/;.*#/#/' |
144 * | 141 * 00AD # Cf SOFT HYPHEN |
145 * Last updated to the page with the following versions: | 142 * 034F # Mn COMBINING GRAPHEME JOINER |
146 * Version 3.6; ICU version: 50.0.1.0; Unicode version: 6.1.0.0 | 143 * 061C # Cf ARABIC LETTER MARK |
147 * | 144 * 115F..1160 # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER |
148 * 4,167 Code Points | 145 * 17B4..17B5 # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA |
149 * | 146 * 180B..180D # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN F
REE VARIATION SELECTOR THREE |
150 * [\u00AD\u034F\u115F\u1160\u17B4\u17B5\u180B-\u180D\u200B-\u200F\u202A-\u202
E\u2060-\u206F\u3164\uFE00-\uFE0F\uFEFF\uFFA0\uFFF0-\uFFF8\U0001D173-\U0001D17A\
U000E0000-\U000E0FFF] | 147 * 180E # Cf MONGOLIAN VOWEL SEPARATOR |
151 * | 148 * 200B..200F # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK |
152 * 00AD ;SOFT HYPHEN | 149 * 202A..202E # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE |
153 * 034F ;COMBINING GRAPHEME JOINER | 150 * 2060..2064 # Cf [5] WORD JOINER..INVISIBLE PLUS |
154 * #115F ;HANGUL CHOSEONG FILLER | 151 * 2065 # Cn <reserved-2065> |
155 * #1160 ;HANGUL JUNGSEONG FILLER | 152 * 2066..206F # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES |
156 * 17B4 ;KHMER VOWEL INHERENT AQ | 153 * 3164 # Lo HANGUL FILLER |
157 * 17B5 ;KHMER VOWEL INHERENT AA | 154 * FE00..FE0F # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 |
158 * 180B..180D ;MONGOLIAN FREE VARIATION SELECTOR THREE | 155 * FEFF # Cf ZERO WIDTH NO-BREAK SPACE |
159 * 200B..200F ;RIGHT-TO-LEFT MARK | 156 * FFA0 # Lo HALFWIDTH HANGUL FILLER |
160 * 202A..202E ;RIGHT-TO-LEFT OVERRIDE | 157 * FFF0..FFF8 # Cn [9] <reserved-FFF0>..<reserved-FFF8> |
161 * 2060..206F ;NOMINAL DIGIT SHAPES | 158 * 1BCA0..1BCA3 # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT
UP STEP |
162 * #3164 ;HANGUL FILLER | 159 * 1D173..1D17A # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRA
SE |
163 * FE00..FE0F ;VARIATION SELECTOR-16 | 160 * E0000 # Cn <reserved-E0000> |
164 * FEFF ;ZERO WIDTH NO-BREAK SPACE | 161 * E0001 # Cf LANGUAGE TAG |
165 * #FFA0 ;HALFWIDTH HANGUL FILLER | 162 * E0002..E001F # Cn [30] <reserved-E0002>..<reserved-E001F> |
166 * FFF0..FFF8 ;<unassigned-FFF8> | 163 * E0020..E007F # Cf [96] TAG SPACE..CANCEL TAG |
167 * 1D173..1D17A ;MUSICAL SYMBOL END PHRASE | 164 * E0080..E00FF # Cn [128] <reserved-E0080>..<reserved-E00FF> |
168 * E0000..E0FFF ;<unassigned-E0FFF> | 165 * E0100..E01EF # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 |
| 166 * E01F0..E0FFF # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> |
169 */ | 167 */ |
170 inline hb_bool_t | 168 static inline hb_bool_t |
171 is_default_ignorable (hb_codepoint_t ch) | 169 is_default_ignorable (hb_codepoint_t ch) |
172 { | 170 { |
173 hb_codepoint_t plane = ch >> 16; | 171 hb_codepoint_t plane = ch >> 16; |
174 if (likely (plane == 0)) | 172 if (likely (plane == 0)) |
175 { | 173 { |
176 /* BMP */ | 174 /* BMP */ |
177 hb_codepoint_t page = ch >> 8; | 175 hb_codepoint_t page = ch >> 8; |
178 switch (page) { | 176 switch (page) { |
179 » case 0x00: return unlikely (ch == 0x00AD); | 177 » case 0x00: return unlikely (ch == 0x00ADu); |
180 » case 0x03: return unlikely (ch == 0x034F); | 178 » case 0x03: return unlikely (ch == 0x034Fu); |
181 » case 0x06: return unlikely (ch == 0x061C); | 179 » case 0x06: return unlikely (ch == 0x061Cu); |
182 » case 0x17: return hb_in_range<hb_codepoint_t> (ch, 0x17B4, 0x17B5); | 180 » case 0x17: return hb_in_range (ch, 0x17B4u, 0x17B5u); |
183 » case 0x18: return hb_in_range<hb_codepoint_t> (ch, 0x180B, 0x180E); | 181 » case 0x18: return hb_in_range (ch, 0x180Bu, 0x180Eu); |
184 » case 0x20: return hb_in_ranges<hb_codepoint_t> (ch, 0x200B, 0x200F, | 182 » case 0x20: return hb_in_ranges (ch, 0x200Bu, 0x200Fu, |
185 » » » » » » » 0x202A, 0x202E, | 183 » » » » » » » 0x202Au, 0x202Eu, |
186 » » » » » » » 0x2060, 0x206F); | 184 » » » » » » » 0x2060u, 0x206Fu); |
187 » case 0xFE: return hb_in_range<hb_codepoint_t> (ch, 0xFE00, 0xFE0F) || ch
== 0xFEFF; | 185 » case 0xFE: return hb_in_range (ch, 0xFE00u, 0xFE0Fu) || ch == 0xFEFFu; |
188 » case 0xFF: return hb_in_range<hb_codepoint_t> (ch, 0xFFF0, 0xFFF8); | 186 » case 0xFF: return hb_in_range (ch, 0xFFF0u, 0xFFF8u); |
189 default: return false; | 187 default: return false; |
190 } | 188 } |
191 } | 189 } |
192 else | 190 else |
193 { | 191 { |
194 /* Other planes */ | 192 /* Other planes */ |
195 switch (plane) { | 193 switch (plane) { |
196 » case 0x01: return hb_in_range<hb_codepoint_t> (ch, 0x0001D173, 0x0001D17
A); | 194 » case 0x01: return hb_in_ranges (ch, 0x1BCA0u, 0x1BCA3u, |
197 » case 0x0E: return hb_in_range<hb_codepoint_t> (ch, 0x000E0000, 0x000E0FF
F); | 195 » » » » » 0x1D173u, 0x1D17Au); |
| 196 » case 0x0E: return hb_in_range (ch, 0xE0000u, 0xE0FFFu); |
198 default: return false; | 197 default: return false; |
199 } | 198 } |
200 } | 199 } |
201 } | 200 } |
202 | 201 |
203 | 202 |
204 struct { | 203 struct { |
205 #define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_func_t name; | 204 #define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_func_t name; |
206 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS | 205 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS |
207 #undef HB_UNICODE_FUNC_IMPLEMENT | 206 #undef HB_UNICODE_FUNC_IMPLEMENT |
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
309 /* Misc */ | 308 /* Misc */ |
310 | 309 |
311 #define HB_UNICODE_GENERAL_CATEGORY_IS_MARK(gen_cat) \ | 310 #define HB_UNICODE_GENERAL_CATEGORY_IS_MARK(gen_cat) \ |
312 (FLAG (gen_cat) & \ | 311 (FLAG (gen_cat) & \ |
313 (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \ | 312 (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \ |
314 FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \ | 313 FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \ |
315 FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))) | 314 FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))) |
316 | 315 |
317 | 316 |
318 #endif /* HB_UNICODE_PRIVATE_HH */ | 317 #endif /* HB_UNICODE_PRIVATE_HH */ |
OLD | NEW |