OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (C) 2014 Google Inc. All rights reserved. | 2 * Copyright (C) 2014 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 15 matching lines...) Expand all Loading... | |
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 */ | 29 */ |
30 | 30 |
31 #include "platform/fonts/Character.h" | 31 #include "platform/fonts/Character.h" |
32 | 32 |
33 #include "wtf/StdLibExtras.h" | 33 #include "wtf/StdLibExtras.h" |
34 #include "wtf/text/StringBuilder.h" | 34 #include "wtf/text/StringBuilder.h" |
35 #include <algorithm> | 35 #include <algorithm> |
36 #include <unicode/uobject.h> | |
36 #include <unicode/uscript.h> | 37 #include <unicode/uscript.h> |
38 #define MUTEX_H | |
drott
2016/01/14 12:10:53
Why is this needed?
kojii
2016/01/14 15:34:33
ICU fails to compile without this, at least on Win
| |
39 #include <utrie2.h> | |
37 | 40 |
38 using namespace WTF; | 41 using namespace WTF; |
39 using namespace Unicode; | 42 using namespace Unicode; |
40 | 43 |
41 namespace blink { | 44 namespace blink { |
42 | 45 |
43 static const UChar32 cjkIsolatedSymbolsArray[] = { | 46 static const UChar32 cjkIsolatedSymbolsArray[] = { |
44 // 0x2C7 Caron, Mandarin Chinese 3rd Tone | 47 // 0x2C7 Caron, Mandarin Chinese 3rd Tone |
45 0x2C7, | 48 0x2C7, |
46 // 0x2CA Modifier Letter Acute Accent, Mandarin Chinese 2nd Tone | 49 // 0x2CA Modifier Letter Acute Accent, Mandarin Chinese 2nd Tone |
47 0x2CA, | 50 0x2CA, |
48 // 0x2CB Modifier Letter Grave Accent, Mandarin Chinese 4th Tone | 51 // 0x2CB Modifier Letter Grave Accent, Mandarin Chinese 4th Tone |
49 0x2CB, | 52 0x2CB, |
50 // 0x2D9 Dot Above, Mandarin Chinese 5th Tone | 53 // 0x2D9 Dot Above, Mandarin Chinese 5th Tone |
51 0x2D9, | 54 0x2D9, |
52 0x2020, 0x2021, 0x2030, 0x203B, 0x203C, 0x2042, 0x2047, 0x2048, 0x2049, 0x2051, | 55 0x2020, 0x2021, 0x2030, 0x203B, 0x203C, 0x2042, 0x2047, 0x2048, 0x2049, 0x2051, |
53 0x20DD, 0x20DE, 0x2100, 0x2103, 0x2105, 0x2109, 0x210A, 0x2113, 0x2116, 0x2121, | 56 0x20DD, 0x20DE, 0x2100, 0x2103, 0x2105, 0x2109, 0x210A, 0x2113, 0x2116, 0x2121, |
54 0x212B, 0x213B, 0x2150, 0x2151, 0x2152, 0x217F, 0x2189, 0x2307, 0x2312, 0x23CE, | 57 0x212B, 0x213B, 0x2150, 0x2151, 0x2152, 0x217F, 0x2189, 0x2307, 0x2312, 0x23CE, |
55 0x2423, 0x25A0, 0x25A1, 0x25A2, 0x25AA, 0x25AB, 0x25B1, 0x25B2, 0x25B3, 0x25B6, | 58 0x2423, 0x25A0, 0x25A1, 0x25A2, 0x25AA, 0x25AB, 0x25B1, 0x25B2, 0x25B3, 0x25B6, |
56 0x25B7, 0x25BC, 0x25BD, 0x25C0, 0x25C1, 0x25C6, 0x25C7, 0x25C9, 0x25CB, 0x25CC, | 59 0x25B7, 0x25BC, 0x25BD, 0x25C0, 0x25C1, 0x25C6, 0x25C7, 0x25C9, 0x25CB, 0x25CC, |
57 0x25EF, 0x2605, 0x2606, 0x260E, 0x2616, 0x2617, 0x2640, 0x2642, 0x26A0, 0x26BD, | 60 0x25EF, 0x2605, 0x2606, 0x260E, 0x2616, 0x2617, 0x2640, 0x2642, 0x26A0, 0x26BD, |
58 0x26BE, 0x2713, 0x271A, 0x273F, 0x2740, 0x2756, 0x2B1A, 0xFE10, 0xFE11, 0xFE12, | 61 0x26BE, 0x2713, 0x271A, 0x273F, 0x2740, 0x2756, 0x2B1A, 0xFE10, 0xFE11, 0xFE12, |
59 0xFE19, 0xFF1D, | 62 0xFE19, 0xFF1D, |
60 // Emoji. | 63 // Emoji. |
61 0x1F100 | 64 0x1F100 |
62 }; | 65 }; |
63 | 66 |
67 static const UChar32 cjkIdeographRanges[] = { | |
68 // CJK Radicals Supplement and Kangxi Radicals. | |
69 0x2E80, 0x2FDF, | |
70 // CJK Strokes. | |
71 0x31C0, 0x31EF, | |
72 // CJK Unified Ideographs Extension A. | |
73 0x3400, 0x4DBF, | |
74 // The basic CJK Unified Ideographs block. | |
75 0x4E00, 0x9FFF, | |
76 // CJK Compatibility Ideographs. | |
77 0xF900, 0xFAFF, | |
78 // CJK Unified Ideographs Extension B. | |
79 0x20000, 0x2A6DF, | |
80 // CJK Unified Ideographs Extension C. | |
81 // CJK Unified Ideographs Extension D. | |
82 0x2A700, 0x2B81F, | |
83 // CJK Compatibility Ideographs Supplement. | |
84 0x2F800, 0x2FA1F | |
85 }; | |
86 | |
87 static const UChar32 cjkSymbolRanges[] = { | |
88 0x2156, 0x215A, | |
89 0x2160, 0x216B, | |
90 0x2170, 0x217B, | |
91 0x23BE, 0x23CC, | |
92 0x2460, 0x2492, | |
93 0x249C, 0x24FF, | |
94 0x25CE, 0x25D3, | |
95 0x25E2, 0x25E6, | |
96 0x2600, 0x2603, | |
97 0x2660, 0x266F, | |
98 0x2672, 0x267D, | |
drott
2016/01/14 12:10:53
Could you perhaps include
// Emoji HEAVY HEART EX
kojii
2016/01/14 15:34:33
Done.
| |
99 0x2776, 0x277F, | |
100 // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030. | |
drott
2016/01/14 12:10:53
I know that this is previous code in a way. Howeve
kojii
2016/01/14 15:34:33
Wasn't aware that 80 column limit is also coming.
| |
101 // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F | |
102 0x2FF0, 0x302F, | |
103 0x3031, 0x312F, | |
104 // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF | |
105 0x3190, 0x31BF, | |
106 // Enclosed CJK Letters and Months (0x3200 .. 0x32FF). | |
107 // CJK Compatibility (0x3300 .. 0x33FF). | |
108 0x3200, 0x33FF, | |
109 0xF860, 0xF862, | |
110 // CJK Compatibility Forms. | |
111 0xFE30, 0xFE4F, | |
112 // Halfwidth and Fullwidth Forms | |
113 // Usually only used in CJK | |
114 0xFF00, 0xFF0C, | |
115 0xFF0E, 0xFF1A, | |
116 0xFF1F, 0xFFEF, | |
117 // Emoji. | |
118 0x1F110, 0x1F129, | |
119 0x1F130, 0x1F149, | |
120 0x1F150, 0x1F169, | |
121 0x1F170, 0x1F189, | |
122 0x1F200, 0x1F6FF | |
123 }; | |
124 | |
125 // UNICODE VERTICAL TEXT LAYOUT http://www.unicode.org/reports/tr50/ | |
drott
2016/01/14 12:10:53
I wouldn't uppercase the comment here. Perhaps:
/
kojii
2016/01/14 15:34:33
Done (actually data is still rev 13, I need to upd
| |
126 static const UChar32 isUprightInMixedVerticalArray[] = { | |
127 0x000A7, | |
128 0x000A9, | |
129 0x000AE, | |
130 0x000B1, | |
131 0x000D7, | |
132 0x000F7 | |
133 }; | |
134 | |
135 static const UChar32 isUprightInMixedVerticalRanges[] = { | |
136 0x000BC, 0x000BE, | |
137 // Spacing Modifier Letters (Part of) | |
138 0x002EA, 0x002EB, | |
139 // Hangul Jamo | |
140 0x01100, 0x011FF, | |
141 // Unified Canadian Aboriginal Syllabics | |
142 0x01401, 0x0167F, | |
143 // Unified Canadian Aboriginal Syllabics Extended | |
144 0x018B0, 0x018FF, | |
145 // General Punctuation (Part of) | |
146 0x02016, 0x02016, | |
147 0x02020, 0x02021, | |
148 0x02030, 0x02031, | |
149 0x0203B, 0x0203C, | |
150 0x02042, 0x02042, | |
151 0x02047, 0x02049, | |
152 0x02051, 0x02051, | |
153 0x02065, 0x02069, | |
154 // Combining Diacritical Marks for Symbols (Part of) | |
155 0x020DD, 0x020E0, | |
156 0x020E2, 0x020E4, | |
157 // Letterlike Symbols (Part of)/Number Forms | |
158 0x02100, 0x02101, | |
159 0x02103, 0x02109, | |
160 0x0210F, 0x0210F, | |
161 0x02113, 0x02114, | |
162 0x02116, 0x02117, | |
163 0x0211E, 0x02123, | |
164 0x02125, 0x02125, | |
165 0x02127, 0x02127, | |
166 0x02129, 0x02129, | |
167 0x0212E, 0x0212E, | |
168 0x02135, 0x0213F, | |
169 0x02145, 0x0214A, | |
170 0x0214C, 0x0214D, | |
171 0x0214F, 0x0218F, | |
172 // Mathematical Operators (Part of) | |
173 0x0221E, 0x0221E, | |
174 0x02234, 0x02235, | |
175 // Miscellaneous Technical (Part of) | |
176 0x02300, 0x02307, | |
177 0x0230C, 0x0231F, | |
178 0x02324, 0x0232B, | |
179 0x0237D, 0x0239A, | |
180 0x023BE, 0x023CD, | |
181 0x023CF, 0x023CF, | |
182 0x023D1, 0x023DB, | |
183 0x023E2, 0x02422, | |
184 // Control Pictures (Part of)/Optical Character Recognition/Enclosed Alphanumerics | |
185 0x02424, 0x024FF, | |
186 // Geometric Shapes/Miscellaneous Symbols (Part of) | |
187 0x025A0, 0x02619, | |
188 0x02620, 0x02767, | |
189 0x02776, 0x02793, | |
190 // Miscellaneous Symbols and Arrows (Part of) | |
191 0x02B12, 0x02B2F, | |
192 0x02B50, 0x02B59, | |
193 0x02BB8, 0x02BFF, | |
194 // Common CJK | |
195 0x02E80, 0x0A4CF, | |
196 // Hangul Jamo Extended-A | |
197 0x0A960, 0x0A97F, | |
198 // Hangul Syllables/Hangul Jamo Extended-B | |
199 0x0AC00, 0x0D7FF, | |
200 // Private Use Area/CJK Compatibility Ideographs | |
201 0x0E000, 0x0FAFF, | |
202 // Vertical Forms | |
203 0x0FE10, 0x0FE1F, | |
204 // CJK Compatibility Forms (Part of) | |
205 0x0FE30, 0x0FE48, | |
206 // Small Form Variants (Part of) | |
207 0x0FE50, 0x0FE57, | |
208 0x0FE59, 0x0FE62, | |
209 0x0FE67, 0x0FE6F, | |
210 // Halfwidth and Fullwidth Forms | |
211 0x0FF01, 0x0FF0C, | |
212 0x0FF0E, 0x0FF1B, | |
213 0x0FF1F, 0x0FF60, | |
214 0x0FFE0, 0x0FFE7, | |
215 // Specials (Part of) | |
216 0x0FFF0, 0x0FFF8, | |
217 0x0FFFC, 0x0FFFD, | |
218 // Meroitic Hieroglyphs | |
219 0x10980, 0x1099F, | |
220 // Siddham | |
221 0x11580, 0x115FF, | |
222 // Egyptian Hieroglyphs | |
223 0x13000, 0x1342F, | |
224 // Kana Supplement | |
225 0x1B000, 0x1B0FF, | |
226 // Byzantine Musical Symbols/Musical Symbols | |
227 0x1D000, 0x1D1FF, | |
228 // Tai Xuan Jing Symbols/Counting Rod Numerals | |
229 0x1D300, 0x1D37F, | |
230 // Mahjong Tiles/Domino Tiles/Playing Cards/Enclosed Alphanumeric Supplement | |
231 // Enclosed Ideographic Supplement/Enclosed Ideographic Supplement | |
232 // Emoticons/Ornamental Dingbats/Transport and Map Symbols/Alchemical Symbols | |
233 // Alchemical Symbols | |
234 0x1F000, 0x1F7FF, | |
235 // CJK Unified Ideographs Extension B/C/D | |
236 // CJK Compatibility Ideographs Supplement | |
237 0x20000, 0x2FFFD, | |
238 0x30000, 0x3FFFD, | |
239 // Supplementary Private Use Area-A | |
240 0xF0000, 0xFFFFD, | |
241 // Supplementary Private Use Area-B | |
242 0x100000, 0x10FFFD, | |
243 }; | |
244 | |
245 using CharacterPropertiesType = uint8_t; | |
246 | |
247 enum class CharacterProperties : CharacterPropertiesType { | |
248 isCJKIdeographOrSymbol = 0x0001, | |
249 isUprightInMixedVertical = 0x0002, | |
250 }; | |
251 | |
252 inline CharacterProperties operator | (CharacterProperties a, CharacterProperties b) | |
253 { | |
254 return static_cast<CharacterProperties>((static_cast<CharacterPropertiesType>(a) | static_cast<CharacterPropertiesType>(b))); | |
255 } | |
256 | |
257 inline CharacterProperties operator & (CharacterProperties a, CharacterProperties b) | |
258 { | |
259 return static_cast<CharacterProperties>((static_cast<CharacterPropertiesType>(a) & static_cast<CharacterPropertiesType>(b))); | |
260 } | |
261 | |
262 inline CharacterProperties operator |= (CharacterProperties& a, CharacterProperties b) | |
263 { | |
264 a = a | b; | |
265 return a; | |
266 } | |
267 | |
268 const UChar32 maxCodePointForPropertyValues = 0x10FFFD; | |
drott
2016/01/14 12:10:53
As far as I can see this is only used for the asse
kojii
2016/01/14 15:34:33
Couldn't find that definition, thanks!
| |
269 | |
270 static void setRanges(CharacterProperties* values, const UChar32* ranges, size_t length, CharacterProperties value) | |
drott
2016/01/14 12:10:53
If I understand correctly, the CharacterProperties
kojii
2016/01/14 15:34:33
It's 1.1MB because CharacterProperties is uint8_t.
| |
271 { | |
272 ASSERT(length % 2 == 0); | |
273 const UChar32* end = ranges + length; | |
274 for (; ranges != end; ranges += 2) { | |
275 ASSERT(ranges[0] <= ranges[1] && ranges[1] <= maxCodePointForPropertyValues); | |
276 for (UChar32 c = ranges[0]; c <= ranges[1]; c++) | |
277 values[c] |= value; | |
278 } | |
279 } | |
280 | |
281 static void setValues(CharacterProperties* values, const UChar32* begin, size_t length, CharacterProperties value) | |
282 { | |
283 const UChar32* end = begin + length; | |
284 for (; begin != end; begin++) { | |
285 ASSERT(*begin <= maxCodePointForPropertyValues); | |
286 values[*begin] |= value; | |
287 } | |
288 } | |
289 | |
290 static UTrie2* createTrie() | |
291 { | |
292 // Create a value array of all possible code points. | |
293 const UChar32 size = maxCodePointForPropertyValues + 1; | |
294 OwnPtr<CharacterProperties[]> values = adoptArrayPtr(new CharacterProperties[size]); | |
295 memset(values.get(), 0, sizeof(CharacterProperties) * size); | |
296 setRanges(values.get(), cjkIdeographRanges, WTF_ARRAY_LENGTH(cjkIdeographRanges), | |
297 CharacterProperties::isCJKIdeographOrSymbol); | |
298 setRanges(values.get(), cjkSymbolRanges, WTF_ARRAY_LENGTH(cjkSymbolRanges), | |
299 CharacterProperties::isCJKIdeographOrSymbol); | |
300 setValues(values.get(), cjkIsolatedSymbolsArray, WTF_ARRAY_LENGTH(cjkIsolatedSymbolsArray), | |
301 CharacterProperties::isCJKIdeographOrSymbol); | |
302 setRanges(values.get(), isUprightInMixedVerticalRanges, WTF_ARRAY_LENGTH(isUprightInMixedVerticalRanges), | |
303 CharacterProperties::isUprightInMixedVertical); | |
304 setValues(values.get(), isUprightInMixedVerticalArray, WTF_ARRAY_LENGTH(isUprightInMixedVerticalArray), | |
305 CharacterProperties::isUprightInMixedVertical); | |
306 | |
307 // Create a Trie from the value array. | |
308 UErrorCode error = U_ZERO_ERROR; | |
309 UTrie2* trie = utrie2_open(0, 0, &error); | |
310 UChar32 start = 0; | |
311 CharacterProperties value = values[0]; | |
312 for (UChar32 c = 1; ; c++) { | |
313 if (c < size && values[c] == value) | |
314 continue; | |
315 if (static_cast<uint32_t>(value)) | |
316 utrie2_setRange32(trie, start, c - 1, static_cast<uint32_t>(value), TRUE, &error); | |
317 if (c >= size) | |
318 break; | |
319 start = c; | |
320 value = values[c]; | |
321 } | |
322 utrie2_freeze(trie, UTrie2ValueBits::UTRIE2_16_VALUE_BITS, &error); | |
323 return trie; | |
324 } | |
325 | |
326 static bool hasCharacterProperty(UChar32 c, CharacterProperties property) | |
327 { | |
328 static UTrie2* trie = nullptr; | |
329 if (!trie) | |
330 trie = createTrie(); | |
331 return UTRIE2_GET16(trie, c) & static_cast<CharacterPropertiesType>(property); | |
332 } | |
333 | |
64 // Takes a flattened list of closed intervals | 334 // Takes a flattened list of closed intervals |
65 template <class T, size_t size> | 335 template <class T, size_t size> |
66 bool valueInIntervalList(const T (&intervalList)[size], const T& value) | 336 bool valueInIntervalList(const T (&intervalList)[size], const T& value) |
67 { | 337 { |
68 const T* bound = std::upper_bound(&intervalList[0], &intervalList[size], value); | 338 const T* bound = std::upper_bound(&intervalList[0], &intervalList[size], value); |
69 if ((bound - intervalList) % 2 == 1) | 339 if ((bound - intervalList) % 2 == 1) |
70 return true; | 340 return true; |
71 return bound > intervalList && *(bound - 1) == value; | 341 return bound > intervalList && *(bound - 1) == value; |
72 } | 342 } |
73 | 343 |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
170 // Search for other Complex cases | 440 // Search for other Complex cases |
171 if (valueInIntervalList(complexCodePathRanges, c)) | 441 if (valueInIntervalList(complexCodePathRanges, c)) |
172 return ComplexPath; | 442 return ComplexPath; |
173 } | 443 } |
174 | 444 |
175 return result; | 445 return result; |
176 } | 446 } |
177 | 447 |
178 bool Character::isUprightInMixedVertical(UChar32 character) | 448 bool Character::isUprightInMixedVertical(UChar32 character) |
179 { | 449 { |
180 // Fast path for common non-CJK | 450 return hasCharacterProperty(character, CharacterProperties::isUprightInMixedVertical); |
181 if (character < 0x000A7) | |
182 return false; | |
183 | |
184 // Fast path for common CJK | |
185 if (isInRange(character, 0x02E80, 0x0A4CF)) | |
186 return true; | |
187 | |
188 if (isInRange(character, 0x0FF01, 0x0FFE7)) { | |
189 if (character <= 0x0FF0C || isInRange(character, 0x0FF0E, 0x0FF1B) | |
190 || isInRange(character, 0x0FF1F, 0x0FF60) || character >= 0x0FFE0) | |
191 return true; | |
192 return false; | |
193 } | |
194 | |
195 // Fast path for medium-common non-CJK | |
196 if (character == 0x000A7 || character == 0x000A9 || character == 0x000AE) | |
197 return true; | |
198 if (character == 0x000B1 || character == 0x000BC || character == 0x000BD || character == 0x000BE) | |
199 return true; | |
200 if (character == 0x000D7 || character == 0x000F7) | |
201 return true; | |
202 if (character < 0x002EA) | |
203 return false; | |
204 | |
205 static const UChar32 uprightRanges[] = { | |
206 // Spacing Modifier Letters (Part of) | |
207 0x002EA, 0x002EB, | |
208 // Hangul Jamo | |
209 0x01100, 0x011FF, | |
210 // Unified Canadian Aboriginal Syllabics | |
211 0x01401, 0x0167F, | |
212 // Unified Canadian Aboriginal Syllabics Extended | |
213 0x018B0, 0x018FF, | |
214 // General Punctuation (Part of) | |
215 0x02016, 0x02016, | |
216 0x02020, 0x02021, | |
217 0x02030, 0x02031, | |
218 0x0203B, 0x0203C, | |
219 0x02042, 0x02042, | |
220 0x02047, 0x02049, | |
221 0x02051, 0x02051, | |
222 0x02065, 0x02069, | |
223 // Combining Diacritical Marks for Symbols (Part of) | |
224 0x020DD, 0x020E0, | |
225 0x020E2, 0x020E4, | |
226 // Letterlike Symbols (Part of)/Number Forms | |
227 0x02100, 0x02101, | |
228 0x02103, 0x02109, | |
229 0x0210F, 0x0210F, | |
230 0x02113, 0x02114, | |
231 0x02116, 0x02117, | |
232 0x0211E, 0x02123, | |
233 0x02125, 0x02125, | |
234 0x02127, 0x02127, | |
235 0x02129, 0x02129, | |
236 0x0212E, 0x0212E, | |
237 0x02135, 0x0213F, | |
238 0x02145, 0x0214A, | |
239 0x0214C, 0x0214D, | |
240 0x0214F, 0x0218F, | |
241 // Mathematical Operators (Part of) | |
242 0x0221E, 0x0221E, | |
243 0x02234, 0x02235, | |
244 // Miscellaneous Technical (Part of) | |
245 0x02300, 0x02307, | |
246 0x0230C, 0x0231F, | |
247 0x02324, 0x0232B, | |
248 0x0237D, 0x0239A, | |
249 0x023BE, 0x023CD, | |
250 0x023CF, 0x023CF, | |
251 0x023D1, 0x023DB, | |
252 0x023E2, 0x02422, | |
253 // Control Pictures (Part of)/Optical Character Recognition/Enclosed Alphanumerics | |
254 0x02424, 0x024FF, | |
255 // Geometric Shapes/Miscellaneous Symbols (Part of) | |
256 0x025A0, 0x02619, | |
257 0x02620, 0x02767, | |
258 0x02776, 0x02793, | |
259 // Miscellaneous Symbols and Arrows (Part of) | |
260 0x02B12, 0x02B2F, | |
261 0x02B50, 0x02B59, | |
262 0x02BB8, 0x02BFF, | |
263 // Hangul Jamo Extended-A | |
264 0x0A960, 0x0A97F, | |
265 // Hangul Syllables/Hangul Jamo Extended-B | |
266 0x0AC00, 0x0D7FF, | |
267 // Private Use Area/CJK Compatibility Ideographs | |
268 0x0E000, 0x0FAFF, | |
269 // Vertical Forms | |
270 0x0FE10, 0x0FE1F, | |
271 // CJK Compatibility Forms (Part of) | |
272 0x0FE30, 0x0FE48, | |
273 // Small Form Variants (Part of) | |
274 0x0FE50, 0x0FE57, | |
275 0x0FE59, 0x0FE62, | |
276 0x0FE67, 0x0FE6F, | |
277 // Specials (Part of) | |
278 0x0FFF0, 0x0FFF8, | |
279 0x0FFFC, 0x0FFFD, | |
280 // Meroitic Hieroglyphs | |
281 0x10980, 0x1099F, | |
282 // Siddham | |
283 0x11580, 0x115FF, | |
284 // Egyptian Hieroglyphs | |
285 0x13000, 0x1342F, | |
286 // Kana Supplement | |
287 0x1B000, 0x1B0FF, | |
288 // Byzantine Musical Symbols/Musical Symbols | |
289 0x1D000, 0x1D1FF, | |
290 // Tai Xuan Jing Symbols/Counting Rod Numerals | |
291 0x1D300, 0x1D37F, | |
292 // Mahjong Tiles/Domino Tiles/Playing Cards/Enclosed Alphanumeric Supplement | |
293 // Enclosed Ideographic Supplement/Enclosed Ideographic Supplement | |
294 // Emoticons/Ornamental Dingbats/Transport and Map Symbols/Alchemical Symbols | |
295 // Alchemical Symbols | |
296 0x1F000, 0x1F7FF, | |
297 // CJK Unified Ideographs Extension B/C/D | |
298 // CJK Compatibility Ideographs Supplement | |
299 0x20000, 0x2FFFD, | |
300 0x30000, 0x3FFFD, | |
301 // Supplementary Private Use Area-A | |
302 0xF0000, 0xFFFFD, | |
303 // Supplementary Private Use Area-B | |
304 0x100000, 0x10FFFD, | |
305 }; | |
306 return valueInIntervalList(uprightRanges, character); | |
307 } | |
308 | |
309 bool Character::isCJKIdeograph(UChar32 c) | |
310 { | |
311 static const UChar32 cjkIdeographRanges[] = { | |
312 // CJK Radicals Supplement and Kangxi Radicals. | |
313 0x2E80, 0x2FDF, | |
314 // CJK Strokes. | |
315 0x31C0, 0x31EF, | |
316 // CJK Unified Ideographs Extension A. | |
317 0x3400, 0x4DBF, | |
318 // The basic CJK Unified Ideographs block. | |
319 0x4E00, 0x9FFF, | |
320 // CJK Compatibility Ideographs. | |
321 0xF900, 0xFAFF, | |
322 // CJK Unified Ideographs Extension B. | |
323 0x20000, 0x2A6DF, | |
324 // CJK Unified Ideographs Extension C. | |
325 // CJK Unified Ideographs Extension D. | |
326 0x2A700, 0x2B81F, | |
327 // CJK Compatibility Ideographs Supplement. | |
328 0x2F800, 0x2FA1F | |
329 }; | |
330 static size_t cjkIdeographRangesCount = WTF_ARRAY_LENGTH(cjkIdeographRanges); | |
331 | |
332 // Early out | |
333 if (c < cjkIdeographRanges[0] || c > cjkIdeographRanges[cjkIdeographRangesCount - 1]) | |
334 return false; | |
335 | |
336 return valueInIntervalList(cjkIdeographRanges, c); | |
337 } | 451 } |
338 | 452 |
339 bool Character::isCJKIdeographOrSymbol(UChar32 c) | 453 bool Character::isCJKIdeographOrSymbol(UChar32 c) |
340 { | 454 { |
341 // Likely common case | 455 // Likely common case |
342 if (c < 0x2C7) | 456 if (c < 0x2C7) |
343 return false; | 457 return false; |
344 | 458 |
345 if (isCJKIdeograph(c)) | 459 return hasCharacterProperty(c, CharacterProperties::isCJKIdeographOrSymbol); |
346 return true; | |
347 | |
348 static const UChar32 cjkSymbolRanges[] = { | |
349 0x2156, 0x215A, | |
350 0x2160, 0x216B, | |
351 0x2170, 0x217B, | |
352 0x23BE, 0x23CC, | |
353 0x2460, 0x2492, | |
354 0x249C, 0x24FF, | |
355 0x25CE, 0x25D3, | |
356 0x25E2, 0x25E6, | |
357 0x2600, 0x2603, | |
358 0x2660, 0x266F, | |
359 0x2672, 0x267D, | |
360 0x2776, 0x277F, | |
361 // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030. | |
362 // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F | |
363 0x2FF0, 0x302F, | |
364 0x3031, 0x312F, | |
365 // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF | |
366 0x3190, 0x31BF, | |
367 // Enclosed CJK Letters and Months (0x3200 .. 0x32FF). | |
368 // CJK Compatibility (0x3300 .. 0x33FF). | |
369 0x3200, 0x33FF, | |
370 0xF860, 0xF862, | |
371 // CJK Compatibility Forms. | |
372 0xFE30, 0xFE4F, | |
373 // Halfwidth and Fullwidth Forms | |
374 // Usually only used in CJK | |
375 0xFF00, 0xFF0C, | |
376 0xFF0E, 0xFF1A, | |
377 0xFF1F, 0xFFEF, | |
378 // Emoji. | |
379 0x1F110, 0x1F129, | |
380 0x1F130, 0x1F149, | |
381 0x1F150, 0x1F169, | |
382 0x1F170, 0x1F189, | |
383 0x1F200, 0x1F6FF | |
384 }; | |
385 | |
386 if (c >= cjkSymbolRanges[0] | |
387 && c <= cjkSymbolRanges[WTF_ARRAY_LENGTH(cjkSymbolRanges) - 1] | |
388 && valueInIntervalList(cjkSymbolRanges, c)) { | |
389 return true; | |
390 } | |
391 | |
392 if (c < 0x2020 && c > 0x2D9) | |
393 return false; | |
394 | |
395 // Hash lookup for isolated symbols (those not part of a contiguous range) | |
396 static HashSet<UChar32>* cjkIsolatedSymbols = 0; | |
397 if (!cjkIsolatedSymbols) { | |
398 cjkIsolatedSymbols = new HashSet<UChar32>(); | |
399 for (size_t i = 0; i < WTF_ARRAY_LENGTH(cjkIsolatedSymbolsArray); ++i) | |
400 cjkIsolatedSymbols->add(cjkIsolatedSymbolsArray[i]); | |
401 } | |
402 return cjkIsolatedSymbols->contains(c); | |
403 } | 460 } |
404 | 461 |
405 unsigned Character::expansionOpportunityCount(const LChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion, const TextJustify textJustify) | 462 unsigned Character::expansionOpportunityCount(const LChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion, const TextJustify textJustify) |
406 { | 463 { |
407 unsigned count = 0; | 464 unsigned count = 0; |
408 if (textJustify == TextJustifyDistribute) { | 465 if (textJustify == TextJustifyDistribute) { |
409 isAfterExpansion = true; | 466 isAfterExpansion = true; |
410 return length; | 467 return length; |
411 } | 468 } |
412 | 469 |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
519 } | 576 } |
520 | 577 |
521 bool Character::isCommonOrInheritedScript(UChar32 character) | 578 bool Character::isCommonOrInheritedScript(UChar32 character) |
522 { | 579 { |
523 UErrorCode status = U_ZERO_ERROR; | 580 UErrorCode status = U_ZERO_ERROR; |
524 UScriptCode script = uscript_getScript(character, &status); | 581 UScriptCode script = uscript_getScript(character, &status); |
525 return U_SUCCESS(status) && (script == USCRIPT_COMMON || script == USCRIPT_INHERITED); | 582 return U_SUCCESS(status) && (script == USCRIPT_COMMON || script == USCRIPT_INHERITED); |
526 } | 583 } |
527 | 584 |
528 } // namespace blink | 585 } // namespace blink |
OLD | NEW |