Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(742)

Side by Side Diff: third_party/WebKit/Source/platform/fonts/Character.cpp

Issue 1541393003: Improve performance of Character::isCJKIdeographOrSymbol by using trie tree (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Cleanup, remove isCJKIdeograph, remove perf test code Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2014 Google Inc. All rights reserved. 2 * Copyright (C) 2014 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 15 matching lines...) Expand all
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */ 29 */
30 30
31 #include "platform/fonts/Character.h" 31 #include "platform/fonts/Character.h"
32 32
33 #include "wtf/StdLibExtras.h" 33 #include "wtf/StdLibExtras.h"
34 #include "wtf/text/StringBuilder.h" 34 #include "wtf/text/StringBuilder.h"
35 #include <algorithm> 35 #include <algorithm>
36 #include <unicode/uobject.h>
36 #include <unicode/uscript.h> 37 #include <unicode/uscript.h>
38 #define MUTEX_H
drott 2016/01/14 12:10:53 Why is this needed?
kojii 2016/01/14 15:34:33 ICU fails to compile without this, at least on Win
39 #include <utrie2.h>
37 40
38 using namespace WTF; 41 using namespace WTF;
39 using namespace Unicode; 42 using namespace Unicode;
40 43
41 namespace blink { 44 namespace blink {
42 45
43 static const UChar32 cjkIsolatedSymbolsArray[] = { 46 static const UChar32 cjkIsolatedSymbolsArray[] = {
44 // 0x2C7 Caron, Mandarin Chinese 3rd Tone 47 // 0x2C7 Caron, Mandarin Chinese 3rd Tone
45 0x2C7, 48 0x2C7,
46 // 0x2CA Modifier Letter Acute Accent, Mandarin Chinese 2nd Tone 49 // 0x2CA Modifier Letter Acute Accent, Mandarin Chinese 2nd Tone
47 0x2CA, 50 0x2CA,
48 // 0x2CB Modifier Letter Grave Access, Mandarin Chinese 4th Tone 51 // 0x2CB Modifier Letter Grave Access, Mandarin Chinese 4th Tone
49 0x2CB, 52 0x2CB,
50 // 0x2D9 Dot Above, Mandarin Chinese 5th Tone 53 // 0x2D9 Dot Above, Mandarin Chinese 5th Tone
51 0x2D9, 54 0x2D9,
52 0x2020, 0x2021, 0x2030, 0x203B, 0x203C, 0x2042, 0x2047, 0x2048, 0x2049, 0x20 51, 55 0x2020, 0x2021, 0x2030, 0x203B, 0x203C, 0x2042, 0x2047, 0x2048, 0x2049, 0x20 51,
53 0x20DD, 0x20DE, 0x2100, 0x2103, 0x2105, 0x2109, 0x210A, 0x2113, 0x2116, 0x21 21, 56 0x20DD, 0x20DE, 0x2100, 0x2103, 0x2105, 0x2109, 0x210A, 0x2113, 0x2116, 0x21 21,
54 0x212B, 0x213B, 0x2150, 0x2151, 0x2152, 0x217F, 0x2189, 0x2307, 0x2312, 0x23 CE, 57 0x212B, 0x213B, 0x2150, 0x2151, 0x2152, 0x217F, 0x2189, 0x2307, 0x2312, 0x23 CE,
55 0x2423, 0x25A0, 0x25A1, 0x25A2, 0x25AA, 0x25AB, 0x25B1, 0x25B2, 0x25B3, 0x25 B6, 58 0x2423, 0x25A0, 0x25A1, 0x25A2, 0x25AA, 0x25AB, 0x25B1, 0x25B2, 0x25B3, 0x25 B6,
56 0x25B7, 0x25BC, 0x25BD, 0x25C0, 0x25C1, 0x25C6, 0x25C7, 0x25C9, 0x25CB, 0x25 CC, 59 0x25B7, 0x25BC, 0x25BD, 0x25C0, 0x25C1, 0x25C6, 0x25C7, 0x25C9, 0x25CB, 0x25 CC,
57 0x25EF, 0x2605, 0x2606, 0x260E, 0x2616, 0x2617, 0x2640, 0x2642, 0x26A0, 0x26 BD, 60 0x25EF, 0x2605, 0x2606, 0x260E, 0x2616, 0x2617, 0x2640, 0x2642, 0x26A0, 0x26 BD,
58 0x26BE, 0x2713, 0x271A, 0x273F, 0x2740, 0x2756, 0x2B1A, 0xFE10, 0xFE11, 0xFE 12, 61 0x26BE, 0x2713, 0x271A, 0x273F, 0x2740, 0x2756, 0x2B1A, 0xFE10, 0xFE11, 0xFE 12,
59 0xFE19, 0xFF1D, 62 0xFE19, 0xFF1D,
60 // Emoji. 63 // Emoji.
61 0x1F100 64 0x1F100
62 }; 65 };
63 66
67 static const UChar32 cjkIdeographRanges[] = {
68 // CJK Radicals Supplement and Kangxi Radicals.
69 0x2E80, 0x2FDF,
70 // CJK Strokes.
71 0x31C0, 0x31EF,
72 // CJK Unified Ideographs Extension A.
73 0x3400, 0x4DBF,
74 // The basic CJK Unified Ideographs block.
75 0x4E00, 0x9FFF,
76 // CJK Compatibility Ideographs.
77 0xF900, 0xFAFF,
78 // CJK Unified Ideographs Extension B.
79 0x20000, 0x2A6DF,
80 // CJK Unified Ideographs Extension C.
81 // CJK Unified Ideographs Extension D.
82 0x2A700, 0x2B81F,
83 // CJK Compatibility Ideographs Supplement.
84 0x2F800, 0x2FA1F
85 };
86
87 static const UChar32 cjkSymbolRanges[] = {
88 0x2156, 0x215A,
89 0x2160, 0x216B,
90 0x2170, 0x217B,
91 0x23BE, 0x23CC,
92 0x2460, 0x2492,
93 0x249C, 0x24FF,
94 0x25CE, 0x25D3,
95 0x25E2, 0x25E6,
96 0x2600, 0x2603,
97 0x2660, 0x266F,
98 0x2672, 0x267D,
drott 2016/01/14 12:10:53 Could you perhaps include // Emoji HEAVY HEART EX
kojii 2016/01/14 15:34:33 Done.
99 0x2776, 0x277F,
100 // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030.
drott 2016/01/14 12:10:53 I know that this is previous code in a way. Howeve
kojii 2016/01/14 15:34:33 Wasn't aware that 80 column limit is also coming.
101 // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F
102 0x2FF0, 0x302F,
103 0x3031, 0x312F,
104 // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF
105 0x3190, 0x31BF,
106 // Enclosed CJK Letters and Months (0x3200 .. 0x32FF).
107 // CJK Compatibility (0x3300 .. 0x33FF).
108 0x3200, 0x33FF,
109 0xF860, 0xF862,
110 // CJK Compatibility Forms.
111 0xFE30, 0xFE4F,
112 // Halfwidth and Fullwidth Forms
113 // Usually only used in CJK
114 0xFF00, 0xFF0C,
115 0xFF0E, 0xFF1A,
116 0xFF1F, 0xFFEF,
117 // Emoji.
118 0x1F110, 0x1F129,
119 0x1F130, 0x1F149,
120 0x1F150, 0x1F169,
121 0x1F170, 0x1F189,
122 0x1F200, 0x1F6FF
123 };
124
125 // UNICODE VERTICAL TEXT LAYOUT http://www.unicode.org/reports/tr50/
drott 2016/01/14 12:10:53 I wouldn't uppercase the comment here. Perhaps: /
kojii 2016/01/14 15:34:33 Done (actually data is still rev 13, I need to upd
126 static const UChar32 isUprightInMixedVerticalArray[] = {
127 0x000A7,
128 0x000A9,
129 0x000AE,
130 0x000B1,
131 0x000D7,
132 0x000F7
133 };
134
135 static const UChar32 isUprightInMixedVerticalRanges[] = {
136 0x000BC, 0x000BE,
137 // Spacing Modifier Letters (Part of)
138 0x002EA, 0x002EB,
139 // Hangul Jamo
140 0x01100, 0x011FF,
141 // Unified Canadian Aboriginal Syllabics
142 0x01401, 0x0167F,
143 // Unified Canadian Aboriginal Syllabics Extended
144 0x018B0, 0x018FF,
145 // General Punctuation (Part of)
146 0x02016, 0x02016,
147 0x02020, 0x02021,
148 0x02030, 0x02031,
149 0x0203B, 0x0203C,
150 0x02042, 0x02042,
151 0x02047, 0x02049,
152 0x02051, 0x02051,
153 0x02065, 0x02069,
154 // Combining Diacritical Marks for Symbols (Part of)
155 0x020DD, 0x020E0,
156 0x020E2, 0x020E4,
157 // Letterlike Symbols (Part of)/Number Forms
158 0x02100, 0x02101,
159 0x02103, 0x02109,
160 0x0210F, 0x0210F,
161 0x02113, 0x02114,
162 0x02116, 0x02117,
163 0x0211E, 0x02123,
164 0x02125, 0x02125,
165 0x02127, 0x02127,
166 0x02129, 0x02129,
167 0x0212E, 0x0212E,
168 0x02135, 0x0213F,
169 0x02145, 0x0214A,
170 0x0214C, 0x0214D,
171 0x0214F, 0x0218F,
172 // Mathematical Operators (Part of)
173 0x0221E, 0x0221E,
174 0x02234, 0x02235,
175 // Miscellaneous Technical (Part of)
176 0x02300, 0x02307,
177 0x0230C, 0x0231F,
178 0x02324, 0x0232B,
179 0x0237D, 0x0239A,
180 0x023BE, 0x023CD,
181 0x023CF, 0x023CF,
182 0x023D1, 0x023DB,
183 0x023E2, 0x02422,
184 // Control Pictures (Part of)/Optical Character Recognition/Enclosed Alphanumerics
185 0x02424, 0x024FF,
186 // Geometric Shapes/Miscellaneous Symbols (Part of)
187 0x025A0, 0x02619,
188 0x02620, 0x02767,
189 0x02776, 0x02793,
190 // Miscellaneous Symbols and Arrows (Part of)
191 0x02B12, 0x02B2F,
192 0x02B50, 0x02B59,
193 0x02BB8, 0x02BFF,
194 // Common CJK
195 0x02E80, 0x0A4CF,
196 // Hangul Jamo Extended-A
197 0x0A960, 0x0A97F,
198 // Hangul Syllables/Hangul Jamo Extended-B
199 0x0AC00, 0x0D7FF,
200 // Private Use Area/CJK Compatibility Ideographs
201 0x0E000, 0x0FAFF,
202 // Vertical Forms
203 0x0FE10, 0x0FE1F,
204 // CJK Compatibility Forms (Part of)
205 0x0FE30, 0x0FE48,
206 // Small Form Variants (Part of)
207 0x0FE50, 0x0FE57,
208 0x0FE59, 0x0FE62,
209 0x0FE67, 0x0FE6F,
210 // Halfwidth and Fullwidth Forms
211 0x0FF01, 0x0FF0C,
212 0x0FF0E, 0x0FF1B,
213 0x0FF1F, 0x0FF60,
214 0x0FFE0, 0x0FFE7,
215 // Specials (Part of)
216 0x0FFF0, 0x0FFF8,
217 0x0FFFC, 0x0FFFD,
218 // Meroitic Hieroglyphs
219 0x10980, 0x1099F,
220 // Siddham
221 0x11580, 0x115FF,
222 // Egyptian Hieroglyphs
223 0x13000, 0x1342F,
224 // Kana Supplement
225 0x1B000, 0x1B0FF,
226 // Byzantine Musical Symbols/Musical Symbols
227 0x1D000, 0x1D1FF,
228 // Tai Xuan Jing Symbols/Counting Rod Numerals
229 0x1D300, 0x1D37F,
230 // Mahjong Tiles/Domino Tiles/Playing Cards/Enclosed Alphanumeric Supplement
231 // Enclosed Ideographic Supplement/Enclosed Ideographic Supplement
232 // Emoticons/Ornamental Dingbats/Transport and Map Symbols/Alchemical Symbols
233 // Alchemical Symbols
234 0x1F000, 0x1F7FF,
235 // CJK Unified Ideographs Extension B/C/D
236 // CJK Compatibility Ideographs Supplement
237 0x20000, 0x2FFFD,
238 0x30000, 0x3FFFD,
239 // Supplementary Private Use Area-A
240 0xF0000, 0xFFFFD,
241 // Supplementary Private Use Area-B
242 0x100000, 0x10FFFD,
243 };
244
245 using CharacterPropertiesType = uint8_t;
246
247 enum class CharacterProperties : CharacterPropertiesType {
248 isCJKIdeographOrSymbol = 0x0001,
249 isUprightInMixedVertical = 0x0002,
250 };
251
252 inline CharacterProperties operator | (CharacterProperties a, CharacterPropertie s b)
253 {
254 return static_cast<CharacterProperties>((static_cast<CharacterPropertiesType >(a) | static_cast<CharacterPropertiesType>(b)));
255 }
256
257 inline CharacterProperties operator & (CharacterProperties a, CharacterPropertie s b)
258 {
259 return static_cast<CharacterProperties>((static_cast<CharacterPropertiesType >(a) & static_cast<CharacterPropertiesType>(b)));
260 }
261
262 inline CharacterProperties operator |= (CharacterProperties& a, CharacterPropert ies b)
263 {
264 a = a | b;
265 return a;
266 }
267
268 const UChar32 maxCodePointForPropertyValues = 0x10FFFD;
drott 2016/01/14 12:10:53 As far as I can see this is only used for the asse
kojii 2016/01/14 15:34:33 Couldn't find that definition, thanks!
269
270 static void setRanges(CharacterProperties* values, const UChar32* ranges, size_t length, CharacterProperties value)
drott 2016/01/14 12:10:53 If I understand correctly, the CharacterProperties
kojii 2016/01/14 15:34:33 It's 1.1MB because CharacterProperties is uint8_t.
271 {
272 ASSERT(length % 2 == 0);
273 const UChar32* end = ranges + length;
274 for (; ranges != end; ranges += 2) {
275 ASSERT(ranges[0] <= ranges[1] && ranges[1] <= maxCodePointForPropertyVal ues);
276 for (UChar32 c = ranges[0]; c <= ranges[1]; c++)
277 values[c] |= value;
278 }
279 }
280
281 static void setValues(CharacterProperties* values, const UChar32* begin, size_t length, CharacterProperties value)
282 {
283 const UChar32* end = begin + length;
284 for (; begin != end; begin++) {
285 ASSERT(*begin <= maxCodePointForPropertyValues);
286 values[*begin] |= value;
287 }
288 }
289
290 static UTrie2* createTrie()
291 {
292 // Create a value array of all possible code points.
293 const UChar32 size = maxCodePointForPropertyValues + 1;
294 OwnPtr<CharacterProperties[]> values = adoptArrayPtr(new CharacterProperties [size]);
295 memset(values.get(), 0, sizeof(CharacterProperties) * size);
296 setRanges(values.get(), cjkIdeographRanges, WTF_ARRAY_LENGTH(cjkIdeographRan ges),
297 CharacterProperties::isCJKIdeographOrSymbol);
298 setRanges(values.get(), cjkSymbolRanges, WTF_ARRAY_LENGTH(cjkSymbolRanges),
299 CharacterProperties::isCJKIdeographOrSymbol);
300 setValues(values.get(), cjkIsolatedSymbolsArray, WTF_ARRAY_LENGTH(cjkIsolate dSymbolsArray),
301 CharacterProperties::isCJKIdeographOrSymbol);
302 setRanges(values.get(), isUprightInMixedVerticalRanges, WTF_ARRAY_LENGTH(isU prightInMixedVerticalRanges),
303 CharacterProperties::isUprightInMixedVertical);
304 setValues(values.get(), isUprightInMixedVerticalArray, WTF_ARRAY_LENGTH(isUp rightInMixedVerticalArray),
305 CharacterProperties::isUprightInMixedVertical);
306
307 // Create a Trie from the value array.
308 UErrorCode error = U_ZERO_ERROR;
309 UTrie2* trie = utrie2_open(0, 0, &error);
310 UChar32 start = 0;
311 CharacterProperties value = values[0];
312 for (UChar32 c = 1; ; c++) {
313 if (c < size && values[c] == value)
314 continue;
315 if (static_cast<uint32_t>(value))
316 utrie2_setRange32(trie, start, c - 1, static_cast<uint32_t>(value), TRUE, &error);
317 if (c >= size)
318 break;
319 start = c;
320 value = values[c];
321 }
322 utrie2_freeze(trie, UTrie2ValueBits::UTRIE2_16_VALUE_BITS, &error);
323 return trie;
324 }
325
326 static bool hasCharacterProperty(UChar32 c, CharacterProperties property)
327 {
328 static UTrie2* trie = nullptr;
329 if (!trie)
330 trie = createTrie();
331 return UTRIE2_GET16(trie, c) & static_cast<CharacterPropertiesType>(property );
332 }
333
64 // Takes a flattened list of closed intervals 334 // Takes a flattened list of closed intervals
65 template <class T, size_t size> 335 template <class T, size_t size>
66 bool valueInIntervalList(const T (&intervalList)[size], const T& value) 336 bool valueInIntervalList(const T (&intervalList)[size], const T& value)
67 { 337 {
68 const T* bound = std::upper_bound(&intervalList[0], &intervalList[size], val ue); 338 const T* bound = std::upper_bound(&intervalList[0], &intervalList[size], val ue);
69 if ((bound - intervalList) % 2 == 1) 339 if ((bound - intervalList) % 2 == 1)
70 return true; 340 return true;
71 return bound > intervalList && *(bound - 1) == value; 341 return bound > intervalList && *(bound - 1) == value;
72 } 342 }
73 343
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
170 // Search for other Complex cases 440 // Search for other Complex cases
171 if (valueInIntervalList(complexCodePathRanges, c)) 441 if (valueInIntervalList(complexCodePathRanges, c))
172 return ComplexPath; 442 return ComplexPath;
173 } 443 }
174 444
175 return result; 445 return result;
176 } 446 }
177 447
178 bool Character::isUprightInMixedVertical(UChar32 character) 448 bool Character::isUprightInMixedVertical(UChar32 character)
179 { 449 {
180 // Fast path for common non-CJK 450 return hasCharacterProperty(character, CharacterProperties::isUprightInMixed Vertical);
181 if (character < 0x000A7)
182 return false;
183
184 // Fast path for common CJK
185 if (isInRange(character, 0x02E80, 0x0A4CF))
186 return true;
187
188 if (isInRange(character, 0x0FF01, 0x0FFE7)) {
189 if (character <= 0x0FF0C || isInRange(character, 0x0FF0E, 0x0FF1B)
190 || isInRange(character, 0x0FF1F, 0x0FF60) || character >= 0x0FFE0)
191 return true;
192 return false;
193 }
194
195 // Fast path for medium-common non-CJK
196 if (character == 0x000A7 || character == 0x000A9 || character == 0x000AE)
197 return true;
198 if (character == 0x000B1 || character == 0x000BC || character == 0x000BD || character == 0x000BE)
199 return true;
200 if (character == 0x000D7 || character == 0x000F7)
201 return true;
202 if (character < 0x002EA)
203 return false;
204
205 static const UChar32 uprightRanges[] = {
206 // Spacing Modifier Letters (Part of)
207 0x002EA, 0x002EB,
208 // Hangul Jamo
209 0x01100, 0x011FF,
210 // Unified Canadian Aboriginal Syllabics
211 0x01401, 0x0167F,
212 // Unified Canadian Aboriginal Syllabics Extended
213 0x018B0, 0x018FF,
214 // General Punctuation (Part of)
215 0x02016, 0x02016,
216 0x02020, 0x02021,
217 0x02030, 0x02031,
218 0x0203B, 0x0203C,
219 0x02042, 0x02042,
220 0x02047, 0x02049,
221 0x02051, 0x02051,
222 0x02065, 0x02069,
223 // Combining Diacritical Marks for Symbols (Part of)
224 0x020DD, 0x020E0,
225 0x020E2, 0x020E4,
226 // Letterlike Symbols (Part of)/Number Forms
227 0x02100, 0x02101,
228 0x02103, 0x02109,
229 0x0210F, 0x0210F,
230 0x02113, 0x02114,
231 0x02116, 0x02117,
232 0x0211E, 0x02123,
233 0x02125, 0x02125,
234 0x02127, 0x02127,
235 0x02129, 0x02129,
236 0x0212E, 0x0212E,
237 0x02135, 0x0213F,
238 0x02145, 0x0214A,
239 0x0214C, 0x0214D,
240 0x0214F, 0x0218F,
241 // Mathematical Operators (Part of)
242 0x0221E, 0x0221E,
243 0x02234, 0x02235,
244 // Miscellaneous Technical (Part of)
245 0x02300, 0x02307,
246 0x0230C, 0x0231F,
247 0x02324, 0x0232B,
248 0x0237D, 0x0239A,
249 0x023BE, 0x023CD,
250 0x023CF, 0x023CF,
251 0x023D1, 0x023DB,
252 0x023E2, 0x02422,
253 // Control Pictures (Part of)/Optical Character Recognition/Enclosed Alphanumerics
254 0x02424, 0x024FF,
255 // Geometric Shapes/Miscellaneous Symbols (Part of)
256 0x025A0, 0x02619,
257 0x02620, 0x02767,
258 0x02776, 0x02793,
259 // Miscellaneous Symbols and Arrows (Part of)
260 0x02B12, 0x02B2F,
261 0x02B50, 0x02B59,
262 0x02BB8, 0x02BFF,
263 // Hangul Jamo Extended-A
264 0x0A960, 0x0A97F,
265 // Hangul Syllables/Hangul Jamo Extended-B
266 0x0AC00, 0x0D7FF,
267 // Private Use Area/CJK Compatibility Ideographs
268 0x0E000, 0x0FAFF,
269 // Vertical Forms
270 0x0FE10, 0x0FE1F,
271 // CJK Compatibility Forms (Part of)
272 0x0FE30, 0x0FE48,
273 // Small Form Variants (Part of)
274 0x0FE50, 0x0FE57,
275 0x0FE59, 0x0FE62,
276 0x0FE67, 0x0FE6F,
277 // Specials (Part of)
278 0x0FFF0, 0x0FFF8,
279 0x0FFFC, 0x0FFFD,
280 // Meroitic Hieroglyphs
281 0x10980, 0x1099F,
282 // Siddham
283 0x11580, 0x115FF,
284 // Egyptian Hieroglyphs
285 0x13000, 0x1342F,
286 // Kana Supplement
287 0x1B000, 0x1B0FF,
288 // Byzantine Musical Symbols/Musical Symbols
289 0x1D000, 0x1D1FF,
290 // Tai Xuan Jing Symbols/Counting Rod Numerals
291 0x1D300, 0x1D37F,
292 // Mahjong Tiles/Domino Tiles/Playing Cards/Enclosed Alphanumeric Supplement
293 // Enclosed Ideographic Supplement/Enclosed Ideographic Supplement
294 // Emoticons/Ornamental Dingbats/Transport and Map Symbols/Alchemical Symbols
295 // Alchemical Symbols
296 0x1F000, 0x1F7FF,
297 // CJK Unified Ideographs Extension B/C/D
298 // CJK Compatibility Ideographs Supplement
299 0x20000, 0x2FFFD,
300 0x30000, 0x3FFFD,
301 // Supplementary Private Use Area-A
302 0xF0000, 0xFFFFD,
303 // Supplementary Private Use Area-B
304 0x100000, 0x10FFFD,
305 };
306 return valueInIntervalList(uprightRanges, character);
307 }
308
309 bool Character::isCJKIdeograph(UChar32 c)
310 {
311 static const UChar32 cjkIdeographRanges[] = {
312 // CJK Radicals Supplement and Kangxi Radicals.
313 0x2E80, 0x2FDF,
314 // CJK Strokes.
315 0x31C0, 0x31EF,
316 // CJK Unified Ideographs Extension A.
317 0x3400, 0x4DBF,
318 // The basic CJK Unified Ideographs block.
319 0x4E00, 0x9FFF,
320 // CJK Compatibility Ideographs.
321 0xF900, 0xFAFF,
322 // CJK Unified Ideographs Extension B.
323 0x20000, 0x2A6DF,
324 // CJK Unified Ideographs Extension C.
325 // CJK Unified Ideographs Extension D.
326 0x2A700, 0x2B81F,
327 // CJK Compatibility Ideographs Supplement.
328 0x2F800, 0x2FA1F
329 };
330 static size_t cjkIdeographRangesCount = WTF_ARRAY_LENGTH(cjkIdeographRanges) ;
331
332 // Early out
333 if (c < cjkIdeographRanges[0] || c > cjkIdeographRanges[cjkIdeographRangesCo unt - 1])
334 return false;
335
336 return valueInIntervalList(cjkIdeographRanges, c);
337 } 451 }
338 452
339 bool Character::isCJKIdeographOrSymbol(UChar32 c) 453 bool Character::isCJKIdeographOrSymbol(UChar32 c)
340 { 454 {
341 // Likely common case 455 // Likely common case
342 if (c < 0x2C7) 456 if (c < 0x2C7)
343 return false; 457 return false;
344 458
345 if (isCJKIdeograph(c)) 459 return hasCharacterProperty(c, CharacterProperties::isCJKIdeographOrSymbol);
346 return true;
347
348 static const UChar32 cjkSymbolRanges[] = {
349 0x2156, 0x215A,
350 0x2160, 0x216B,
351 0x2170, 0x217B,
352 0x23BE, 0x23CC,
353 0x2460, 0x2492,
354 0x249C, 0x24FF,
355 0x25CE, 0x25D3,
356 0x25E2, 0x25E6,
357 0x2600, 0x2603,
358 0x2660, 0x266F,
359 0x2672, 0x267D,
360 0x2776, 0x277F,
361 // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030.
362 // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F
363 0x2FF0, 0x302F,
364 0x3031, 0x312F,
365 // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF
366 0x3190, 0x31BF,
367 // Enclosed CJK Letters and Months (0x3200 .. 0x32FF).
368 // CJK Compatibility (0x3300 .. 0x33FF).
369 0x3200, 0x33FF,
370 0xF860, 0xF862,
371 // CJK Compatibility Forms.
372 0xFE30, 0xFE4F,
373 // Halfwidth and Fullwidth Forms
374 // Usually only used in CJK
375 0xFF00, 0xFF0C,
376 0xFF0E, 0xFF1A,
377 0xFF1F, 0xFFEF,
378 // Emoji.
379 0x1F110, 0x1F129,
380 0x1F130, 0x1F149,
381 0x1F150, 0x1F169,
382 0x1F170, 0x1F189,
383 0x1F200, 0x1F6FF
384 };
385
386 if (c >= cjkSymbolRanges[0]
387 && c <= cjkSymbolRanges[WTF_ARRAY_LENGTH(cjkSymbolRanges) - 1]
388 && valueInIntervalList(cjkSymbolRanges, c)) {
389 return true;
390 }
391
392 if (c < 0x2020 && c > 0x2D9)
393 return false;
394
395 // Hash lookup for isolated symbols (those not part of a contiguous range)
396 static HashSet<UChar32>* cjkIsolatedSymbols = 0;
397 if (!cjkIsolatedSymbols) {
398 cjkIsolatedSymbols = new HashSet<UChar32>();
399 for (size_t i = 0; i < WTF_ARRAY_LENGTH(cjkIsolatedSymbolsArray); ++i)
400 cjkIsolatedSymbols->add(cjkIsolatedSymbolsArray[i]);
401 }
402 return cjkIsolatedSymbols->contains(c);
403 } 460 }
404 461
405 unsigned Character::expansionOpportunityCount(const LChar* characters, size_t le ngth, TextDirection direction, bool& isAfterExpansion, const TextJustify textJus tify) 462 unsigned Character::expansionOpportunityCount(const LChar* characters, size_t le ngth, TextDirection direction, bool& isAfterExpansion, const TextJustify textJus tify)
406 { 463 {
407 unsigned count = 0; 464 unsigned count = 0;
408 if (textJustify == TextJustifyDistribute) { 465 if (textJustify == TextJustifyDistribute) {
409 isAfterExpansion = true; 466 isAfterExpansion = true;
410 return length; 467 return length;
411 } 468 }
412 469
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
519 } 576 }
520 577
521 bool Character::isCommonOrInheritedScript(UChar32 character) 578 bool Character::isCommonOrInheritedScript(UChar32 character)
522 { 579 {
523 UErrorCode status = U_ZERO_ERROR; 580 UErrorCode status = U_ZERO_ERROR;
524 UScriptCode script = uscript_getScript(character, &status); 581 UScriptCode script = uscript_getScript(character, &status);
525 return U_SUCCESS(status) && (script == USCRIPT_COMMON || script == USCRIPT_I NHERITED); 582 return U_SUCCESS(status) && (script == USCRIPT_COMMON || script == USCRIPT_I NHERITED);
526 } 583 }
527 584
528 } // namespace blink 585 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/platform/fonts/Character.h ('k') | third_party/WebKit/Source/platform/fonts/FontTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698