OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2014 Google Inc. All rights reserved. | 2 * Copyright (C) 2014 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 16 matching lines...) Expand all Loading... |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 */ | 29 */ |
30 | 30 |
31 #include "platform/fonts/Character.h" | 31 #include "platform/fonts/Character.h" |
32 | 32 |
33 #include "wtf/StdLibExtras.h" | 33 #include "wtf/StdLibExtras.h" |
34 #include "wtf/text/StringBuilder.h" | 34 #include "wtf/text/StringBuilder.h" |
35 #include <algorithm> | 35 #include <algorithm> |
36 #include <unicode/uscript.h> | 36 #include <unicode/uscript.h> |
| 37 #define USE_TRIE |
| 38 #if defined(USE_TRIE) |
| 39 #include <unicode/uobject.h> |
| 40 #define MUTEX_H // Required to compile on Windows |
| 41 #include <utrie2.h> |
| 42 #endif |
37 | 43 |
38 using namespace WTF; | 44 using namespace WTF; |
39 using namespace Unicode; | 45 using namespace Unicode; |
40 | 46 |
41 namespace blink { | 47 namespace blink { |
42 | 48 |
// Individual code points that count as CJK symbols but do not belong to any
// contiguous range. Unlike the *Ranges tables in this file, this is a plain
// list of single values, not a list of [first, last] pairs.
static const UChar32 cjkIsolatedSymbolsArray[] = {
    // 0x2C7 Caron, Mandarin Chinese 3rd Tone
    0x2C7,
    // 0x2CA Modifier Letter Acute Accent, Mandarin Chinese 2nd Tone
    0x2CA,
    // 0x2CB Modifier Letter Grave Accent, Mandarin Chinese 4th Tone
    0x2CB,
    // 0x2D9 Dot Above, Mandarin Chinese 5th Tone
    0x2D9,
    0x2020, 0x2021, 0x2030, 0x203B, 0x203C, 0x2042, 0x2047, 0x2048, 0x2049, 0x2051,
    0x20DD, 0x20DE, 0x2100, 0x2103, 0x2105, 0x2109, 0x210A, 0x2113, 0x2116, 0x2121,
    0x212B, 0x213B, 0x2150, 0x2151, 0x2152, 0x217F, 0x2189, 0x2307, 0x2312, 0x23CE,
    0x2423, 0x25A0, 0x25A1, 0x25A2, 0x25AA, 0x25AB, 0x25B1, 0x25B2, 0x25B3, 0x25B6,
    0x25B7, 0x25BC, 0x25BD, 0x25C0, 0x25C1, 0x25C6, 0x25C7, 0x25C9, 0x25CB, 0x25CC,
    0x25EF, 0x2605, 0x2606, 0x260E, 0x2616, 0x2617, 0x2640, 0x2642, 0x26A0, 0x26BD,
    0x26BE, 0x2713, 0x271A, 0x273F, 0x2740, 0x2756, 0x2B1A, 0xFE10, 0xFE11, 0xFE12,
    0xFE19, 0xFF1D,
    // Emoji.
    0x1F100
};
63 | 69 |
// Code point ranges classified as CJK ideographs.
// The table is a flattened list of closed intervals: entries come in
// (first, last) pairs and both endpoints are included. It must stay sorted in
// ascending order because the non-trie lookup binary-searches it through
// valueInIntervalList().
static const UChar32 cjkIdeographRanges[] = {
    // CJK Radicals Supplement and Kangxi Radicals.
    0x2E80, 0x2FDF,
    // CJK Strokes.
    0x31C0, 0x31EF,
    // CJK Unified Ideographs Extension A.
    0x3400, 0x4DBF,
    // The basic CJK Unified Ideographs block.
    0x4E00, 0x9FFF,
    // CJK Compatibility Ideographs.
    0xF900, 0xFAFF,
    // CJK Unified Ideographs Extension B.
    0x20000, 0x2A6DF,
    // CJK Unified Ideographs Extension C.
    // CJK Unified Ideographs Extension D.
    0x2A700, 0x2B81F,
    // CJK Compatibility Ideographs Supplement.
    0x2F800, 0x2FA1F
};
| 89 |
// Code point ranges classified as CJK symbols (as opposed to ideographs).
// Same layout as cjkIdeographRanges: a flattened, ascending list of closed
// (first, last) pairs. The ordering matters because the non-trie lookup
// binary-searches this table through valueInIntervalList().
static const UChar32 cjkSymbolRanges[] = {
    0x2156, 0x215A,
    0x2160, 0x216B,
    0x2170, 0x217B,
    0x23BE, 0x23CC,
    0x2460, 0x2492,
    0x249C, 0x24FF,
    0x25CE, 0x25D3,
    0x25E2, 0x25E6,
    0x2600, 0x2603,
    0x2660, 0x266F,
    0x2672, 0x267D,
    0x2776, 0x277F,
    // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030.
    // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F
    0x2FF0, 0x302F,
    0x3031, 0x312F,
    // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF
    0x3190, 0x31BF,
    // Enclosed CJK Letters and Months (0x3200 .. 0x32FF).
    // CJK Compatibility (0x3300 .. 0x33FF).
    0x3200, 0x33FF,
    0xF860, 0xF862,
    // CJK Compatibility Forms.
    0xFE30, 0xFE4F,
    // Halfwidth and Fullwidth Forms
    // Usually only used in CJK
    0xFF00, 0xFF0C,
    0xFF0E, 0xFF1A,
    0xFF1F, 0xFFEF,
    // Emoji.
    0x1F110, 0x1F129,
    0x1F130, 0x1F149,
    0x1F150, 0x1F169,
    0x1F170, 0x1F189,
    0x1F200, 0x1F6FF
};
| 127 |
| 128 #if defined(USE_TRIE) |
// Single code points that receive the isUprightInMixedVertical property when
// the trie is built. This is a plain list of values (not range pairs); it is
// consumed by setValues() in createTrie().
static const UChar32 isUprightInMixedVerticalArray[] = {
    0x000A7,
    0x000A9,
    0x000AE,
    0x000B1,
    0x000D7,
    0x000F7
};
| 137 |
// Code point ranges that receive the isUprightInMixedVertical property when
// the trie is built. Flattened list of closed (first, last) pairs, consumed by
// setRanges() in createTrie(). A pair with identical endpoints denotes a
// single code point.
static const UChar32 isUprightInMixedVerticalRanges[] = {
    0x000BC, 0x000BE,
    // Spacing Modifier Letters (Part of)
    0x002EA, 0x002EB,
    // Hangul Jamo
    0x01100, 0x011FF,
    // Unified Canadian Aboriginal Syllabics
    0x01401, 0x0167F,
    // Unified Canadian Aboriginal Syllabics Extended
    0x018B0, 0x018FF,
    // General Punctuation (Part of)
    0x02016, 0x02016,
    0x02020, 0x02021,
    0x02030, 0x02031,
    0x0203B, 0x0203C,
    0x02042, 0x02042,
    0x02047, 0x02049,
    0x02051, 0x02051,
    0x02065, 0x02069,
    // Combining Diacritical Marks for Symbols (Part of)
    0x020DD, 0x020E0,
    0x020E2, 0x020E4,
    // Letterlike Symbols (Part of)/Number Forms
    0x02100, 0x02101,
    0x02103, 0x02109,
    0x0210F, 0x0210F,
    0x02113, 0x02114,
    0x02116, 0x02117,
    0x0211E, 0x02123,
    0x02125, 0x02125,
    0x02127, 0x02127,
    0x02129, 0x02129,
    0x0212E, 0x0212E,
    0x02135, 0x0213F,
    0x02145, 0x0214A,
    0x0214C, 0x0214D,
    0x0214F, 0x0218F,
    // Mathematical Operators (Part of)
    0x0221E, 0x0221E,
    0x02234, 0x02235,
    // Miscellaneous Technical (Part of)
    0x02300, 0x02307,
    0x0230C, 0x0231F,
    0x02324, 0x0232B,
    0x0237D, 0x0239A,
    0x023BE, 0x023CD,
    0x023CF, 0x023CF,
    0x023D1, 0x023DB,
    0x023E2, 0x02422,
    // Control Pictures (Part of)/Optical Character Recognition/Enclosed Alphanumerics
    0x02424, 0x024FF,
    // Geometric Shapes/Miscellaneous Symbols (Part of)
    0x025A0, 0x02619,
    0x02620, 0x02767,
    0x02776, 0x02793,
    // Miscellaneous Symbols and Arrows (Part of)
    0x02B12, 0x02B2F,
    0x02B50, 0x02B59,
    0x02BB8, 0x02BFF,
    // Common CJK
    0x02E80, 0x0A4CF,
    // Hangul Jamo Extended-A
    0x0A960, 0x0A97F,
    // Hangul Syllables/Hangul Jamo Extended-B
    0x0AC00, 0x0D7FF,
    // Private Use Area/CJK Compatibility Ideographs
    0x0E000, 0x0FAFF,
    // Vertical Forms
    0x0FE10, 0x0FE1F,
    // CJK Compatibility Forms (Part of)
    0x0FE30, 0x0FE48,
    // Small Form Variants (Part of)
    0x0FE50, 0x0FE57,
    0x0FE59, 0x0FE62,
    0x0FE67, 0x0FE6F,
    // Halfwidth and Fullwidth Forms
    0x0FF01, 0x0FF0C,
    0x0FF0E, 0x0FF1B,
    0x0FF1F, 0x0FF60,
    0x0FFE0, 0x0FFE7,
    // Specials (Part of)
    0x0FFF0, 0x0FFF8,
    0x0FFFC, 0x0FFFD,
    // Meroitic Hieroglyphs
    0x10980, 0x1099F,
    // Siddham
    0x11580, 0x115FF,
    // Egyptian Hieroglyphs
    0x13000, 0x1342F,
    // Kana Supplement
    0x1B000, 0x1B0FF,
    // Byzantine Musical Symbols/Musical Symbols
    0x1D000, 0x1D1FF,
    // Tai Xuan Jing Symbols/Counting Rod Numerals
    0x1D300, 0x1D37F,
    // Mahjong Tiles/Domino Tiles/Playing Cards/Enclosed Alphanumeric Supplement
    // Enclosed Ideographic Supplement/Miscellaneous Symbols and Pictographs
    // Emoticons/Ornamental Dingbats/Transport and Map Symbols
    // Alchemical Symbols
    0x1F000, 0x1F7FF,
    // CJK Unified Ideographs Extension B/C/D
    // CJK Compatibility Ideographs Supplement
    0x20000, 0x2FFFD,
    0x30000, 0x3FFFD,
    // Supplementary Private Use Area-A
    0xF0000, 0xFFFFD,
    // Supplementary Private Use Area-B
    0x100000, 0x10FFFD,
};
| 247 |
// Underlying storage for a set of per-code-point property bits.
using CharacterPropertiesType = uint8_t;

// Bit flags describing a code point. Values are distinct bits so that several
// properties can be combined in one CharacterProperties value with the
// operators below.
enum class CharacterProperties : CharacterPropertiesType {
    isCJKIdeograph = 0x0001,
    isCJKIdeographOrSymbol = 0x0002,
    isUprightInMixedVertical = 0x0004,
};

// Returns the union of the two flag sets.
inline CharacterProperties operator|(CharacterProperties a, CharacterProperties b)
{
    return static_cast<CharacterProperties>(
        static_cast<CharacterPropertiesType>(a) | static_cast<CharacterPropertiesType>(b));
}

// Returns the intersection of the two flag sets.
inline CharacterProperties operator&(CharacterProperties a, CharacterProperties b)
{
    return static_cast<CharacterProperties>(
        static_cast<CharacterPropertiesType>(a) & static_cast<CharacterPropertiesType>(b));
}

// Adds the flags in |b| to |a| in place. Returns a reference to |a|, matching
// the behaviour of the built-in compound assignment operators (the previous
// version returned a copy).
inline CharacterProperties& operator|=(CharacterProperties& a, CharacterProperties b)
{
    a = a | b;
    return a;
}
| 271 |
| 272 const UChar32 maxCodePointForPropertyValues = 0x1FFFFF; |
| 273 |
| 274 static void setRanges(CharacterProperties* values, const UChar32* ranges, size_t
length, CharacterProperties value) |
| 275 { |
| 276 const UChar32* end = ranges + length; |
| 277 for (; ranges != end; ranges += 2) { |
| 278 ASSERT(ranges[1] <= maxCodePointForPropertyValues); |
| 279 for (UChar32 c = ranges[0]; c <= ranges[1]; c++) |
| 280 values[c] |= value; |
| 281 } |
| 282 } |
| 283 |
| 284 static void setValues(CharacterProperties* values, const UChar32* begin, size_t
length, CharacterProperties value) |
| 285 { |
| 286 const UChar32* end = begin + length; |
| 287 for (; begin != end; begin++) { |
| 288 ASSERT(*begin <= maxCodePointForPropertyValues); |
| 289 values[*begin] |= value; |
| 290 } |
| 291 } |
| 292 |
| 293 static UTrie2* createTrie() |
| 294 { |
| 295 // Create a value array of all possible code points. |
| 296 const UChar32 size = maxCodePointForPropertyValues + 1; |
| 297 OwnPtr<CharacterProperties[]> values = adoptArrayPtr(new CharacterProperties
[size]); |
| 298 memset(values.get(), 0, sizeof(CharacterProperties) * size); |
| 299 setRanges(values.get(), cjkIdeographRanges, WTF_ARRAY_LENGTH(cjkIdeographRan
ges), |
| 300 CharacterProperties::isCJKIdeograph | CharacterProperties::isCJKIdeograp
hOrSymbol); |
| 301 setRanges(values.get(), cjkSymbolRanges, WTF_ARRAY_LENGTH(cjkSymbolRanges), |
| 302 CharacterProperties::isCJKIdeographOrSymbol); |
| 303 setValues(values.get(), cjkIsolatedSymbolsArray, WTF_ARRAY_LENGTH(cjkIsolate
dSymbolsArray), |
| 304 CharacterProperties::isCJKIdeographOrSymbol); |
| 305 setRanges(values.get(), isUprightInMixedVerticalRanges, WTF_ARRAY_LENGTH(isU
prightInMixedVerticalRanges), |
| 306 CharacterProperties::isUprightInMixedVertical); |
| 307 setValues(values.get(), isUprightInMixedVerticalArray, WTF_ARRAY_LENGTH(isUp
rightInMixedVerticalArray), |
| 308 CharacterProperties::isUprightInMixedVertical); |
| 309 |
| 310 // Create a Trie from the value array. |
| 311 UErrorCode error = U_ZERO_ERROR; |
| 312 UTrie2* trie = utrie2_open(0, 0, &error); |
| 313 UChar32 start = 0; |
| 314 CharacterProperties value = values[0]; |
| 315 for (UChar32 c = 1; ; c++) { |
| 316 if (c < size && values[c] == value) |
| 317 continue; |
| 318 if (static_cast<uint32_t>(value)) |
| 319 utrie2_setRange32(trie, start, c - 1, static_cast<uint32_t>(value),
TRUE, &error); |
| 320 if (c >= size) |
| 321 break; |
| 322 start = c; |
| 323 value = values[c]; |
| 324 } |
| 325 utrie2_freeze(trie, UTrie2ValueBits::UTRIE2_16_VALUE_BITS, &error); |
| 326 // uint32_t serializedSize = utrie2_serialize(trie, nullptr, 0, &error); |
| 327 // fprintf(stderr, "size=%u\n", serializedSize); |
| 328 return trie; |
| 329 } |
| 330 |
| 331 static bool hasCharacterProperty(UChar32 c, CharacterProperties property) |
| 332 { |
| 333 static UTrie2* trie = nullptr; |
| 334 if (!trie) |
| 335 trie = createTrie(); |
| 336 uint16_t value = UTRIE2_GET16(trie, c); |
| 337 return value & static_cast<CharacterPropertiesType>(property); |
| 338 } |
| 339 #endif |
| 340 |
// Tests whether |value| lies inside any interval of |intervalList|.
// The list is flattened: consecutive entries form closed (first, last) pairs,
// and the whole array must be sorted in ascending order.
template <class T, size_t size>
bool valueInIntervalList(const T (&intervalList)[size], const T& value)
{
    const T* const first = intervalList;
    const T* const firstGreater = std::upper_bound(first, first + size, value);
    const size_t entriesNotGreater = firstGreater - first;

    // An odd count means an interval start is <= value while its end is still
    // greater than value, so value is strictly inside that interval.
    if (entriesNotGreater & 1)
        return true;

    // With an even count, value can only match if it equals the end of the
    // interval that closed just before |firstGreater|.
    return entriesNotGreater > 0 && first[entriesNotGreater - 1] == value;
}
73 | 350 |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
170 // Search for other Complex cases | 447 // Search for other Complex cases |
171 if (valueInIntervalList(complexCodePathRanges, c)) | 448 if (valueInIntervalList(complexCodePathRanges, c)) |
172 return ComplexPath; | 449 return ComplexPath; |
173 } | 450 } |
174 | 451 |
175 return result; | 452 return result; |
176 } | 453 } |
177 | 454 |
178 bool Character::isUprightInMixedVertical(UChar32 character) | 455 bool Character::isUprightInMixedVertical(UChar32 character) |
179 { | 456 { |
| 457 #if defined(USE_TRIE) |
| 458 return hasCharacterProperty(character, CharacterProperties::isUprightInMixed
Vertical); |
| 459 #else |
180 // Fast path for common non-CJK | 460 // Fast path for common non-CJK |
181 if (character < 0x000A7) | 461 if (character < 0x000A7) |
182 return false; | 462 return false; |
183 | 463 |
184 // Fast path for common CJK | 464 // Fast path for common CJK |
185 if (isInRange(character, 0x02E80, 0x0A4CF)) | 465 if (isInRange(character, 0x02E80, 0x0A4CF)) |
186 return true; | 466 return true; |
187 | 467 |
188 if (isInRange(character, 0x0FF01, 0x0FFE7)) { | 468 if (isInRange(character, 0x0FF01, 0x0FFE7)) { |
189 if (character <= 0x0FF0C || isInRange(character, 0x0FF0E, 0x0FF1B) | 469 if (character <= 0x0FF0C || isInRange(character, 0x0FF0E, 0x0FF1B) |
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
297 // CJK Unified Ideographs Extension B/C/D | 577 // CJK Unified Ideographs Extension B/C/D |
298 // CJK Compatibility Ideographs Supplement | 578 // CJK Compatibility Ideographs Supplement |
299 0x20000, 0x2FFFD, | 579 0x20000, 0x2FFFD, |
300 0x30000, 0x3FFFD, | 580 0x30000, 0x3FFFD, |
301 // Supplementary Private Use Area-A | 581 // Supplementary Private Use Area-A |
302 0xF0000, 0xFFFFD, | 582 0xF0000, 0xFFFFD, |
303 // Supplementary Private Use Area-B | 583 // Supplementary Private Use Area-B |
304 0x100000, 0x10FFFD, | 584 0x100000, 0x10FFFD, |
305 }; | 585 }; |
306 return valueInIntervalList(uprightRanges, character); | 586 return valueInIntervalList(uprightRanges, character); |
| 587 #endif |
307 } | 588 } |
308 | 589 |
309 bool Character::isCJKIdeograph(UChar32 c) | 590 bool Character::isCJKIdeograph(UChar32 c) |
310 { | 591 { |
311 static const UChar32 cjkIdeographRanges[] = { | 592 #if defined(USE_TRIE) |
312 // CJK Radicals Supplement and Kangxi Radicals. | 593 return hasCharacterProperty(c, CharacterProperties::isCJKIdeograph); |
313 0x2E80, 0x2FDF, | 594 #else |
314 // CJK Strokes. | |
315 0x31C0, 0x31EF, | |
316 // CJK Unified Ideographs Extension A. | |
317 0x3400, 0x4DBF, | |
318 // The basic CJK Unified Ideographs block. | |
319 0x4E00, 0x9FFF, | |
320 // CJK Compatibility Ideographs. | |
321 0xF900, 0xFAFF, | |
322 // CJK Unified Ideographs Extension B. | |
323 0x20000, 0x2A6DF, | |
324 // CJK Unified Ideographs Extension C. | |
325 // CJK Unified Ideographs Extension D. | |
326 0x2A700, 0x2B81F, | |
327 // CJK Compatibility Ideographs Supplement. | |
328 0x2F800, 0x2FA1F | |
329 }; | |
330 static size_t cjkIdeographRangesCount = WTF_ARRAY_LENGTH(cjkIdeographRanges)
; | 595 static size_t cjkIdeographRangesCount = WTF_ARRAY_LENGTH(cjkIdeographRanges)
; |
331 | 596 |
332 // Early out | 597 // Early out |
333 if (c < cjkIdeographRanges[0] || c > cjkIdeographRanges[cjkIdeographRangesCo
unt - 1]) | 598 if (c < cjkIdeographRanges[0] || c > cjkIdeographRanges[cjkIdeographRangesCo
unt - 1]) |
334 return false; | 599 return false; |
335 | 600 |
336 return valueInIntervalList(cjkIdeographRanges, c); | 601 return valueInIntervalList(cjkIdeographRanges, c); |
| 602 #endif |
337 } | 603 } |
338 | 604 |
339 bool Character::isCJKIdeographOrSymbol(UChar32 c) | 605 bool Character::isCJKIdeographOrSymbol(UChar32 c) |
340 { | 606 { |
341 // Likely common case | 607 // Likely common case |
342 if (c < 0x2C7) | 608 if (c < 0x2C7) |
343 return false; | 609 return false; |
344 | 610 |
| 611 #if defined(USE_TRIE) |
| 612 return hasCharacterProperty(c, CharacterProperties::isCJKIdeographOrSymbol); |
| 613 #else |
345 if (isCJKIdeograph(c)) | 614 if (isCJKIdeograph(c)) |
346 return true; | 615 return true; |
347 | 616 |
348 static const UChar32 cjkSymbolRanges[] = { | |
349 0x2156, 0x215A, | |
350 0x2160, 0x216B, | |
351 0x2170, 0x217B, | |
352 0x23BE, 0x23CC, | |
353 0x2460, 0x2492, | |
354 0x249C, 0x24FF, | |
355 0x25CE, 0x25D3, | |
356 0x25E2, 0x25E6, | |
357 0x2600, 0x2603, | |
358 0x2660, 0x266F, | |
359 0x2672, 0x267D, | |
360 0x2776, 0x277F, | |
361 // Ideographic Description Characters, with CJK Symbols and Punctuation,
excluding 0x3030. | |
362 // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0
x3100 .. 0x312F | |
363 0x2FF0, 0x302F, | |
364 0x3031, 0x312F, | |
365 // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF | |
366 0x3190, 0x31BF, | |
367 // Enclosed CJK Letters and Months (0x3200 .. 0x32FF). | |
368 // CJK Compatibility (0x3300 .. 0x33FF). | |
369 0x3200, 0x33FF, | |
370 0xF860, 0xF862, | |
371 // CJK Compatibility Forms. | |
372 0xFE30, 0xFE4F, | |
373 // Halfwidth and Fullwidth Forms | |
374 // Usually only used in CJK | |
375 0xFF00, 0xFF0C, | |
376 0xFF0E, 0xFF1A, | |
377 0xFF1F, 0xFFEF, | |
378 // Emoji. | |
379 0x1F110, 0x1F129, | |
380 0x1F130, 0x1F149, | |
381 0x1F150, 0x1F169, | |
382 0x1F170, 0x1F189, | |
383 0x1F200, 0x1F6FF | |
384 }; | |
385 | 617 |
386 if (c >= cjkSymbolRanges[0] | 618 if (c >= cjkSymbolRanges[0] |
387 && c <= cjkSymbolRanges[WTF_ARRAY_LENGTH(cjkSymbolRanges) - 1] | 619 && c <= cjkSymbolRanges[WTF_ARRAY_LENGTH(cjkSymbolRanges) - 1] |
388 && valueInIntervalList(cjkSymbolRanges, c)) { | 620 && valueInIntervalList(cjkSymbolRanges, c)) { |
389 return true; | 621 return true; |
390 } | 622 } |
391 | 623 |
392 if (c < 0x2020 && c > 0x2D9) | 624 if (c < 0x2020 && c > 0x2D9) |
393 return false; | 625 return false; |
394 | 626 |
395 // Hash lookup for isolated symbols (those not part of a contiguous range) | 627 // Hash lookup for isolated symbols (those not part of a contiguous range) |
396 static HashSet<UChar32>* cjkIsolatedSymbols = 0; | 628 static HashSet<UChar32>* cjkIsolatedSymbols = 0; |
397 if (!cjkIsolatedSymbols) { | 629 if (!cjkIsolatedSymbols) { |
398 cjkIsolatedSymbols = new HashSet<UChar32>(); | 630 cjkIsolatedSymbols = new HashSet<UChar32>(); |
399 for (size_t i = 0; i < WTF_ARRAY_LENGTH(cjkIsolatedSymbolsArray); ++i) | 631 for (size_t i = 0; i < WTF_ARRAY_LENGTH(cjkIsolatedSymbolsArray); ++i) |
400 cjkIsolatedSymbols->add(cjkIsolatedSymbolsArray[i]); | 632 cjkIsolatedSymbols->add(cjkIsolatedSymbolsArray[i]); |
401 } | 633 } |
402 return cjkIsolatedSymbols->contains(c); | 634 return cjkIsolatedSymbols->contains(c); |
| 635 #endif |
403 } | 636 } |
404 | 637 |
405 unsigned Character::expansionOpportunityCount(const LChar* characters, size_t le
ngth, TextDirection direction, bool& isAfterExpansion, const TextJustify textJus
tify) | 638 unsigned Character::expansionOpportunityCount(const LChar* characters, size_t le
ngth, TextDirection direction, bool& isAfterExpansion, const TextJustify textJus
tify) |
406 { | 639 { |
407 unsigned count = 0; | 640 unsigned count = 0; |
408 if (textJustify == TextJustifyDistribute) { | 641 if (textJustify == TextJustifyDistribute) { |
409 isAfterExpansion = true; | 642 isAfterExpansion = true; |
410 return length; | 643 return length; |
411 } | 644 } |
412 | 645 |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
519 } | 752 } |
520 | 753 |
521 bool Character::isCommonOrInheritedScript(UChar32 character) | 754 bool Character::isCommonOrInheritedScript(UChar32 character) |
522 { | 755 { |
523 UErrorCode status = U_ZERO_ERROR; | 756 UErrorCode status = U_ZERO_ERROR; |
524 UScriptCode script = uscript_getScript(character, &status); | 757 UScriptCode script = uscript_getScript(character, &status); |
525 return U_SUCCESS(status) && (script == USCRIPT_COMMON || script == USCRIPT_I
NHERITED); | 758 return U_SUCCESS(status) && (script == USCRIPT_COMMON || script == USCRIPT_I
NHERITED); |
526 } | 759 } |
527 | 760 |
528 } // namespace blink | 761 } // namespace blink |
OLD | NEW |