| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 2014 Google Inc. All rights reserved. | |
| 3 * | |
| 4 * Redistribution and use in source and binary forms, with or without | |
| 5 * modification, are permitted provided that the following conditions are | |
| 6 * met: | |
| 7 * | |
| 8 * * Redistributions of source code must retain the above copyright | |
| 9 * notice, this list of conditions and the following disclaimer. | |
| 10 * * Redistributions in binary form must reproduce the above | |
| 11 * copyright notice, this list of conditions and the following disclaimer | |
| 12 * in the documentation and/or other materials provided with the | |
| 13 * distribution. | |
| 14 * * Neither the name of Google Inc. nor the names of its | |
| 15 * contributors may be used to endorse or promote products derived from | |
| 16 * this software without specific prior written permission. | |
| 17 * | |
| 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 29 */ | |
| 30 | |
| 31 #include "platform/fonts/Character.h" | |
| 32 | |
| 33 #include "wtf/StdLibExtras.h" | |
| 34 #include "wtf/text/StringBuilder.h" | |
| 35 #include <algorithm> | |
| 36 #include <unicode/uobject.h> | |
| 37 #include <unicode/uscript.h> | |
| 38 | |
| 39 #if defined(USING_SYSTEM_ICU) | |
| 40 #include "platform/fonts/CharacterPropertyDataGenerator.h" | |
| 41 #include <unicode/uniset.h> | |
| 42 #else | |
| 43 #define MUTEX_H // Prevent compile failure of utrie2.h on Windows | |
| 44 #include <utrie2.h> | |
| 45 #endif | |
| 46 | |
| 47 using namespace WTF; | |
| 48 using namespace Unicode; | |
| 49 | |
| 50 namespace blink { | |
| 51 | |
| 52 #if defined(USING_SYSTEM_ICU) | |
| 53 static icu::UnicodeSet* createUnicodeSet( | |
| 54 const UChar32* characters, size_t charactersCount, | |
| 55 const UChar32* ranges, size_t rangesCount) | |
| 56 { | |
| 57 icu::UnicodeSet* unicodeSet = new icu::UnicodeSet(); | |
| 58 for (size_t i = 0; i < charactersCount; i++) | |
| 59 unicodeSet->add(characters[i]); | |
| 60 for (size_t i = 0; i < rangesCount; i += 2) | |
| 61 unicodeSet->add(ranges[i], ranges[i + 1]); | |
| 62 unicodeSet->freeze(); | |
| 63 return unicodeSet; | |
| 64 } | |
| 65 | |
// Expands to a createUnicodeSet() call fed by the tables |prefix|Array and
// |prefix|Ranges, which must be visible at the expansion site.
#define CREATE_UNICODE_SET(prefix) \
    createUnicodeSet( \
        prefix##Array, WTF_ARRAY_LENGTH(prefix##Array), \
        prefix##Ranges, WTF_ARRAY_LENGTH(prefix##Ranges))
| 70 | |
// Function-body helper for the system-ICU build. On first use it builds the
// UnicodeSet for |property| and keeps it in a function-local static; it then
// returns whether that set contains |codePoint|. The expansion already ends
// with the return statement's semicolon.
#define RETURN_HAS_PROPERTY(codePoint, property) \
    static icu::UnicodeSet* cachedSet = nullptr; \
    if (!cachedSet) \
        cachedSet = CREATE_UNICODE_SET(property); \
    return cachedSet->contains(codePoint);
| 76 #else | |
| 77 // Freezed trie tree, see CharacterDataGenerator.cpp. | |
| 78 extern int32_t serializedCharacterDataSize; | |
| 79 extern uint8_t serializedCharacterData[]; | |
| 80 | |
| 81 static UTrie2* createTrie() | |
| 82 { | |
| 83 // Create a Trie from the value array. | |
| 84 UErrorCode error = U_ZERO_ERROR; | |
| 85 UTrie2* trie = utrie2_openFromSerialized( | |
| 86 UTrie2ValueBits::UTRIE2_16_VALUE_BITS, | |
| 87 serializedCharacterData, serializedCharacterDataSize, | |
| 88 nullptr, &error); | |
| 89 ASSERT(error == U_ZERO_ERROR); | |
| 90 return trie; | |
| 91 } | |
| 92 | |
| 93 static bool hasProperty(UChar32 c, CharacterProperty property) | |
| 94 { | |
| 95 static UTrie2* trie = nullptr; | |
| 96 if (!trie) | |
| 97 trie = createTrie(); | |
| 98 return UTRIE2_GET16(trie, c) | |
| 99 & static_cast<CharacterPropertyType>(property); | |
| 100 } | |
| 101 | |
// Function-body helper for the bundled-trie build: returns
// hasProperty(|codePoint|, CharacterProperty::|property|). The expansion
// already ends with the return statement's semicolon.
#define RETURN_HAS_PROPERTY(codePoint, property) \
    return hasProperty(codePoint, CharacterProperty::property);
| 104 #endif | |
| 105 | |
// Tests whether |value| lies inside any interval of |intervalList|, a
// flattened array of closed intervals: { first0, last0, first1, last1, ... }.
// The array must be sorted in ascending order because it is binary-searched.
template <class T, size_t size>
bool valueInIntervalList(const T (&intervalList)[size], const T& value)
{
    const T* const begin = intervalList;
    const T* const end = intervalList + size;

    // Position of the first entry strictly greater than |value|.
    const T* const firstGreater = std::upper_bound(begin, end, value);
    const size_t position = static_cast<size_t>(firstGreater - begin);

    // An odd position means an interval start is <= |value| while its end is
    // still greater, i.e. |value| is inside that interval.
    if (position & 1)
        return true;

    // Otherwise |value| can only match the inclusive end of the interval that
    // finishes just before |firstGreater|.
    return position > 0 && begin[position - 1] == value;
}
| 115 | |
| 116 CodePath Character::characterRangeCodePath(const UChar* characters, unsigned len
) | |
| 117 { | |
| 118 static const UChar complexCodePathRanges[] = { | |
| 119 // U+02E5 through U+02E9 (Modifier Letters : Tone letters) | |
| 120 0x2E5, 0x2E9, | |
| 121 // U+0300 through U+036F Combining diacritical marks | |
| 122 0x300, 0x36F, | |
| 123 // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, ... | |
| 124 0x0591, 0x05BD, | |
| 125 // ... Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha | |
| 126 0x05BF, 0x05CF, | |
| 127 // U+0600 through U+109F Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic
, | |
| 128 // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannad
a, | |
| 129 // Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar | |
| 130 0x0600, 0x109F, | |
| 131 // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left | |
| 132 // here if you precompose; Modern Korean will be precomposed as a result
of step A) | |
| 133 0x1100, 0x11FF, | |
| 134 // U+135D through U+135F Ethiopic combining marks | |
| 135 0x135D, 0x135F, | |
| 136 // U+1780 through U+18AF Tagalog, Hanunoo, Buhid, Taghanwa,Khmer, Mongol
ian | |
| 137 0x1700, 0x18AF, | |
| 138 // U+1900 through U+194F Limbu (Unicode 4.0) | |
| 139 0x1900, 0x194F, | |
| 140 // U+1980 through U+19DF New Tai Lue | |
| 141 0x1980, 0x19DF, | |
| 142 // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha, Ve
dic | |
| 143 0x1A00, 0x1CFF, | |
| 144 // U+1DC0 through U+1DFF Comining diacritical mark supplement | |
| 145 0x1DC0, 0x1DFF, | |
| 146 // U+20D0 through U+20FF Combining marks for symbols | |
| 147 0x20D0, 0x20FF, | |
| 148 // U+2CEF through U+2CF1 Combining marks for Coptic | |
| 149 0x2CEF, 0x2CF1, | |
| 150 // U+302A through U+302F Ideographic and Hangul Tone marks | |
| 151 0x302A, 0x302F, | |
| 152 // Combining Katakana-Hiragana Voiced/Semi-voiced Sound Mark | |
| 153 0x3099, 0x309A, | |
| 154 // U+A67C through U+A67D Combining marks for old Cyrillic | |
| 155 0xA67C, 0xA67D, | |
| 156 // U+A6F0 through U+A6F1 Combining mark for Bamum | |
| 157 0xA6F0, 0xA6F1, | |
| 158 // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extende
d, | |
| 159 // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei Ma
yek | |
| 160 0xA800, 0xABFF, | |
| 161 // U+D7B0 through U+D7FF Hangul Jamo Ext. B | |
| 162 0xD7B0, 0xD7FF, | |
| 163 // U+FE00 through U+FE0F Unicode variation selectors | |
| 164 0xFE00, 0xFE0F, | |
| 165 // U+FE20 through U+FE2F Combining half marks | |
| 166 0xFE20, 0xFE2F | |
| 167 }; | |
| 168 | |
| 169 CodePath result = SimplePath; | |
| 170 for (unsigned i = 0; i < len; i++) { | |
| 171 const UChar c = characters[i]; | |
| 172 | |
| 173 // Shortcut for common case | |
| 174 if (c < 0x2E5) | |
| 175 continue; | |
| 176 | |
| 177 // Surrogate pairs | |
| 178 if (c > 0xD7FF && c <= 0xDBFF) { | |
| 179 if (i == len - 1) | |
| 180 continue; | |
| 181 | |
| 182 UChar next = characters[++i]; | |
| 183 if (!U16_IS_TRAIL(next)) | |
| 184 continue; | |
| 185 | |
| 186 UChar32 supplementaryCharacter = U16_GET_SUPPLEMENTARY(c, next); | |
| 187 | |
| 188 if (supplementaryCharacter < 0x1F1E6) // U+1F1E6 through U+1F1FF Reg
ional Indicator Symbols | |
| 189 continue; | |
| 190 if (supplementaryCharacter <= 0x1F1FF) | |
| 191 return ComplexPath; | |
| 192 | |
| 193 // Emoji Fitzpatrick modifiers trigger upgrade to complex path for s
haping them. | |
| 194 if (supplementaryCharacter < 0x1F3FB) | |
| 195 continue; | |
| 196 if (supplementaryCharacter <= 0x1F3FF) | |
| 197 return ComplexPath; | |
| 198 | |
| 199 if (supplementaryCharacter == eyeCharacter) | |
| 200 return ComplexPath; | |
| 201 | |
| 202 // Man and Woman Emojies, | |
| 203 // in order to support emoji joiner combinations for family and coup
le pictographs. | |
| 204 // Compare http://unicode.org/reports/tr51/#Emoji_ZWJ_Sequences | |
| 205 if (supplementaryCharacter < 0x1F468) | |
| 206 continue; | |
| 207 if (supplementaryCharacter <= 0x1F469) | |
| 208 return ComplexPath; | |
| 209 | |
| 210 if (supplementaryCharacter == leftSpeechBubbleCharacter) | |
| 211 return ComplexPath; | |
| 212 | |
| 213 if (supplementaryCharacter < 0xE0100) // U+E0100 through U+E01EF Uni
code variation selectors. | |
| 214 continue; | |
| 215 if (supplementaryCharacter <= 0xE01EF) | |
| 216 return ComplexPath; | |
| 217 | |
| 218 // FIXME: Check for Brahmi (U+11000 block), Kaithi (U+11080 block) a
nd other complex scripts | |
| 219 // in plane 1 or higher. | |
| 220 | |
| 221 continue; | |
| 222 } | |
| 223 | |
| 224 // Search for other Complex cases | |
| 225 if (valueInIntervalList(complexCodePathRanges, c)) | |
| 226 return ComplexPath; | |
| 227 } | |
| 228 | |
| 229 return result; | |
| 230 } | |
| 231 | |
// Returns the result of the RETURN_HAS_PROPERTY lookup of the
// isUprightInMixedVertical property for |character|.
bool Character::isUprightInMixedVertical(UChar32 character)
{
    // The macro expands to the complete return statement (including its
    // semicolon), so none is written here.
    RETURN_HAS_PROPERTY(character, isUprightInMixedVertical)
}
| 236 | |
| 237 bool Character::isCJKIdeographOrSymbol(UChar32 c) | |
| 238 { | |
| 239 // Likely common case | |
| 240 if (c < 0x2C7) | |
| 241 return false; | |
| 242 | |
| 243 RETURN_HAS_PROPERTY(c, isCJKIdeographOrSymbol) | |
| 244 } | |
| 245 | |
| 246 unsigned Character::expansionOpportunityCount(const LChar* characters, size_t le
ngth, TextDirection direction, bool& isAfterExpansion, const TextJustify textJus
tify) | |
| 247 { | |
| 248 unsigned count = 0; | |
| 249 if (textJustify == TextJustifyDistribute) { | |
| 250 isAfterExpansion = true; | |
| 251 return length; | |
| 252 } | |
| 253 | |
| 254 if (direction == LTR) { | |
| 255 for (size_t i = 0; i < length; ++i) { | |
| 256 if (treatAsSpace(characters[i])) { | |
| 257 count++; | |
| 258 isAfterExpansion = true; | |
| 259 } else { | |
| 260 isAfterExpansion = false; | |
| 261 } | |
| 262 } | |
| 263 } else { | |
| 264 for (size_t i = length; i > 0; --i) { | |
| 265 if (treatAsSpace(characters[i - 1])) { | |
| 266 count++; | |
| 267 isAfterExpansion = true; | |
| 268 } else { | |
| 269 isAfterExpansion = false; | |
| 270 } | |
| 271 } | |
| 272 } | |
| 273 | |
| 274 return count; | |
| 275 } | |
| 276 | |
| 277 unsigned Character::expansionOpportunityCount(const UChar* characters, size_t le
ngth, TextDirection direction, bool& isAfterExpansion, const TextJustify textJus
tify) | |
| 278 { | |
| 279 unsigned count = 0; | |
| 280 if (direction == LTR) { | |
| 281 for (size_t i = 0; i < length; ++i) { | |
| 282 UChar32 character = characters[i]; | |
| 283 if (treatAsSpace(character)) { | |
| 284 count++; | |
| 285 isAfterExpansion = true; | |
| 286 continue; | |
| 287 } | |
| 288 if (U16_IS_LEAD(character) && i + 1 < length && U16_IS_TRAIL(charact
ers[i + 1])) { | |
| 289 character = U16_GET_SUPPLEMENTARY(character, characters[i + 1]); | |
| 290 i++; | |
| 291 } | |
| 292 if (textJustify == TextJustify::TextJustifyAuto && isCJKIdeographOrS
ymbol(character)) { | |
| 293 if (!isAfterExpansion) | |
| 294 count++; | |
| 295 count++; | |
| 296 isAfterExpansion = true; | |
| 297 continue; | |
| 298 } | |
| 299 isAfterExpansion = false; | |
| 300 } | |
| 301 } else { | |
| 302 for (size_t i = length; i > 0; --i) { | |
| 303 UChar32 character = characters[i - 1]; | |
| 304 if (treatAsSpace(character)) { | |
| 305 count++; | |
| 306 isAfterExpansion = true; | |
| 307 continue; | |
| 308 } | |
| 309 if (U16_IS_TRAIL(character) && i > 1 && U16_IS_LEAD(characters[i - 2
])) { | |
| 310 character = U16_GET_SUPPLEMENTARY(characters[i - 2], character); | |
| 311 i--; | |
| 312 } | |
| 313 if (textJustify == TextJustify::TextJustifyAuto && isCJKIdeographOrS
ymbol(character)) { | |
| 314 if (!isAfterExpansion) | |
| 315 count++; | |
| 316 count++; | |
| 317 isAfterExpansion = true; | |
| 318 continue; | |
| 319 } | |
| 320 isAfterExpansion = false; | |
| 321 } | |
| 322 } | |
| 323 return count; | |
| 324 } | |
| 325 | |
| 326 bool Character::canReceiveTextEmphasis(UChar32 c) | |
| 327 { | |
| 328 CharCategory category = Unicode::category(c); | |
| 329 if (category & (Separator_Space | Separator_Line | Separator_Paragraph | Oth
er_NotAssigned | Other_Control | Other_Format)) | |
| 330 return false; | |
| 331 | |
| 332 // Additional word-separator characters listed in CSS Text Level 3 Editor's
Draft 3 November 2010. | |
| 333 if (c == ethiopicWordspaceCharacter || c == aegeanWordSeparatorLineCharacter
|| c == aegeanWordSeparatorDotCharacter | |
| 334 || c == ugariticWordDividerCharacter || c == tibetanMarkIntersyllabicTsh
egCharacter || c == tibetanMarkDelimiterTshegBstarCharacter) | |
| 335 return false; | |
| 336 | |
| 337 return true; | |
| 338 } | |
| 339 | |
| 340 template <typename CharacterType> | |
| 341 static inline String normalizeSpacesInternal(const CharacterType* characters, un
signed length) | |
| 342 { | |
| 343 StringBuilder normalized; | |
| 344 normalized.reserveCapacity(length); | |
| 345 | |
| 346 for (unsigned i = 0; i < length; ++i) | |
| 347 normalized.append(Character::normalizeSpaces(characters[i])); | |
| 348 | |
| 349 return normalized.toString(); | |
| 350 } | |
| 351 | |
| 352 String Character::normalizeSpaces(const LChar* characters, unsigned length) | |
| 353 { | |
| 354 return normalizeSpacesInternal(characters, length); | |
| 355 } | |
| 356 | |
| 357 String Character::normalizeSpaces(const UChar* characters, unsigned length) | |
| 358 { | |
| 359 return normalizeSpacesInternal(characters, length); | |
| 360 } | |
| 361 | |
| 362 bool Character::isCommonOrInheritedScript(UChar32 character) | |
| 363 { | |
| 364 UErrorCode status = U_ZERO_ERROR; | |
| 365 UScriptCode script = uscript_getScript(character, &status); | |
| 366 return U_SUCCESS(status) && (script == USCRIPT_COMMON || script == USCRIPT_I
NHERITED); | |
| 367 } | |
| 368 | |
| 369 } // namespace blink | |
| OLD | NEW |