| Index: third_party/WebKit/Source/platform/fonts/Character.cpp
|
| diff --git a/third_party/WebKit/Source/platform/fonts/Character.cpp b/third_party/WebKit/Source/platform/fonts/Character.cpp
|
| index b0dc240d950286ad8b231606f65e639ec169f338..830eaec50b34c286c8dea282024997004b83a56c 100644
|
| --- a/third_party/WebKit/Source/platform/fonts/Character.cpp
|
| +++ b/third_party/WebKit/Source/platform/fonts/Character.cpp
|
| @@ -34,6 +34,12 @@
|
| #include "wtf/text/StringBuilder.h"
|
| #include <algorithm>
|
| #include <unicode/uscript.h>
|
| +#define USE_TRIE
|
| +#if defined(USE_TRIE)
|
| +#include <unicode/uobject.h>
|
| +#define MUTEX_H // Required to compile on Windows
|
| +#include <utrie2.h>
|
| +#endif
|
|
|
| using namespace WTF;
|
| using namespace Unicode;
|
| @@ -61,6 +67,277 @@ static const UChar32 cjkIsolatedSymbolsArray[] = {
|
| 0x1F100
|
| };
|
|
|
| +// Code point ranges that createTrie() tags as both isCJKIdeograph and
|
| +// isCJKIdeographOrSymbol; Character::isCJKIdeograph() also searches this table
|
| +// directly when USE_TRIE is not defined. The layout is a flattened list of
|
| +// closed intervals: each row is one inclusive {first, last} pair.
|
| +static const UChar32 cjkIdeographRanges[] = {
|
| + // CJK Radicals Supplement and Kangxi Radicals.
|
| + 0x2E80, 0x2FDF,
|
| + // CJK Strokes.
|
| + 0x31C0, 0x31EF,
|
| + // CJK Unified Ideographs Extension A.
|
| + 0x3400, 0x4DBF,
|
| + // The basic CJK Unified Ideographs block.
|
| + 0x4E00, 0x9FFF,
|
| + // CJK Compatibility Ideographs.
|
| + 0xF900, 0xFAFF,
|
| + // CJK Unified Ideographs Extension B.
|
| + 0x20000, 0x2A6DF,
|
| + // CJK Unified Ideographs Extension C.
|
| + // CJK Unified Ideographs Extension D.
|
| + 0x2A700, 0x2B81F,
|
| + // CJK Compatibility Ideographs Supplement.
|
| + 0x2F800, 0x2FA1F
|
| +};
|
| +
|
| +// Symbol and punctuation ranges that createTrie() tags as
|
| +// isCJKIdeographOrSymbol only. Flattened list of closed intervals, one
|
| +// inclusive {first, last} pair per row. Keep the rows in ascending order: the
|
| +// non-trie path of Character::isCJKIdeographOrSymbol() uses the first and last
|
| +// elements as the overall bounds of the table.
|
| +static const UChar32 cjkSymbolRanges[] = {
|
| + 0x2156, 0x215A,
|
| + 0x2160, 0x216B,
|
| + 0x2170, 0x217B,
|
| + 0x23BE, 0x23CC,
|
| + 0x2460, 0x2492,
|
| + 0x249C, 0x24FF,
|
| + 0x25CE, 0x25D3,
|
| + 0x25E2, 0x25E6,
|
| + 0x2600, 0x2603,
|
| + 0x2660, 0x266F,
|
| + 0x2672, 0x267D,
|
| + 0x2776, 0x277F,
|
| + // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030.
|
| + // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F
|
| + 0x2FF0, 0x302F,
|
| + 0x3031, 0x312F,
|
| + // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF
|
| + 0x3190, 0x31BF,
|
| + // Enclosed CJK Letters and Months (0x3200 .. 0x32FF).
|
| + // CJK Compatibility (0x3300 .. 0x33FF).
|
| + 0x3200, 0x33FF,
|
| + 0xF860, 0xF862,
|
| + // CJK Compatibility Forms.
|
| + 0xFE30, 0xFE4F,
|
| + // Halfwidth and Fullwidth Forms
|
| + // Usually only used in CJK
|
| + 0xFF00, 0xFF0C,
|
| + 0xFF0E, 0xFF1A,
|
| + 0xFF1F, 0xFFEF,
|
| + // Emoji.
|
| + 0x1F110, 0x1F129,
|
| + 0x1F130, 0x1F149,
|
| + 0x1F150, 0x1F169,
|
| + 0x1F170, 0x1F189,
|
| + 0x1F200, 0x1F6FF
|
| +};
|
| +
|
| +#if defined(USE_TRIE)
|
| +// Individual code points (not ranges) that createTrie() tags as
|
| +// isUprightInMixedVertical via setValues().
|
| +static const UChar32 isUprightInMixedVerticalArray[] = {
|
| + 0x000A7, // SECTION SIGN
|
| + 0x000A9, // COPYRIGHT SIGN
|
| + 0x000AE, // REGISTERED SIGN
|
| + 0x000B1, // PLUS-MINUS SIGN
|
| + 0x000D7, // MULTIPLICATION SIGN
|
| + 0x000F7 // DIVISION SIGN
|
| +};
|
| +
|
| +// Code point ranges that createTrie() tags as isUprightInMixedVertical.
|
| +// Flattened list of closed intervals, one inclusive {first, last} pair per row;
|
| +// a row whose two values are equal covers a single code point.
|
| +static const UChar32 isUprightInMixedVerticalRanges[] = {
|
| + // Latin-1 Supplement vulgar fractions (1/4, 1/2, 3/4)
|
| + 0x000BC, 0x000BE,
|
| + // Spacing Modifier Letters (Part of)
|
| + 0x002EA, 0x002EB,
|
| + // Hangul Jamo
|
| + 0x01100, 0x011FF,
|
| + // Unified Canadian Aboriginal Syllabics
|
| + 0x01401, 0x0167F,
|
| + // Unified Canadian Aboriginal Syllabics Extended
|
| + 0x018B0, 0x018FF,
|
| + // General Punctuation (Part of)
|
| + 0x02016, 0x02016,
|
| + 0x02020, 0x02021,
|
| + 0x02030, 0x02031,
|
| + 0x0203B, 0x0203C,
|
| + 0x02042, 0x02042,
|
| + 0x02047, 0x02049,
|
| + 0x02051, 0x02051,
|
| + 0x02065, 0x02069,
|
| + // Combining Diacritical Marks for Symbols (Part of)
|
| + 0x020DD, 0x020E0,
|
| + 0x020E2, 0x020E4,
|
| + // Letterlike Symbols (Part of)/Number Forms
|
| + 0x02100, 0x02101,
|
| + 0x02103, 0x02109,
|
| + 0x0210F, 0x0210F,
|
| + 0x02113, 0x02114,
|
| + 0x02116, 0x02117,
|
| + 0x0211E, 0x02123,
|
| + 0x02125, 0x02125,
|
| + 0x02127, 0x02127,
|
| + 0x02129, 0x02129,
|
| + 0x0212E, 0x0212E,
|
| + 0x02135, 0x0213F,
|
| + 0x02145, 0x0214A,
|
| + 0x0214C, 0x0214D,
|
| + 0x0214F, 0x0218F,
|
| + // Mathematical Operators (Part of)
|
| + 0x0221E, 0x0221E,
|
| + 0x02234, 0x02235,
|
| + // Miscellaneous Technical (Part of)
|
| + 0x02300, 0x02307,
|
| + 0x0230C, 0x0231F,
|
| + 0x02324, 0x0232B,
|
| + 0x0237D, 0x0239A,
|
| + 0x023BE, 0x023CD,
|
| + 0x023CF, 0x023CF,
|
| + 0x023D1, 0x023DB,
|
| + 0x023E2, 0x02422,
|
| + // Control Pictures (Part of)/Optical Character Recognition/Enclosed Alphanumerics
|
| + 0x02424, 0x024FF,
|
| + // Geometric Shapes/Miscellaneous Symbols (Part of)
|
| + 0x025A0, 0x02619,
|
| + 0x02620, 0x02767,
|
| + 0x02776, 0x02793,
|
| + // Miscellaneous Symbols and Arrows (Part of)
|
| + 0x02B12, 0x02B2F,
|
| + 0x02B50, 0x02B59,
|
| + 0x02BB8, 0x02BFF,
|
| + // Common CJK
|
| + 0x02E80, 0x0A4CF,
|
| + // Hangul Jamo Extended-A
|
| + 0x0A960, 0x0A97F,
|
| + // Hangul Syllables/Hangul Jamo Extended-B
|
| + 0x0AC00, 0x0D7FF,
|
| + // Private Use Area/CJK Compatibility Ideographs
|
| + 0x0E000, 0x0FAFF,
|
| + // Vertical Forms
|
| + 0x0FE10, 0x0FE1F,
|
| + // CJK Compatibility Forms (Part of)
|
| + 0x0FE30, 0x0FE48,
|
| + // Small Form Variants (Part of)
|
| + 0x0FE50, 0x0FE57,
|
| + 0x0FE59, 0x0FE62,
|
| + 0x0FE67, 0x0FE6F,
|
| + // Halfwidth and Fullwidth Forms
|
| + 0x0FF01, 0x0FF0C,
|
| + 0x0FF0E, 0x0FF1B,
|
| + 0x0FF1F, 0x0FF60,
|
| + 0x0FFE0, 0x0FFE7,
|
| + // Specials (Part of)
|
| + 0x0FFF0, 0x0FFF8,
|
| + 0x0FFFC, 0x0FFFD,
|
| + // Meroitic Hieroglyphs
|
| + 0x10980, 0x1099F,
|
| + // Siddham
|
| + 0x11580, 0x115FF,
|
| + // Egyptian Hieroglyphs
|
| + 0x13000, 0x1342F,
|
| + // Kana Supplement
|
| + 0x1B000, 0x1B0FF,
|
| + // Byzantine Musical Symbols/Musical Symbols
|
| + 0x1D000, 0x1D1FF,
|
| + // Tai Xuan Jing Symbols/Counting Rod Numerals
|
| + 0x1D300, 0x1D37F,
|
| + // Mahjong Tiles/Domino Tiles/Playing Cards/Enclosed Alphanumeric Supplement
|
| + // Enclosed Ideographic Supplement/Miscellaneous Symbols and Pictographs
|
| + // Emoticons/Ornamental Dingbats/Transport and Map Symbols/Alchemical Symbols
|
| + // Geometric Shapes Extended
|
| + 0x1F000, 0x1F7FF,
|
| + // CJK Unified Ideographs Extension B/C/D
|
| + // CJK Compatibility Ideographs Supplement
|
| + 0x20000, 0x2FFFD,
|
| + 0x30000, 0x3FFFD,
|
| + // Supplementary Private Use Area-A
|
| + 0xF0000, 0xFFFFD,
|
| + // Supplementary Private Use Area-B
|
| + 0x100000, 0x10FFFD,
|
| +};
|
| +
|
| +// Underlying integer type of CharacterProperties. One byte per code point
|
| +// keeps the scratch table allocated by createTrie() as small as possible.
|
| +using CharacterPropertiesType = uint8_t;
|
| +
|
| +// Bit flags describing a code point. Several flags may be set at once; they
|
| +// are combined with the operators below and stored as the trie value.
|
| +enum class CharacterProperties : CharacterPropertiesType {
|
| +    isCJKIdeograph = 0x0001,
|
| +    isCJKIdeographOrSymbol = 0x0002,
|
| +    isUprightInMixedVertical = 0x0004,
|
| +};
|
| +
|
| +// Returns the union of the flags set in |a| and |b|.
|
| +inline CharacterProperties operator | (CharacterProperties a, CharacterProperties b)
|
| +{
|
| +    return static_cast<CharacterProperties>((static_cast<CharacterPropertiesType>(a) | static_cast<CharacterPropertiesType>(b)));
|
| +}
|
| +
|
| +// Returns the flags set in both |a| and |b|.
|
| +inline CharacterProperties operator & (CharacterProperties a, CharacterProperties b)
|
| +{
|
| +    return static_cast<CharacterProperties>((static_cast<CharacterPropertiesType>(a) & static_cast<CharacterPropertiesType>(b)));
|
| +}
|
| +
|
| +// Adds the flags of |b| to |a| in place. Returns a reference to |a|, like the
|
| +// built-in compound assignment, so no copy is made and the result can be
|
| +// chained or bound to a reference.
|
| +inline CharacterProperties& operator |= (CharacterProperties& a, CharacterProperties b)
|
| +{
|
| +    a = a | b;
|
| +    return a;
|
| +}
|
| +
|
| +// Largest index of the per-code-point scratch table built by createTrie().
|
| +// Unicode code points end at U+10FFFF, so a larger table would only add
|
| +// entries that can never be stored in (or looked up from) the trie.
|
| +const UChar32 maxCodePointForPropertyValues = 0x10FFFF;
|
| +
|
| +// ORs |value| into values[c] for every code point c covered by |ranges|.
|
| +//
|
| +// |values| is a table indexed by code point with at least
|
| +// maxCodePointForPropertyValues + 1 entries. |ranges| is a flattened list of
|
| +// closed intervals ({first, last} pairs) and |length| is its element count,
|
| +// not its pair count, so it must be even.
|
| +static void setRanges(CharacterProperties* values, const UChar32* ranges, size_t length, CharacterProperties value)
|
| +{
|
| +    // An odd length means a truncated pair. Iterating by index with a "<"
|
| +    // bound also guarantees termination inside the array in that case, which
|
| +    // a "pointer != end" test with a stride of two does not.
|
| +    ASSERT(!(length % 2));
|
| +    for (size_t i = 0; i + 1 < length; i += 2) {
|
| +        const UChar32 first = ranges[i];
|
| +        const UChar32 last = ranges[i + 1];
|
| +        // UChar32 is signed, so check the lower bound as well as the upper
|
| +        // one before using the values as table indices.
|
| +        ASSERT(first >= 0 && first <= last && last <= maxCodePointForPropertyValues);
|
| +        for (UChar32 c = first; c <= last; c++)
|
| +            values[c] |= value;
|
| +    }
|
| +}
|
| +
|
| +// ORs |value| into the table entry of each of the |length| individual code
|
| +// points starting at |begin|. |values| is indexed by code point.
|
| +static void setValues(CharacterProperties* values, const UChar32* begin, size_t length, CharacterProperties value)
|
| +{
|
| +    for (size_t i = 0; i < length; ++i) {
|
| +        const UChar32 codePoint = begin[i];
|
| +        ASSERT(codePoint <= maxCodePointForPropertyValues);
|
| +        values[codePoint] |= value;
|
| +    }
|
| +}
|
| +
|
| +// Builds the frozen UTrie2 that maps a code point to its CharacterProperties
|
| +// bits (stored as a 16-bit trie value; code points without any property are
|
| +// left at the trie's initial value of 0).
|
| +//
|
| +// Returns the trie, owned by the caller, or nullptr if ICU reported an error
|
| +// at any step, so that a partially built or unfrozen trie is never returned.
|
| +static UTrie2* createTrie()
|
| +{
|
| +    // Scratch table with one entry per code point; the tables above are ORed
|
| +    // into it so that overlapping ranges accumulate all of their flags.
|
| +    const UChar32 size = maxCodePointForPropertyValues + 1;
|
| +    OwnPtr<CharacterProperties[]> values = adoptArrayPtr(new CharacterProperties[size]);
|
| +    memset(values.get(), 0, sizeof(CharacterProperties) * size);
|
| +    setRanges(values.get(), cjkIdeographRanges, WTF_ARRAY_LENGTH(cjkIdeographRanges),
|
| +        CharacterProperties::isCJKIdeograph | CharacterProperties::isCJKIdeographOrSymbol);
|
| +    setRanges(values.get(), cjkSymbolRanges, WTF_ARRAY_LENGTH(cjkSymbolRanges),
|
| +        CharacterProperties::isCJKIdeographOrSymbol);
|
| +    setValues(values.get(), cjkIsolatedSymbolsArray, WTF_ARRAY_LENGTH(cjkIsolatedSymbolsArray),
|
| +        CharacterProperties::isCJKIdeographOrSymbol);
|
| +    setRanges(values.get(), isUprightInMixedVerticalRanges, WTF_ARRAY_LENGTH(isUprightInMixedVerticalRanges),
|
| +        CharacterProperties::isUprightInMixedVertical);
|
| +    setValues(values.get(), isUprightInMixedVerticalArray, WTF_ARRAY_LENGTH(isUprightInMixedVerticalArray),
|
| +        CharacterProperties::isUprightInMixedVertical);
|
| +
|
| +    // Create a Trie from the value array: walk the table once and emit one
|
| +    // setRange32() call per maximal run of equal, non-zero values.
|
| +    UErrorCode error = U_ZERO_ERROR;
|
| +    UTrie2* trie = utrie2_open(0, 0, &error);
|
| +    UChar32 start = 0;
|
| +    CharacterProperties value = values[0];
|
| +    for (UChar32 c = 1; ; c++) {
|
| +        if (c < size && values[c] == value)
|
| +            continue;
|
| +        // [start, c - 1] is a finished run. Zero runs are skipped because 0
|
| +        // is already the trie's initial value.
|
| +        if (static_cast<uint32_t>(value))
|
| +            utrie2_setRange32(trie, start, c - 1, static_cast<uint32_t>(value), TRUE, &error);
|
| +        if (c >= size)
|
| +            break;
|
| +        start = c;
|
| +        value = values[c];
|
| +    }
|
| +    utrie2_freeze(trie, UTRIE2_16_VALUE_BITS, &error);
|
| +
|
| +    // |error| is sticky across the ICU calls above, so one check at the end
|
| +    // covers all of them.
|
| +    ASSERT(U_SUCCESS(error));
|
| +    if (U_FAILURE(error)) {
|
| +        utrie2_close(trie);
|
| +        return nullptr;
|
| +    }
|
| +    return trie;
|
| +}
|
| +
|
| +// Returns true if the trie value stored for |c| has any bit of |property| set.
|
| +//
|
| +// The trie is created by createTrie() on the first call and kept for the rest
|
| +// of the process in a function-local static; it is never freed here.
|
| +// NOTE(review): the check-then-create below has no synchronization - confirm
|
| +// that all callers run on a single thread.
|
| +static bool hasCharacterProperty(UChar32 c, CharacterProperties property)
|
| +{
|
| +    static UTrie2* propertyTrie = nullptr;
|
| +    if (!propertyTrie)
|
| +        propertyTrie = createTrie();
|
| +
|
| +    const uint16_t bits = UTRIE2_GET16(propertyTrie, c);
|
| +    const CharacterPropertiesType mask = static_cast<CharacterPropertiesType>(property);
|
| +    return (bits & mask) != 0;
|
| +}
|
| +#endif
|
| +
|
| // Takes a flattened list of closed intervals
|
| template <class T, size_t size>
|
| bool valueInIntervalList(const T (&intervalList)[size], const T& value)
|
| @@ -177,6 +454,9 @@ CodePath Character::characterRangeCodePath(const UChar* characters, unsigned len
|
|
|
| bool Character::isUprightInMixedVertical(UChar32 character)
|
| {
|
| +#if defined(USE_TRIE)
|
| + return hasCharacterProperty(character, CharacterProperties::isUprightInMixedVertical);
|
| +#else
|
| // Fast path for common non-CJK
|
| if (character < 0x000A7)
|
| return false;
|
| @@ -304,29 +584,14 @@ bool Character::isUprightInMixedVertical(UChar32 character)
|
| 0x100000, 0x10FFFD,
|
| };
|
| return valueInIntervalList(uprightRanges, character);
|
| +#endif
|
| }
|
|
|
| bool Character::isCJKIdeograph(UChar32 c)
|
| {
|
| - static const UChar32 cjkIdeographRanges[] = {
|
| - // CJK Radicals Supplement and Kangxi Radicals.
|
| - 0x2E80, 0x2FDF,
|
| - // CJK Strokes.
|
| - 0x31C0, 0x31EF,
|
| - // CJK Unified Ideographs Extension A.
|
| - 0x3400, 0x4DBF,
|
| - // The basic CJK Unified Ideographs block.
|
| - 0x4E00, 0x9FFF,
|
| - // CJK Compatibility Ideographs.
|
| - 0xF900, 0xFAFF,
|
| - // CJK Unified Ideographs Extension B.
|
| - 0x20000, 0x2A6DF,
|
| - // CJK Unified Ideographs Extension C.
|
| - // CJK Unified Ideographs Extension D.
|
| - 0x2A700, 0x2B81F,
|
| - // CJK Compatibility Ideographs Supplement.
|
| - 0x2F800, 0x2FA1F
|
| - };
|
| +#if defined(USE_TRIE)
|
| + return hasCharacterProperty(c, CharacterProperties::isCJKIdeograph);
|
| +#else
|
| static size_t cjkIdeographRangesCount = WTF_ARRAY_LENGTH(cjkIdeographRanges);
|
|
|
| // Early out
|
| @@ -334,6 +599,7 @@ bool Character::isCJKIdeograph(UChar32 c)
|
| return false;
|
|
|
| return valueInIntervalList(cjkIdeographRanges, c);
|
| +#endif
|
| }
|
|
|
| bool Character::isCJKIdeographOrSymbol(UChar32 c)
|
| @@ -342,46 +608,12 @@ bool Character::isCJKIdeographOrSymbol(UChar32 c)
|
| if (c < 0x2C7)
|
| return false;
|
|
|
| +#if defined(USE_TRIE)
|
| + return hasCharacterProperty(c, CharacterProperties::isCJKIdeographOrSymbol);
|
| +#else
|
| if (isCJKIdeograph(c))
|
| return true;
|
|
|
| - static const UChar32 cjkSymbolRanges[] = {
|
| - 0x2156, 0x215A,
|
| - 0x2160, 0x216B,
|
| - 0x2170, 0x217B,
|
| - 0x23BE, 0x23CC,
|
| - 0x2460, 0x2492,
|
| - 0x249C, 0x24FF,
|
| - 0x25CE, 0x25D3,
|
| - 0x25E2, 0x25E6,
|
| - 0x2600, 0x2603,
|
| - 0x2660, 0x266F,
|
| - 0x2672, 0x267D,
|
| - 0x2776, 0x277F,
|
| - // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030.
|
| - // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F
|
| - 0x2FF0, 0x302F,
|
| - 0x3031, 0x312F,
|
| - // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF
|
| - 0x3190, 0x31BF,
|
| - // Enclosed CJK Letters and Months (0x3200 .. 0x32FF).
|
| - // CJK Compatibility (0x3300 .. 0x33FF).
|
| - 0x3200, 0x33FF,
|
| - 0xF860, 0xF862,
|
| - // CJK Compatibility Forms.
|
| - 0xFE30, 0xFE4F,
|
| - // Halfwidth and Fullwidth Forms
|
| - // Usually only used in CJK
|
| - 0xFF00, 0xFF0C,
|
| - 0xFF0E, 0xFF1A,
|
| - 0xFF1F, 0xFFEF,
|
| - // Emoji.
|
| - 0x1F110, 0x1F129,
|
| - 0x1F130, 0x1F149,
|
| - 0x1F150, 0x1F169,
|
| - 0x1F170, 0x1F189,
|
| - 0x1F200, 0x1F6FF
|
| - };
|
|
|
| if (c >= cjkSymbolRanges[0]
|
| && c <= cjkSymbolRanges[WTF_ARRAY_LENGTH(cjkSymbolRanges) - 1]
|
| @@ -400,6 +632,7 @@ bool Character::isCJKIdeographOrSymbol(UChar32 c)
|
| cjkIsolatedSymbols->add(cjkIsolatedSymbolsArray[i]);
|
| }
|
| return cjkIsolatedSymbols->contains(c);
|
| +#endif
|
| }
|
|
|
| unsigned Character::expansionOpportunityCount(const LChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion, const TextJustify textJustify)
|
|
|