Chromium Code Reviews| Index: third_party/WebKit/Source/core/editing/EditingUtilities.cpp |
| diff --git a/third_party/WebKit/Source/core/editing/EditingUtilities.cpp b/third_party/WebKit/Source/core/editing/EditingUtilities.cpp |
| index a5748f859505025388e33c7a977601d7afb6f13f..3b50f461edc0b501293681dfbc84d596cd3360a5 100644 |
| --- a/third_party/WebKit/Source/core/editing/EditingUtilities.cpp |
| +++ b/third_party/WebKit/Source/core/editing/EditingUtilities.cpp |
| @@ -52,10 +52,15 @@ |
| #include "core/html/HTMLUListElement.h" |
| #include "core/layout/LayoutObject.h" |
| #include "core/layout/LayoutTableCell.h" |
| +#include "platform/fonts/Character.h" |
| +#include "third_party/icu/source/common/unicode/uchar.h" |
| +#include "third_party/icu/source/common/unicode/utf16.h" |
| #include "wtf/Assertions.h" |
| #include "wtf/StdLibExtras.h" |
| #include "wtf/text/StringBuilder.h" |
| +#include <algorithm> |
| + |
| namespace blink { |
| using namespace HTMLNames; |
| @@ -542,18 +547,155 @@ PositionInFlatTree lastEditablePositionBeforePositionInRoot(const PositionInFlat |
| return lastEditablePositionBeforePositionInRootAlgorithm<EditingInFlatTreeStrategy>(position, highestRoot); |
| } |
| +// Returns true if the code point has the Glue_After_Zwj grapheme break property. |
| +// See http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakProperty-9.0.0d18.txt |
| +bool isGlueAfterZwj(uint32_t codePoint) |
| +{ |
| + return codePoint == 0x2764 // HEAVY BLACK HEART. |
| + || codePoint == 0x1F48B // KISS MARK. |
| + || codePoint == 0x1F5E8; // LEFT SPEECH BUBBLE. |
| +} |
| + |
| +// Returns true if the code point has the E_Base_GAZ grapheme break property. |
| +// See http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakProperty-9.0.0d18.txt |
| +bool isEBaseGAZ(uint32_t codePoint) |
| +{ |
| + return codePoint >= 0x1F466 && codePoint <= 0x1F469; |
| +} |
| + |
| +// The list of code points which have the Indic_Syllabic_Category=Virama property. |
| +// Must be sorted in ascending order: the lookup below uses std::binary_search. |
| +const uint32_t kIndicSyllabicCategoryViramaList[] = { |
| + 0x094D, 0x09CD, 0x0A4D, 0x0ACD, 0x0B4D, 0x0BCD, 0x0C4D, 0x0CCD, 0x0D4D, 0x0DCA, 0x1B44, 0xA8C4, |
| + 0xA9C0, 0x11046, 0x110B9, 0x111C0, 0x11235, 0x1134D, 0x11442, 0x114C2, 0x115BF, 0x1163F, |
| + 0x116B6, 0x11C3F, |
| +}; |
| + |
| +// Returns true if the code point is found in kIndicSyllabicCategoryViramaList (binary search), i.e. has the Indic_Syllabic_Category=Virama property. |
| +bool isIndicSyllabicCategoryVirama(uint32_t codePoint) |
| +{ |
| + const int length = WTF_ARRAY_LENGTH(kIndicSyllabicCategoryViramaList); |
| + return std::binary_search(kIndicSyllabicCategoryViramaList, |
| + kIndicSyllabicCategoryViramaList + length, |
| + codePoint); |
| +} |
| + |
| +bool isGraphemeBreak(const uint16_t* buf, int length, int offset)

yosin_UTC9
2016/03/29 01:27:30
Can we make this in state machine as BackspaceStat
|
| +{ |
| + DCHECK_GE(length, 0); |
| + DCHECK_GE(offset, 0); |
| + DCHECK_LE(offset, length); |
| + // The following breaking rules come from Unicode Standard Annex #29 on Unicode Text Segmentation. |
| + // See http://www.unicode.org/reports/tr29/ |
| + // Note that some of the rules are still proposals; see also http://www.unicode.org/reports/tr29/proposed.html |
| + const uint32_t ZWJ = 0x200D; |
| + |
| + // Rule1 GB1 sot ÷ |
| + // Rule2 GB2 ÷ eot |
| + if (offset <= 0 || offset >= length) |
| + return true; |
| + if (U16_IS_TRAIL(buf[offset])) { |
| + // Don't break a surrogate pair, but break lonely trailing surrogate. |
| + return !U16_IS_LEAD(buf[offset - 1]); |
| + } |
| + uint32_t prevCodePoint = 0; |
| + uint32_t nextCodePoint = 0; |
| + int offsetBack = offset; |
| + U16_PREV(buf, 0, offsetBack, prevCodePoint); |
| + U16_NEXT(buf, offset, length, nextCodePoint); |
| + int prevProp = u_getIntPropertyValue(prevCodePoint, UCHAR_GRAPHEME_CLUSTER_BREAK); |
| + int nextProp = u_getIntPropertyValue(nextCodePoint, UCHAR_GRAPHEME_CLUSTER_BREAK); |
| + |
| + // Rule GB3, CR x LF |
| + if (prevProp == U_GCB_CR && nextProp == U_GCB_LF) |
| + return false; |
| + |
| + // Rule GB4, (Control | CR | LF) ÷ |
| + if (prevProp == U_GCB_CONTROL || prevProp == U_GCB_CR || prevProp == U_GCB_LF) |
| + return true; |
| + |
| + // Rule GB5, ÷ (Control | CR | LF) |
| + if (nextProp == U_GCB_CONTROL || nextProp == U_GCB_CR || nextProp == U_GCB_LF) |
| + return true; |
| + |
| + // Rule GB6, L x (L | V | LV | LVT) |
| + if (prevProp == U_GCB_L |
| + && (nextProp == U_GCB_L || nextProp == U_GCB_V || nextProp == U_GCB_LV || nextProp == U_GCB_LVT)) |
| + return false; |
| + |
| + // Rule GB7, (LV | V) x (V | T) |
| + if ((prevProp == U_GCB_LV || prevProp == U_GCB_V) && (nextProp == U_GCB_V || nextProp == U_GCB_T)) |
| + return false; |
| + |
| + // Rule GB8, (LVT | T) x T |
| + if ((prevProp == U_GCB_LVT || prevProp == U_GCB_T) && nextProp == U_GCB_T) |
| + return false; |
| + |
| + // Rule GB8a |
| + // |
| + // sot (RI RI)* RI x RI |
| + // [^RI] (RI RI)* RI x RI |
| + // RI ÷ RI |
| + if (Character::isRegionalIndicator(prevCodePoint) && Character::isRegionalIndicator(nextCodePoint)) { |
| + while (offsetBack > 0) { |
| + U16_PREV(buf, 0, offsetBack, prevCodePoint); |
| + if (!Character::isRegionalIndicator(prevCodePoint)) { |
| + offsetBack += U16_LENGTH(prevCodePoint); |
| + break; |
| + } |
| + } |
| + |
| + // Note that the offset has moved forward 2 code units by U16_NEXT. |
| + // The number 4 comes from the number of code units in a whole flag. |
| + return (offset - 2 - offsetBack) % 4 == 0; |
| + } |
| + |
| + // Rule GB9, x (Extend | ZWJ) |
| + // Rule GB9a, x SpacingMark |
| + if (nextProp == U_GCB_EXTEND || nextCodePoint == ZWJ || nextProp == U_GCB_SPACING_MARK) |
| + return false; |
| + |
| + // Rule GB9b, Prepend x |
| + if (prevProp == U_GCB_PREPEND) |
| + return false; |
| + |
| + // Cluster Indic syllables together. |
| + if (isIndicSyllabicCategoryVirama(prevCodePoint) |
| + && u_getIntPropertyValue(nextCodePoint, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) |
| + return false; |
| + |
| + // Proposed Rule GB10, (E_Base | EBG) x E_Modifier |
| + if ((Character::isEmojiModifierBase(prevCodePoint) || isEBaseGAZ(prevCodePoint)) |
| + && Character::isModifier(nextCodePoint)) |
| + return false; |
| + |
| + // Proposed Rule GB11, ZWJ x (Glue_After_Zwj | EBG) |
| + if (prevCodePoint == ZWJ && (isGlueAfterZwj(nextCodePoint) || isEBaseGAZ(nextCodePoint))) |
| + return false; |
| + |
| + // Rule GB999 any ÷ any |
| + return true; |
| +} |
| + |
| int uncheckedPreviousOffset(const Node* node, int current) |
| { |
| + DCHECK_GE(current, 0); |
| if (!node->isTextNode()) |
| return current - 1; |
| + if (current <= 1) |
| + return current - 1; // It's fine to return -1 (which happens when current is 0). |
| const String& text = toText(node)->data(); |
| - if (text.is8Bit()) |
| - return current - 1; // TODO(nona): Good to support CR x LF. |
| - TextBreakIterator* iterator = cursorMovementIterator(text.characters16(), text.length()); |
| - if (!iterator) |
| - return current - 1; |
| - const int result = iterator->preceding(current); |
| - return result == TextBreakDone ? current - 1 : result; |
| + if (text.is8Bit()) { |
| + const uint8_t* buf = text.characters8(); |
| + --current; |
| + if (buf[current - 1] == 0x0D && buf[current] == 0x0A) |
| + --current; |
| + return current; |
| + } |
| + const uint16_t* buf = text.characters16(); |
| + const int length = text.length(); |
| + while (!isGraphemeBreak(buf, length, --current)) {} |
| + return current; |
| } |
| static int uncheckedPreviousOffsetForBackwardDeletion(const Node* n, int current) |
| @@ -563,16 +705,23 @@ static int uncheckedPreviousOffsetForBackwardDeletion(const Node* n, int current |
| int uncheckedNextOffset(const Node* node, int current) |
| { |
| + DCHECK_GE(current, 0); |
| if (!node->isTextNode()) |
| return current + 1; |
| const String& text = toText(node)->data(); |
| - if (text.is8Bit()) |
| - return current + 1; // TODO(nona): Good to support CR x LF. |
| - TextBreakIterator* iterator = cursorMovementIterator(text.characters16(), text.length()); |
| - if (!iterator) |
| - return current + 1; |
| - const int result = iterator->following(current); |
| - return result == TextBreakDone ? current + 1 : result; |
| + const int length = text.length(); |
| + if (current >= (length - 1)) |
| + return current + 1; // It's fine to return length + 1 (which happens when current == length). |
| + if (text.is8Bit()) { |
| + const uint8_t* buf = text.characters8(); |
| + ++current; |
| + if (buf[current - 1] == 0x0D && buf[current] == 0x0A) |
| + ++current; |
| + return current; |
| + } |
| + const uint16_t* buf = text.characters16(); |
| + while (!isGraphemeBreak(buf, length, ++current)) {} |
| + return current; |
| } |
| template <typename Strategy> |