Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1165)

Unified Diff: third_party/WebKit/Source/platform/text/TextBreakIterator.cpp

Issue 1779693003: Fix emoji ZWJ and modifier sequence line breaking (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebased Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/platform/text/TextBreakIterator.cpp
diff --git a/third_party/WebKit/Source/platform/text/TextBreakIterator.cpp b/third_party/WebKit/Source/platform/text/TextBreakIterator.cpp
index e1205bfd90dd2eecfc285b970d88b2f1dea0ecbf..8c84c5443e775dd4b161be44cbf75882223c9dad 100644
--- a/third_party/WebKit/Source/platform/text/TextBreakIterator.cpp
+++ b/third_party/WebKit/Source/platform/text/TextBreakIterator.cpp
@@ -22,6 +22,7 @@
#include "platform/text/TextBreakIterator.h"
+#include "platform/fonts/Character.h"
#include "wtf/ASCIICType.h"
#include "wtf/StdLibExtras.h"
#include "wtf/text/CharacterNames.h"
@@ -221,6 +222,56 @@ inline bool needsLineBreakIterator(UChar ch)
return ch > asciiLineBreakTableLastChar && ch != noBreakSpaceCharacter;
}
+// Customization for ICU line breaking behavior. Returns false to reject an ICU
+// line break suggestion at code unit offset |breakPos| of |buf| (|length| code
+// units) when it would split an emoji ZWJ or modifier sequence; returns true
+// otherwise, including when |breakPos| has no character on one of its sides.
+// FIXME crbug.com/593260: Remove this customization once ICU implements this
+// natively.
+static bool isBreakValid(const UChar* buf, size_t length, size_t breakPos)
+{
+    // U16_PREV/U16_NEXT read unconditionally; nothing to inspect at an edge.
+    if (!breakPos || breakPos >= length)
+        return true;
+    UChar32 codepoint;
+    size_t prevOffset = breakPos;
+    U16_PREV(buf, 0, prevOffset, codepoint);
+    uint32_t nextCodepoint;
+    size_t nextOffset = breakPos;
+    U16_NEXT(buf, nextOffset, length, nextCodepoint);
+
+    // Possible Emoji ZWJ sequence
+    if (codepoint == zeroWidthJoinerCharacter
+        && (nextCodepoint == 0x2764 // HEAVY BLACK HEART
+            || nextCodepoint == 0x1F466 // BOY
+            || nextCodepoint == 0x1F467 // GIRL
+            || nextCodepoint == 0x1F468 // MAN
+            || nextCodepoint == 0x1F469 // WOMAN
+            || nextCodepoint == 0x1F48B // KISS MARK
+            || nextCodepoint == 0x1F5E8)) // LEFT SPEECH BUBBLE
+        return false;
+
+    // Possible emoji modifier sequence (EB x EM), proposed Rule LB30b from
+    // http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf
+    if (Character::isModifier(nextCodepoint)) {
+        if (codepoint == variationSelector16Character && prevOffset > 0) {
+            // Skip over emoji variation selector.
+            U16_PREV(buf, 0, prevOffset, codepoint);
+        }
+        if (Character::isEmojiModifierBase(codepoint))
+            return false;
+    }
+    return true;
+}
+
+// Trivial LChar overload so that nextBreakablePosition can call isBreakValid
+// for either of its template character types. There are no emoji sequences
+// in 8-bit strings, so every break opportunity is accepted.
+static bool isBreakValid(const LChar*, size_t, size_t)
+{
+ return true;
+}
+
template<typename CharacterType, LineBreakType lineBreakType>
static inline int nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* str, unsigned length, int pos)
{
@@ -260,8 +311,9 @@ static inline int nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator
}
}
}
- if (i == nextBreak && !isBreakableSpace(lastCh))
+ if (i == nextBreak && !isBreakableSpace(lastCh) && isBreakValid(str, length, i)) {
return i;
+ }
}
lastLastCh = lastCh;
@@ -307,7 +359,7 @@ static inline int nextBreakablePositionKeepAllInternal(LazyLineBreakIterator& la
}
}
}
- if (i == nextBreak && !isBreakableSpace(lastCh))
+ if (i == nextBreak && !isBreakableSpace(lastCh) && isBreakValid(str, length, i))
return i;
}

Powered by Google App Engine
This is Rietveld 408576698