Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(333)

Unified Diff: third_party/WebKit/Source/core/editing/state_machines/BackspaceStateMachine.cpp

Issue 1844663002: Implement backspace state machine for complex emoji sequence. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Move char definitions to CharacterNames.h Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/core/editing/state_machines/BackspaceStateMachine.cpp
diff --git a/third_party/WebKit/Source/core/editing/state_machines/BackspaceStateMachine.cpp b/third_party/WebKit/Source/core/editing/state_machines/BackspaceStateMachine.cpp
index 0f14512d1740f548818efeed50d36422a1f89abb..28cfb38c1aa885a1dc0b6960663d37a9246085bc 100644
--- a/third_party/WebKit/Source/core/editing/state_machines/BackspaceStateMachine.cpp
+++ b/third_party/WebKit/Source/core/editing/state_machines/BackspaceStateMachine.cpp
@@ -4,24 +4,95 @@
#include "core/editing/state_machines/BackspaceStateMachine.h"
+// TODO(nona): Move Character.h to platform/text
+#include "platform/fonts/Character.h"
+#include "wtf/text/CharacterNames.h"
#include "wtf/text/Unicode.h"
+#include <ostream> // NOLINT
namespace blink {
namespace {
-const TextSegmentationMachineState kInvalid = TextSegmentationMachineState::Invalid;
-const TextSegmentationMachineState kNeedMoreCodeUnit = TextSegmentationMachineState::NeedMoreCodeUnit;
-const TextSegmentationMachineState kFinished = TextSegmentationMachineState::Finished;
+
+// Returns true if |codePoint| is one of the emoji accepted here as a component of a ZWJ sequence: boy, eye, girl, heavy black heart, kiss mark, left speech bubble, man or woman.
+bool isZwjEmoji(uint32_t codePoint)
+{
+ return codePoint == WTF::Unicode::boyCharacter
+ || codePoint == WTF::Unicode::eyeCharacter
+ || codePoint == WTF::Unicode::girlCharacter
+ || codePoint == WTF::Unicode::heavyBlackHeartCharacter
+ || codePoint == WTF::Unicode::kissMarkCharacter
+ || codePoint == WTF::Unicode::leftSpeechBubbleCharacter
+ || codePoint == WTF::Unicode::manCharacter
+ || codePoint == WTF::Unicode::womanCharacter;
+}
+
} // namespace
-TextSegmentationMachineState BackspaceStateMachine::feedPrecedingCodeUnit(UChar codeUnit)
+#define FOR_EACH_BACKSPACE_STATE_MACHINE_STATE(V) \
+ /* Initial state. */ \
+ V(Start) \
+ /* The current offset is just before a line feed. */ \
+ V(BeforeLF) \
+ /* The current offset is just before a keycap. */ \
+ V(BeforeKeycap) \
+ /* The current offset is just before a variation selector and a keycap. */ \
+ V(BeforeVSAndKeycap) \
+ /* The current offset is just before an emoji modifier. */ \
+ V(BeforeEmojiModifier) \
+ /* The current offset is just before a variation selector and an emoji */ \
+ /* modifier. */ \
+ V(BeforeVSAndEmojiModifier) \
+ /* The current offset is just before a variation selector. */ \
+ V(BeforeVS) \
+ /* The current offset is just before a ZWJ emoji. */ \
+ V(BeforeZWJEmoji) \
+ /* The current offset is just before a ZWJ. */ \
+ V(BeforeZWJ) \
+ /* The current offset is just before a variation selector and a ZWJ. */ \
+ V(BeforeVSAndZWJ) \
+ /* An odd number of regional indicator symbols (RIS) has been fed so far. */ \
+ V(OddNumberedRIS) \
+ /* An even number of regional indicator symbols (RIS) has been fed so far. */ \
+ V(EvenNumberedRIS) \
+ /* This state machine has finished. */ \
+ V(Finished)
+
+enum class BackspaceStateMachine::BackspaceState { // One enumerator per FOR_EACH_BACKSPACE_STATE_MACHINE_STATE entry, in list order.
+#define V(name) name,
+ FOR_EACH_BACKSPACE_STATE_MACHINE_STATE(V)
+#undef V
+};
+
+std::ostream& operator<<(std::ostream& os, // Writes the name of |state| to |os| and returns |os|.
+ BackspaceStateMachine::BackspaceState state)
{
+ static const char* const texts[] = { // Stringified state names, generated from the same list as the enum.
+#define V(name) #name,
+ FOR_EACH_BACKSPACE_STATE_MACHINE_STATE(V)
+#undef V
+ };
+ const auto& it = std::begin(texts) + static_cast<size_t>(state); // Index |texts| by the enum's numeric value.
+ DCHECK_GE(it, std::begin(texts)) << "Unknown backspace value";
+ DCHECK_LT(it, std::end(texts)) << "Unknown backspace value";
+ return os << *it;
+}
+
+BackspaceStateMachine::BackspaceStateMachine() // A newly constructed machine begins in the Start state.
+ : m_state(BackspaceState::Start)
+{
+}
+
+TextSegmentationMachineState
+BackspaceStateMachine::feedPrecedingCodeUnit(UChar codeUnit)
+{
+ DCHECK_NE(BackspaceState::Finished, m_state);
uint32_t codePoint = codeUnit;
if (U16_IS_LEAD(codeUnit)) {
if (m_trailSurrogate == 0) {
// Unpaired lead surrogate. Aborting with deleting broken surrogate.
++m_codeUnitsToBeDeleted;
- return kFinished;
+ return TextSegmentationMachineState::Finished;
}
codePoint = U16_GET_SUPPLEMENTARY(codeUnit, m_trailSurrogate);
m_trailSurrogate = 0;
@@ -29,21 +100,123 @@ TextSegmentationMachineState BackspaceStateMachine::feedPrecedingCodeUnit(UChar
if (m_trailSurrogate != 0) {
// Unpaired trail surrogate. Aborting with deleting broken
// surrogate.
- return kFinished;
+ return TextSegmentationMachineState::Finished;
}
m_trailSurrogate = codeUnit;
- return kNeedMoreCodeUnit;
+ return TextSegmentationMachineState::NeedMoreCodeUnit;
} else {
if (m_trailSurrogate != 0) {
// Unpaired trail surrogate. Aborting with deleting broken
// surrogate.
- return kFinished;
+ return TextSegmentationMachineState::Finished;
}
}
- // TODO(nona): Handle emoji sequences.
- m_codeUnitsToBeDeleted = U16_LENGTH(codePoint);
- return kFinished;
+ switch (m_state) {
+ case BackspaceState::Start:
+ m_codeUnitsToBeDeleted = U16_LENGTH(codePoint);
+ if (codePoint == newlineCharacter)
+ return moveToNextState(BackspaceState::BeforeLF);
+ if (u_hasBinaryProperty(codePoint, UCHAR_VARIATION_SELECTOR))
+ return moveToNextState(BackspaceState::BeforeVS);
+ if (isZwjEmoji(codePoint))
+ return moveToNextState(BackspaceState::BeforeZWJEmoji);
+ if (Character::isRegionalIndicator(codePoint))
+ return moveToNextState(BackspaceState::OddNumberedRIS);
+ if (Character::isModifier(codePoint))
+ return moveToNextState(BackspaceState::BeforeEmojiModifier);
+ if (codePoint == combiningEnclosingKeycapCharacter)
+ return moveToNextState(BackspaceState::BeforeKeycap);
+ return finish();
+ case BackspaceState::BeforeLF:
+ if (codePoint == carriageReturnCharacter)
+ ++m_codeUnitsToBeDeleted;
+ return finish();
+ case BackspaceState::BeforeKeycap:
+ if (u_hasBinaryProperty(codePoint, UCHAR_VARIATION_SELECTOR)) {
+ DCHECK_EQ(m_lastSeenVSCodeUnits, 0);
+ m_lastSeenVSCodeUnits = U16_LENGTH(codePoint);
+ return moveToNextState(BackspaceState::BeforeVSAndKeycap);
+ }
+ if (Character::isEmojiKeycapBase(codePoint))
+ m_codeUnitsToBeDeleted += U16_LENGTH(codePoint);
+ return finish();
+ case BackspaceState::BeforeVSAndKeycap:
+ if (Character::isEmojiKeycapBase(codePoint)) {
+ DCHECK_GT(m_lastSeenVSCodeUnits, 0);
+ DCHECK_LE(m_lastSeenVSCodeUnits, 2);
+ m_codeUnitsToBeDeleted +=
+ m_lastSeenVSCodeUnits + U16_LENGTH(codePoint);
+ }
+ return finish();
+ case BackspaceState::BeforeEmojiModifier:
+ if (u_hasBinaryProperty(codePoint, UCHAR_VARIATION_SELECTOR)) {
+ DCHECK_EQ(m_lastSeenVSCodeUnits, 0);
+ m_lastSeenVSCodeUnits = U16_LENGTH(codePoint);
+ return moveToNextState(BackspaceState::BeforeVSAndEmojiModifier);
+ }
+ if (Character::isEmojiModifierBase(codePoint))
+ m_codeUnitsToBeDeleted += U16_LENGTH(codePoint);
+ return finish();
+ case BackspaceState::BeforeVSAndEmojiModifier:
+ if (Character::isEmojiModifierBase(codePoint)) {
+ DCHECK_GT(m_lastSeenVSCodeUnits, 0);
+ DCHECK_LE(m_lastSeenVSCodeUnits, 2);
+ m_codeUnitsToBeDeleted +=
+ m_lastSeenVSCodeUnits + U16_LENGTH(codePoint);
+ }
+ return finish();
+ case BackspaceState::BeforeVS:
+ if (isZwjEmoji(codePoint)) {
+ m_codeUnitsToBeDeleted += U16_LENGTH(codePoint);
+ return moveToNextState(BackspaceState::BeforeZWJEmoji);
+ }
+ if (!u_hasBinaryProperty(codePoint, UCHAR_VARIATION_SELECTOR)
+ && u_getCombiningClass(codePoint) == 0)
+ m_codeUnitsToBeDeleted += U16_LENGTH(codePoint);
+ return finish();
+ case BackspaceState::BeforeZWJEmoji:
+ return codePoint == zeroWidthJoinerCharacter ?
+ moveToNextState(BackspaceState::BeforeZWJ) : finish();
+ case BackspaceState::BeforeZWJ:
+ if (isZwjEmoji(codePoint)) {
+ m_codeUnitsToBeDeleted += U16_LENGTH(codePoint) + 1; // +1 for ZWJ
+ return moveToNextState(BackspaceState::BeforeZWJEmoji);
+ }
+ if (u_hasBinaryProperty(codePoint, UCHAR_VARIATION_SELECTOR)) {
+ DCHECK_EQ(m_lastSeenVSCodeUnits, 0);
+ m_lastSeenVSCodeUnits = U16_LENGTH(codePoint);
+ return moveToNextState(BackspaceState::BeforeVSAndZWJ);
+ }
+ return finish();
+ case BackspaceState::BeforeVSAndZWJ:
+ if (!isZwjEmoji(codePoint))
+ return finish();
+
+ DCHECK_GT(m_lastSeenVSCodeUnits, 0);
+ DCHECK_LE(m_lastSeenVSCodeUnits, 2);
+ // +1 for ZWJ
+ m_codeUnitsToBeDeleted +=
+ U16_LENGTH(codePoint) + 1 + m_lastSeenVSCodeUnits;
+ m_lastSeenVSCodeUnits = 0;
+ return moveToNextState(BackspaceState::BeforeZWJEmoji);
+ case BackspaceState::OddNumberedRIS:
+ if (!Character::isRegionalIndicator(codePoint))
+ return finish();
+ m_codeUnitsToBeDeleted += 2; // Code units of RIS
+ return moveToNextState(BackspaceState::EvenNumberedRIS);
+ case BackspaceState::EvenNumberedRIS:
+ if (!Character::isRegionalIndicator(codePoint))
+ return finish();
+ m_codeUnitsToBeDeleted -= 2; // Code units of RIS
+ return moveToNextState(BackspaceState::OddNumberedRIS);
+ case BackspaceState::Finished:
+ NOTREACHED() << "Do not call feedPrecedingCodeUnit() once it finishes.";
+ default:
+ NOTREACHED() << "Unhandled state: " << m_state;
+ }
+ NOTREACHED() << "Unhandled state: " << m_state;
+ return TextSegmentationMachineState::Invalid;
}
TextSegmentationMachineState BackspaceStateMachine::tellEndOfPrecedingText()
@@ -53,13 +226,14 @@ TextSegmentationMachineState BackspaceStateMachine::tellEndOfPrecedingText()
++m_codeUnitsToBeDeleted;
m_trailSurrogate = 0;
}
- return kFinished;
+ return TextSegmentationMachineState::Finished;
}
-TextSegmentationMachineState BackspaceStateMachine::feedFollowingCodeUnit(UChar codeUnit)
+TextSegmentationMachineState
+BackspaceStateMachine::feedFollowingCodeUnit(UChar codeUnit) // Not expected to be called: hits NOTREACHED() and returns Invalid.
{
NOTREACHED();
- return kInvalid;
+ return TextSegmentationMachineState::Invalid;
}
int BackspaceStateMachine::finalizeAndGetBoundaryOffset()
@@ -69,6 +243,10 @@ int BackspaceStateMachine::finalizeAndGetBoundaryOffset()
++m_codeUnitsToBeDeleted;
m_trailSurrogate = 0;
}
+ if (m_state != BackspaceState::Finished) {
+ m_lastSeenVSCodeUnits = 0;
+ m_state = BackspaceState::Finished;
+ }
return -m_codeUnitsToBeDeleted;
}
@@ -76,6 +254,26 @@ void BackspaceStateMachine::reset()
{
m_codeUnitsToBeDeleted = 0;
m_trailSurrogate = 0;
+ m_state = BackspaceState::Start;
+ m_lastSeenVSCodeUnits = 0;
+}
+
+TextSegmentationMachineState
+BackspaceStateMachine::moveToNextState(BackspaceState newState) // Switches to |newState| and returns NeedMoreCodeUnit.
+{
+ DCHECK_NE(BackspaceState::Finished, newState) << "Use finish() instead.";
+ DCHECK_NE(BackspaceState::Start, newState) << "Don't move to Start.";
+ // The |DCHECK_NE()| below asserts that every transition changes the state, which guards against looping on one state forever.
+ DCHECK_NE(m_state, newState) << "State should be changed.";
+ m_state = newState;
+ return TextSegmentationMachineState::NeedMoreCodeUnit;
+}
+
+TextSegmentationMachineState BackspaceStateMachine::finish() // Marks the machine as Finished and returns Finished; must not already be Finished.
+{
+ DCHECK_NE(BackspaceState::Finished, m_state);
+ m_state = BackspaceState::Finished;
+ return TextSegmentationMachineState::Finished;
}
} // namespace blink

Powered by Google App Engine
This is Rietveld 408576698