Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1015)

Unified Diff: third_party/WebKit/Source/core/editing/state_machines/BackwardGraphemeBoundaryStateMachine.cpp

Issue 1833413002: [All-in-one patch] Implement own grapheme boundary breaker for editing. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Upload All-in-one patch Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/core/editing/state_machines/BackwardGraphemeBoundaryStateMachine.cpp
diff --git a/third_party/WebKit/Source/core/editing/state_machines/BackwardGraphemeBoundaryStateMachine.cpp b/third_party/WebKit/Source/core/editing/state_machines/BackwardGraphemeBoundaryStateMachine.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1bbc701f7f0ad07bf3d4ea04da51630c00df2b30
--- /dev/null
+++ b/third_party/WebKit/Source/core/editing/state_machines/BackwardGraphemeBoundaryStateMachine.cpp
@@ -0,0 +1,190 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/editing/state_machines/BackwardGraphemeBoundaryStateMachine.h"
+
+#include "core/editing/state_machines/StateMachineUtil.h"
+#include "core/editing/state_machines/TextSegmentationMachineState.h"
+#include "platform/fonts/Character.h"
+#include "wtf/text/Unicode.h"
+#include <ostream> // NOLINT
+
+namespace blink {
+
+enum class BackwardGraphemeBoundaryStateMachine::InternalState {
+ SearchBoundary, // Searching grapheme boundary.
+ CountPrecedingRIS, // Counting preceding regional indicator symbols.
+ Finished, // The state machine has finished.
+};
+
+std::ostream& operator<<(std::ostream& os,
+ BackwardGraphemeBoundaryStateMachine::InternalState state) {
+ static const char* const texts[] = {
+ "SearchBoundary", "CountPrecedingRIS", "Finished",
+ };
+ const auto& it = std::begin(texts) + static_cast<size_t>(state);
+ DCHECK_GE(it, std::begin(texts)) << "Unknown backspace value";
+ DCHECK_LT(it, std::end(texts)) << "Unknown backspace value";
+ return os << *it;
+}
+
+BackwardGraphemeBoundaryStateMachine::BackwardGraphemeBoundaryStateMachine()
+ : m_internalState(InternalState::SearchBoundary)
+{
+}
+
+TextSegmentationMachineState
+BackwardGraphemeBoundaryStateMachine::feedPrecedingCodeUnit(UChar codeUnit)
+{
+ // TODO(nona): Extract common surrogate pair logic.
+ uint32_t codePoint = codeUnit;
+ if (U16_IS_LEAD(codeUnit)) {
+ if (m_trailSurrogate == 0) // Unpaired lead surrogate.
+ return finishWithBrokenSurrogatePair();
+ codePoint = U16_GET_SUPPLEMENTARY(codeUnit, m_trailSurrogate);
+ m_trailSurrogate = 0;
+ } else if (U16_IS_TRAIL(codeUnit)) {
+ if (m_trailSurrogate != 0) // Unpaired trail surrogate.
+ return finishWithBrokenSurrogatePair();
+ m_trailSurrogate = codeUnit;
+ // Needs surrogate lead.
+ return TextSegmentationMachineState::NeedMoreCodeUnit;
+ } else {
+ if (m_trailSurrogate != 0) // Unpaired trail surrogate.
+ return finishWithBrokenSurrogatePair();
+ }
+
+ if (m_internalState == InternalState::CountPrecedingRIS) {
+ if (Character::isRegionalIndicator(codePoint)) {
+ ++m_precedingRISCount;
+ return TextSegmentationMachineState::NeedMoreCodeUnit;
+ }
+
+ // If the number of preceding regional indicator symbols is even, the
+ // current offset is a grapheme boundary. If it is odd, previous offset
+ // is a boundary.
+ if (m_precedingRISCount % 2 != 0)
+ m_BoundaryOffset -= 2;
+ return finish();
+ }
+
+ DCHECK_EQ(m_internalState, InternalState::SearchBoundary);
+ if (m_prevCodePoint == 0) {
+ // First code point has arrived. Needs one more code point to determine
+ // the grapheme boundary.
+ m_prevCodePoint = codePoint;
+ return TextSegmentationMachineState::NeedMoreCodeUnit;
+ }
+
+ // Slide the offset to the previous.
+ m_BoundaryOffset -= U16_LENGTH(m_prevCodePoint);
+ m_nextCodePoint = m_prevCodePoint;
+ m_prevCodePoint = codePoint;
+
+ if (Character::isRegionalIndicator(m_prevCodePoint)
+ && Character::isRegionalIndicator(m_nextCodePoint)) {
+ // If previous and next code point is regional indicator symbols, need
+ // to count the preceding regional indicator symbols to determine the
+ // grapheme boundary.
+ DCHECK_EQ(m_precedingRISCount, 0);
+ m_precedingRISCount = 1; // Count from 1 since m_prevCodePoint is RIS.
+ return startCountPrecedingRIS();
+ }
+
+ if (isGraphemeBreak(m_prevCodePoint, m_nextCodePoint))
+ return finish();
+ return TextSegmentationMachineState::NeedMoreCodeUnit;
+}
+
+TextSegmentationMachineState
+BackwardGraphemeBoundaryStateMachine::tellEndOfPrecedingText()
+{
+ DCHECK_NE(m_internalState, InternalState::Finished);
+ if (m_trailSurrogate != 0)
+ return finishWithBrokenSurrogatePair();
+
+ if (m_internalState == InternalState::CountPrecedingRIS) {
+ // Reached to the start of text during counting preceding regional
+ // indicator symbols. Check the boundary here.
+ DCHECK_GT(m_precedingRISCount, 0);
+ if (m_precedingRISCount % 2 != 0) {
+ m_BoundaryOffset -= 2;
+ }
+ m_precedingRISCount = 0;
+ return finish();
+ }
+
+ DCHECK_EQ(m_internalState, InternalState::SearchBoundary);
+ if (m_prevCodePoint == 0 && m_nextCodePoint == 0) {
+ // Haven't search any preceding text. Should return 0.
+ DCHECK_EQ(m_BoundaryOffset, 0);
+ return finish();
+ }
+
+ if (m_nextCodePoint == 0
+ || !isGraphemeBreak(m_prevCodePoint, m_nextCodePoint)) {
+ // Reached to the start of text. Move offset to the start of text.
+ m_BoundaryOffset -= U16_LENGTH(m_prevCodePoint);
+ }
+ return finish();
+}
+
+TextSegmentationMachineState
+BackwardGraphemeBoundaryStateMachine::feedFollowingCodeUnit(UChar codeUnit)
+{
+ NOTREACHED();
+ return TextSegmentationMachineState::Invalid;
+}
+
+int BackwardGraphemeBoundaryStateMachine::finalizeAndGetBoundaryOffset()
+{
+ if (m_internalState != InternalState::Finished)
+ tellEndOfPrecedingText();
+ return m_BoundaryOffset;
+}
+
+TextSegmentationMachineState
+BackwardGraphemeBoundaryStateMachine::startCountPrecedingRIS()
+{
+ DCHECK_EQ(m_internalState, InternalState::SearchBoundary)
+ << "Unable to start RIS count from other than SearchBoundary";
+ m_internalState = InternalState::CountPrecedingRIS;
+ return TextSegmentationMachineState::NeedMoreCodeUnit;
+}
+
+TextSegmentationMachineState BackwardGraphemeBoundaryStateMachine::finish()
+{
+ DCHECK_NE(m_internalState, InternalState::Finished);
+ m_internalState = InternalState::Finished;
+ return TextSegmentationMachineState::Finished;
+}
+
+TextSegmentationMachineState
+BackwardGraphemeBoundaryStateMachine::finishWithBrokenSurrogatePair()
+{
+ DCHECK_NE(m_internalState, InternalState::Finished);
+ if (m_prevCodePoint == 0 && m_nextCodePoint == 0) {
+ // Immediate preceding code point is invalid surrogate.
+ // Move the offset to the before of that invalid surrogate.
+ DCHECK_EQ(m_internalState, InternalState::SearchBoundary);
+ m_BoundaryOffset = -1;
+ return finish();
+ }
+ // Other than immediate preceding code point, treat broken surrogate pair as
+ // end of preceding text.
+ m_trailSurrogate = 0;
+ return tellEndOfPrecedingText();
+}
+
+void BackwardGraphemeBoundaryStateMachine::reset()
+{
+ m_trailSurrogate = 0;
+ m_nextCodePoint = 0;
+ m_prevCodePoint = 0;
+ m_BoundaryOffset = 0;
+ m_precedingRISCount = 0;
+ m_internalState = InternalState::SearchBoundary;
+}
+
+} // namespace blink

Powered by Google App Engine
This is Rietveld 408576698