Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(243)

Unified Diff: third_party/WebKit/Source/core/editing/state_machines/ForwardGraphemeBoundaryStateMachine.cpp

Issue 1833413002: [All-in-one patch] Implement own grapheme boundary breaker for editing. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Upload All-in-one patch Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/core/editing/state_machines/ForwardGraphemeBoundaryStateMachine.cpp
diff --git a/third_party/WebKit/Source/core/editing/state_machines/ForwardGraphemeBoundaryStateMachine.cpp b/third_party/WebKit/Source/core/editing/state_machines/ForwardGraphemeBoundaryStateMachine.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6aa04e01ae4cdb3ef2f9ce360af69fb6cf15f445
--- /dev/null
+++ b/third_party/WebKit/Source/core/editing/state_machines/ForwardGraphemeBoundaryStateMachine.cpp
@@ -0,0 +1,206 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/editing/state_machines/ForwardGraphemeBoundaryStateMachine.h"
+
+#include "core/editing/state_machines/StateMachineUtil.h"
+#include "core/editing/state_machines/TextSegmentationMachineState.h"
+#include "platform/fonts/Character.h"
+#include "wtf/text/Unicode.h"
+#include <ostream> // NOLINT
+
+namespace blink {
+
+// Internal phases of ForwardGraphemeBoundaryStateMachine. The enumerator order
+// must stay in sync with the name table used by operator<< below, which
+// indexes that table with the enumerator's integral value.
+enum class ForwardGraphemeBoundaryStateMachine::InternalState {
+ CountPrecedingRIS, // Counting preceding regional indicator symbols.
+ SearchBoundary, // Searching grapheme cluster boundary.
+ Finished, // The state machine has finished.
+};
+
+// Writes the symbolic name of |state| to |os| and returns |os|.
+// NOTE(review): presumably required so that DCHECK_EQ/DCHECK_NE can print
+// InternalState operands on failure - confirm.
+std::ostream& operator<<(std::ostream& os,
+    ForwardGraphemeBoundaryStateMachine::InternalState state) {
+    // Must list the names in the same order as the InternalState enumerators.
+    static const char* const texts[] = {
+        "CountPrecedingRIS", "SearchBoundary", "Finished",
+    };
+    // Validate the index before touching the table: forming a pointer beyond
+    // one-past-the-end of |texts| would already be undefined behavior.
+    const size_t index = static_cast<size_t>(state);
+    const size_t count =
+        static_cast<size_t>(std::end(texts) - std::begin(texts));
+    DCHECK_LT(index, count) << "Unknown state value";
+    return os << texts[index];
+}
+
+// Creates a machine in the CountPrecedingRIS state. Only m_internalState is
+// initialized here. NOTE(review): the remaining members are presumably given
+// default initializers in the header - confirm.
+ForwardGraphemeBoundaryStateMachine::ForwardGraphemeBoundaryStateMachine()
+ : m_internalState(InternalState::CountPrecedingRIS)
+{
+}
+
+// Consumes one UTF-16 code unit of the text before the start position. Code
+// units arrive in backward order, so the trail surrogate of a pair is seen
+// before its lead. Returns NeedMoreCodeUnit while regional indicator symbols
+// are still being counted; otherwise switches to the SearchBoundary state and
+// returns NeedFollowingCodeUnit (see startSearchingBoundary()).
+TextSegmentationMachineState
+ForwardGraphemeBoundaryStateMachine::feedPrecedingCodeUnit(UChar codeUnit)
+{
+    DCHECK_EQ(m_internalState, InternalState::CountPrecedingRIS);
+    // TODO(nona): Extract common surrogate pair logic.
+    const bool hasPendingTrail = m_pendingCodeUnit != 0;
+
+    if (U16_IS_TRAIL(codeUnit)) {
+        // Two trails in a row: the pending one can never be paired.
+        if (hasPendingTrail)
+            return startSearchingBoundary();
+        // Remember the trail and wait for its lead surrogate.
+        m_pendingCodeUnit = codeUnit;
+        return TextSegmentationMachineState::NeedMoreCodeUnit;
+    }
+
+    uint32_t codePoint = codeUnit;
+    if (U16_IS_LEAD(codeUnit)) {
+        // A lead with no trail waiting for it is unpaired.
+        if (!hasPendingTrail)
+            return startSearchingBoundary();
+        codePoint = U16_GET_SUPPLEMENTARY(codeUnit, m_pendingCodeUnit);
+        m_pendingCodeUnit = 0;
+    } else if (hasPendingTrail) {
+        // A non-surrogate arrived while a trail was pending: unpaired trail.
+        return startSearchingBoundary();
+    }
+
+    // Anything other than a regional indicator symbol ends the counting.
+    if (!Character::isRegionalIndicator(codePoint))
+        return startSearchingBoundary();
+
+    // Keep the tally; it decides the boundary later in feedFollowingCodeUnit().
+    ++m_precedingRISCount;
+    return TextSegmentationMachineState::NeedMoreCodeUnit;
+}
+
+// Consumes one UTF-16 code unit of the text after the start position, in
+// forward order. Returns NeedMoreCodeUnit until a boundary has been decided,
+// and Finished once m_boundaryOffset holds the result. Broken surrogate
+// sequences are delegated to finishWithBrokenSurrogatePair().
+TextSegmentationMachineState
+ForwardGraphemeBoundaryStateMachine::feedFollowingCodeUnit(UChar codeUnit)
+{
+ DCHECK_EQ(m_internalState, InternalState::SearchBoundary);
+ uint32_t codePoint = codeUnit;
+ if (U16_IS_TRAIL(codeUnit)) {
+ if (m_pendingCodeUnit == 0) // Unpaired trail surrogate.
+ return finishWithBrokenSurrogatePair();
+ // Combine the pending lead with this trail into one code point.
+ codePoint = U16_GET_SUPPLEMENTARY(m_pendingCodeUnit, codeUnit);
+ m_pendingCodeUnit = 0;
+ } else if (U16_IS_LEAD(codeUnit)) {
+ if (m_pendingCodeUnit != 0) // Unpaired lead surrogate.
+ return finishWithBrokenSurrogatePair();
+ m_pendingCodeUnit = codeUnit;
+ // Needs surrogate trail.
+ return TextSegmentationMachineState::NeedMoreCodeUnit;
+ } else {
+ if (m_pendingCodeUnit != 0) // Unpaired lead surrogate.
+ return finishWithBrokenSurrogatePair();
+ }
+
+ // NOTE(review): 0 doubles as the "no code point seen yet" marker, so a
+ // U+0000 code point in the text is indistinguishable from that state.
+ if (m_nextCodePoint == 0) {
+ // First code point has arrived. Needs one more code point to determine
+ // grapheme boundary.
+ m_nextCodePoint = codePoint;
+ return TextSegmentationMachineState::NeedMoreCodeUnit;
+ }
+
+ // Slide the window: the offset now points just after the old
+ // m_nextCodePoint, which becomes m_prevCodePoint.
+ m_boundaryOffset += U16_LENGTH(m_nextCodePoint);
+ m_prevCodePoint = m_nextCodePoint;
+ m_nextCodePoint = codePoint;
+
+ if (Character::isRegionalIndicator(m_prevCodePoint)
+ && Character::isRegionalIndicator(m_nextCodePoint)) {
+ // Both sides of the candidate boundary are regional indicator symbols,
+ // so the count collected from the preceding text decides the result.
+ // m_precedingRISCount does not include m_prevCodePoint: when it is
+ // even, the boundary is moved past m_nextCodePoint; when it is odd,
+ // the current offset (just after m_prevCodePoint) is the boundary.
+ if (m_precedingRISCount % 2 == 0) {
+ // Skip over m_nextCodePoint. NOTE(review): 2 is presumably the
+ // UTF-16 length of a regional indicator symbol - confirm.
+ m_boundaryOffset += 2;
+ }
+ return finish();
+ }
+
+ // Otherwise stop as soon as the pair forms a grapheme break.
+ if (isGraphemeBreak(m_prevCodePoint, m_nextCodePoint))
+ return finish();
+ return TextSegmentationMachineState::NeedMoreCodeUnit;
+}
+
+// Signals that there is no more preceding text to feed. Ends the counting
+// phase and returns whatever startSearchingBoundary() returns
+// (NeedFollowingCodeUnit). Must only be called in the CountPrecedingRIS state.
+TextSegmentationMachineState
+ForwardGraphemeBoundaryStateMachine::tellEndOfPrecedingText()
+{
+ DCHECK_EQ(m_internalState, InternalState::CountPrecedingRIS);
+ return startSearchingBoundary();
+}
+
+// Forces the machine into the Finished state if it is not there yet and
+// returns the boundary offset computed so far. Calling it again on an already
+// finished machine just returns the stored offset.
+int ForwardGraphemeBoundaryStateMachine::finalizeAndGetBoundaryOffset()
+{
+    switch (m_internalState) {
+    case InternalState::CountPrecedingRIS:
+        // Nothing has been searched forward yet, so just finish.
+        finish();
+        break;
+    case InternalState::SearchBoundary:
+        // Treat the missing input as the end of the following text.
+        finishWithEndOfText();
+        break;
+    case InternalState::Finished:
+        break;
+    }
+    return m_boundaryOffset;
+}
+
+// Puts the machine back into its initial state so that it can be reused for
+// another search.
+void ForwardGraphemeBoundaryStateMachine::reset()
+{
+    // Back to the first phase.
+    m_internalState = InternalState::CountPrecedingRIS;
+
+    // Forget any half-read surrogate pair and the code point window.
+    m_pendingCodeUnit = 0;
+    m_prevCodePoint = 0;
+    m_nextCodePoint = 0;
+
+    // Clear the accumulated results.
+    m_precedingRISCount = 0;
+    m_boundaryOffset = 0;
+}
+
+// Leaves the CountPrecedingRIS state and enters SearchBoundary. Any pending
+// (unpaired) code unit left over from the preceding text is discarded.
+// Always returns NeedFollowingCodeUnit.
+TextSegmentationMachineState
+ForwardGraphemeBoundaryStateMachine::startSearchingBoundary()
+{
+ DCHECK_EQ(m_internalState, InternalState::CountPrecedingRIS);
+ m_pendingCodeUnit = 0;
+ m_internalState = InternalState::SearchBoundary;
+ return TextSegmentationMachineState::NeedFollowingCodeUnit;
+}
+
+// Marks the machine as Finished, leaving m_boundaryOffset untouched, and
+// returns TextSegmentationMachineState::Finished. Must not be called when the
+// machine has already finished.
+TextSegmentationMachineState ForwardGraphemeBoundaryStateMachine::finish()
+{
+ DCHECK_NE(m_internalState, InternalState::Finished);
+ m_internalState = InternalState::Finished;
+ return TextSegmentationMachineState::Finished;
+}
+
+// Finishes the search after an unpaired surrogate was met in the following
+// text. If a code point has already been read, the broken surrogate is
+// handled like the end of the text; otherwise the boundary is placed right
+// after the broken code unit.
+TextSegmentationMachineState
+ForwardGraphemeBoundaryStateMachine::finishWithBrokenSurrogatePair()
+{
+    DCHECK_NE(m_internalState, InternalState::Finished);
+    const bool hasSeenCodePoint = m_prevCodePoint != 0 || m_nextCodePoint != 0;
+    if (hasSeenCodePoint) {
+        // Drop the broken code unit and resolve the boundary as if the text
+        // ended just before it.
+        m_pendingCodeUnit = 0;
+        return finishWithEndOfText();
+    }
+
+    // The very first following code unit is the invalid surrogate: step over
+    // that single code unit.
+    DCHECK_EQ(m_internalState, InternalState::SearchBoundary);
+    m_boundaryOffset = 1;
+    return finish();
+}
+
+// Finishes the search because no more following text is available. Resolves
+// m_boundaryOffset from whatever has been read so far and returns Finished.
+// Must only be called in the SearchBoundary state.
+TextSegmentationMachineState
+ForwardGraphemeBoundaryStateMachine::finishWithEndOfText()
+{
+ DCHECK_EQ(m_internalState, InternalState::SearchBoundary);
+ // A lead surrogate is still waiting for its trail: handle it as broken.
+ if (m_pendingCodeUnit != 0)
+ return finishWithBrokenSurrogatePair();
+
+ if (m_prevCodePoint == 0 && m_nextCodePoint == 0) {
+ // Haven't searched any following text, so the offset accumulated so far
+ // must still be 0.
+ // NOTE(review): the offset is then set to 1 even though no following
+ // code unit was consumed on this path - confirm this is intended.
+ DCHECK_EQ(m_boundaryOffset, 0);
+ m_boundaryOffset = 1;
+ return finish();
+ }
+
+ // Only one code point was given. Move to the end of that code point.
+ if (m_prevCodePoint == 0) {
+ m_boundaryOffset += U16_LENGTH(m_nextCodePoint);
+ return finish();
+ }
+
+ // Reached the end of text. If the last two code points do not break, the
+ // cluster extends to the end of the text, so include the last code point.
+ if (!isGraphemeBreak(m_prevCodePoint, m_nextCodePoint))
+ m_boundaryOffset += U16_LENGTH(m_nextCodePoint);
+
+ // Finishes the state machine.
+ return finish();
+}
+} // namespace blink

Powered by Google App Engine
This is Rietveld 408576698