| Index: third_party/WebKit/Source/core/editing/state_machines/ForwardGraphemeBoundaryStateMachine.cpp
|
| diff --git a/third_party/WebKit/Source/core/editing/state_machines/ForwardGraphemeBoundaryStateMachine.cpp b/third_party/WebKit/Source/core/editing/state_machines/ForwardGraphemeBoundaryStateMachine.cpp
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..6aa04e01ae4cdb3ef2f9ce360af69fb6cf15f445
|
| --- /dev/null
|
| +++ b/third_party/WebKit/Source/core/editing/state_machines/ForwardGraphemeBoundaryStateMachine.cpp
|
| @@ -0,0 +1,206 @@
|
| +// Copyright 2016 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "core/editing/state_machines/ForwardGraphemeBoundaryStateMachine.h"
|
| +
|
| +#include "core/editing/state_machines/StateMachineUtil.h"
|
| +#include "core/editing/state_machines/TextSegmentationMachineState.h"
|
| +#include "platform/fonts/Character.h"
|
| +#include "wtf/text/Unicode.h"
|
| +#include <ostream> // NOLINT
|
| +
|
| +namespace blink {
|
| +// States of the machine. Keep the order in sync with |texts| in operator<<.
|
| +enum class ForwardGraphemeBoundaryStateMachine::InternalState {
|
| + CountPrecedingRIS, // Initial state: counting preceding regional indicator symbols.
|
| + SearchBoundary, // Searching the following text for the grapheme cluster boundary.
|
| + Finished, // The state machine has finished; no more input is expected.
|
| +};
|
| +// Streams the name of |state| to |os|. |texts| must list names in enum order.
|
| +std::ostream& operator<<(std::ostream& os,
|
| + ForwardGraphemeBoundaryStateMachine::InternalState state) {
|
| + static const char* const texts[] = {
|
| + "CountPrecedingRIS", "SearchBoundary", "Finished",
|
| + };
|
| + const auto it = std::begin(texts) + static_cast<size_t>(state); // Index by enum value.
|
| + DCHECK_GE(it, std::begin(texts)) << "Unknown state value";
|
| + DCHECK_LT(it, std::end(texts)) << "Unknown state value";
|
| + return os << *it;
|
| +}
|
| +// Creates a machine that starts in the CountPrecedingRIS state.
|
| +ForwardGraphemeBoundaryStateMachine::ForwardGraphemeBoundaryStateMachine()
|
| + : m_internalState(InternalState::CountPrecedingRIS)
|
| +{ // NOTE(review): other members presumably get in-class initializers; confirm.
|
| +}
|
| +// Consumes one code unit of the preceding text (fed backward) to count RIS.
|
| +TextSegmentationMachineState
|
| +ForwardGraphemeBoundaryStateMachine::feedPrecedingCodeUnit(UChar codeUnit)
|
| +{ // Returns NeedMoreCodeUnit to keep counting, else NeedFollowingCodeUnit.
|
| + DCHECK_EQ(m_internalState, InternalState::CountPrecedingRIS);
|
| + // TODO(nona): Extract common surrogate pair logic.
|
| + uint32_t codePoint = codeUnit;
|
| + if (U16_IS_LEAD(codeUnit)) {
|
| + if (m_pendingCodeUnit == 0) // Lead surrogate with no pending trail: unpaired.
|
| + return startSearchingBoundary();
|
| + codePoint = U16_GET_SUPPLEMENTARY(codeUnit, m_pendingCodeUnit);
|
| + m_pendingCodeUnit = 0;
|
| + } else if (U16_IS_TRAIL(codeUnit)) {
|
| + if (m_pendingCodeUnit != 0) // The pending trail surrogate is unpaired.
|
| + return startSearchingBoundary();
|
| + m_pendingCodeUnit = codeUnit;
|
| + // Hold the trail surrogate until its lead arrives on the next call.
|
| + return TextSegmentationMachineState::NeedMoreCodeUnit;
|
| + } else {
|
| + if (m_pendingCodeUnit != 0) // The pending trail surrogate is unpaired.
|
| + return startSearchingBoundary();
|
| + }
|
| +
|
| + // Count the preceding regional indicator symbols; the parity of this count
|
| + // decides where the boundary falls once the forward search starts.
|
| + if (Character::isRegionalIndicator(codePoint)) {
|
| + ++m_precedingRISCount;
|
| + return TextSegmentationMachineState::NeedMoreCodeUnit;
|
| + }
|
| + return startSearchingBoundary(); // Not a regional indicator: stop counting.
|
| +}
|
| +// Consumes one code unit of the following text, advancing toward the boundary.
|
| +TextSegmentationMachineState
|
| +ForwardGraphemeBoundaryStateMachine::feedFollowingCodeUnit(UChar codeUnit)
|
| +{ // Returns NeedMoreCodeUnit until the boundary is decided, then Finished.
|
| + DCHECK_EQ(m_internalState, InternalState::SearchBoundary);
|
| + uint32_t codePoint = codeUnit;
|
| + if (U16_IS_TRAIL(codeUnit)) {
|
| + if (m_pendingCodeUnit == 0) // Trail surrogate with no pending lead: unpaired.
|
| + return finishWithBrokenSurrogatePair();
|
| + codePoint = U16_GET_SUPPLEMENTARY(m_pendingCodeUnit, codeUnit);
|
| + m_pendingCodeUnit = 0;
|
| + } else if (U16_IS_LEAD(codeUnit)) {
|
| + if (m_pendingCodeUnit != 0) // The pending lead surrogate is unpaired.
|
| + return finishWithBrokenSurrogatePair();
|
| + m_pendingCodeUnit = codeUnit;
|
| + // Hold the lead surrogate until its trail arrives on the next call.
|
| + return TextSegmentationMachineState::NeedMoreCodeUnit;
|
| + } else {
|
| + if (m_pendingCodeUnit != 0) // The pending lead surrogate is unpaired.
|
| + return finishWithBrokenSurrogatePair();
|
| + }
|
| +
|
| + if (m_nextCodePoint == 0) { // NOTE(review): 0 doubles as "unset"; U+0000 looks unset.
|
| + // First code point has arrived. One more code point is needed to
|
| + // determine the grapheme boundary.
|
| + m_nextCodePoint = codePoint;
|
| + return TextSegmentationMachineState::NeedMoreCodeUnit;
|
| + }
|
| +
|
| + // Slide the window forward: the offset now sits after the old next code point.
|
| + m_boundaryOffset += U16_LENGTH(m_nextCodePoint);
|
| + m_prevCodePoint = m_nextCodePoint;
|
| + m_nextCodePoint = codePoint;
|
| +
|
| + if (Character::isRegionalIndicator(m_prevCodePoint)
|
| + && Character::isRegionalIndicator(m_nextCodePoint)) {
|
| + // If both previous/next code points are regional indicator symbols, the
|
| + // parity of the preceding regional indicator symbols decides the pairing.
|
| + // If that count is odd, the previous one completes a pair, so the current
|
| + // offset is the boundary. If it is even, the next one completes the pair.
|
| + if (m_precedingRISCount % 2 == 0) {
|
| + // m_prevCodePoint starts a new pair (m_precedingRISCount doesn't include
|
| + // it), so advance 2 more code units to move past m_nextCodePoint.
|
| + m_boundaryOffset += 2;
|
| + }
|
| + return finish();
|
| + }
|
| +
|
| + if (isGraphemeBreak(m_prevCodePoint, m_nextCodePoint))
|
| + return finish();
|
| + return TextSegmentationMachineState::NeedMoreCodeUnit;
|
| +}
|
| +// Signals that no more preceding text exists; switches to the forward search.
|
| +TextSegmentationMachineState
|
| +ForwardGraphemeBoundaryStateMachine::tellEndOfPrecedingText()
|
| +{
|
| + DCHECK_EQ(m_internalState, InternalState::CountPrecedingRIS);
|
| + return startSearchingBoundary(); // Also drops any pending trail surrogate.
|
| +}
|
| +// Forces the machine to finish if needed and returns the boundary offset.
|
| +int ForwardGraphemeBoundaryStateMachine::finalizeAndGetBoundaryOffset()
|
| +{
|
| + if (m_internalState == InternalState::CountPrecedingRIS)
|
| + finish(); // Nothing has been searched forward yet. Just finish.
|
| + else if (m_internalState == InternalState::SearchBoundary)
|
| + finishWithEndOfText(); // Treat the following text fed so far as complete.
|
| + return m_boundaryOffset;
|
| +}
|
| +// Clears all progress and returns to the initial CountPrecedingRIS state.
|
| +void ForwardGraphemeBoundaryStateMachine::reset()
|
| +{
|
| + m_pendingCodeUnit = 0; // No half-read surrogate pair.
|
| + m_boundaryOffset = 0;
|
| + m_precedingRISCount = 0;
|
| + m_prevCodePoint = 0; // 0 means "no code point seen yet" for both of these.
|
| + m_nextCodePoint = 0;
|
| + m_internalState = InternalState::CountPrecedingRIS;
|
| +}
|
| +// Ends the RIS-counting phase and asks the caller for following code units.
|
| +TextSegmentationMachineState
|
| +ForwardGraphemeBoundaryStateMachine::startSearchingBoundary()
|
| +{
|
| + DCHECK_EQ(m_internalState, InternalState::CountPrecedingRIS);
|
| + m_pendingCodeUnit = 0; // Discard any half-read surrogate from the preceding text.
|
| + m_internalState = InternalState::SearchBoundary;
|
| + return TextSegmentationMachineState::NeedFollowingCodeUnit;
|
| +}
|
| +// Marks the machine as finished; must not be called again without reset().
|
| +TextSegmentationMachineState ForwardGraphemeBoundaryStateMachine::finish()
|
| +{
|
| + DCHECK_NE(m_internalState, InternalState::Finished);
|
| + m_internalState = InternalState::Finished;
|
| + return TextSegmentationMachineState::Finished;
|
| +}
|
| +// Finishes the machine after an unpaired surrogate is found in the following text.
|
| +TextSegmentationMachineState
|
| +ForwardGraphemeBoundaryStateMachine::finishWithBrokenSurrogatePair()
|
| +{
|
| + DCHECK_NE(m_internalState, InternalState::Finished);
|
| + if (m_prevCodePoint == 0 && m_nextCodePoint == 0) {
|
| + // The immediately following code unit is an unpaired surrogate.
|
| + // Move the offset past that single code unit.
|
| + DCHECK_EQ(m_internalState, InternalState::SearchBoundary);
|
| + m_boundaryOffset = 1;
|
| + return finish();
|
| + }
|
| + // A broken surrogate pair anywhere later is treated as the end of text;
|
| + // the pending code unit is dropped so finishWithEndOfText() does not re-enter.
|
| + m_pendingCodeUnit = 0;
|
| + return finishWithEndOfText();
|
| +}
|
| +// Finishes the machine when the following text ends before a break is found.
|
| +TextSegmentationMachineState
|
| +ForwardGraphemeBoundaryStateMachine::finishWithEndOfText()
|
| +{
|
| + DCHECK_EQ(m_internalState, InternalState::SearchBoundary);
|
| + if (m_pendingCodeUnit != 0) // The text ended in the middle of a surrogate pair.
|
| + return finishWithBrokenSurrogatePair();
|
| +
|
| + if (m_prevCodePoint == 0 && m_nextCodePoint == 0) {
|
| + // Haven't searched any following text, so the offset should still be 0.
|
| + DCHECK_EQ(m_boundaryOffset, 0);
|
| + m_boundaryOffset = 1; // NOTE(review): yields 1 with no following text; confirm intent.
|
| + return finish();
|
| + }
|
| +
|
| + // Only one code point was given. Move to the end of that code point.
|
| + if (m_prevCodePoint == 0) {
|
| + m_boundaryOffset += U16_LENGTH(m_nextCodePoint);
|
| + return finish();
|
| + }
|
| +
|
| + // Reached the end of text: include the last code point unless a break precedes it.
|
| + if (!isGraphemeBreak(m_prevCodePoint, m_nextCodePoint))
|
| + m_boundaryOffset += U16_LENGTH(m_nextCodePoint);
|
| +
|
| + // Finishes the state machine.
|
| + return finish();
|
| +}
|
| +} // namespace blink
|
|
|