| Index: third_party/WebKit/Source/core/editing/state_machines/BackwardGraphemeBoundaryStateMachine.cpp
|
| diff --git a/third_party/WebKit/Source/core/editing/state_machines/BackwardGraphemeBoundaryStateMachine.cpp b/third_party/WebKit/Source/core/editing/state_machines/BackwardGraphemeBoundaryStateMachine.cpp
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..1bbc701f7f0ad07bf3d4ea04da51630c00df2b30
|
| --- /dev/null
|
| +++ b/third_party/WebKit/Source/core/editing/state_machines/BackwardGraphemeBoundaryStateMachine.cpp
|
| @@ -0,0 +1,190 @@
|
| +// Copyright 2016 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "core/editing/state_machines/BackwardGraphemeBoundaryStateMachine.h"
|
| +
|
| +#include "core/editing/state_machines/StateMachineUtil.h"
|
| +#include "core/editing/state_machines/TextSegmentationMachineState.h"
|
| +#include "platform/fonts/Character.h"
|
| +#include "wtf/text/Unicode.h"
|
| +#include <ostream> // NOLINT
|
| +
|
| +namespace blink {
|
| +// Internal states of BackwardGraphemeBoundaryStateMachine.
|
| +enum class BackwardGraphemeBoundaryStateMachine::InternalState {
|
| + SearchBoundary, // Searching for a grapheme boundary (the initial state).
|
| + CountPrecedingRIS, // Counting preceding regional indicator symbols (RIS).
|
| + Finished, // The state machine has finished.
|
| +};
|
| +// Streams the name of |state|; |texts| must stay in InternalState order.
|
| +std::ostream& operator<<(std::ostream& os,
|
| + BackwardGraphemeBoundaryStateMachine::InternalState state) {
|
| + static const char* const texts[] = {
|
| + "SearchBoundary", "CountPrecedingRIS", "Finished",
|
| + };
|
| + const auto& it = std::begin(texts) + static_cast<size_t>(state);
|
| + DCHECK_GE(it, std::begin(texts)) << "Unknown state value";
|
| + DCHECK_LT(it, std::end(texts)) << "Unknown state value";
|
| + return os << *it;
|
| +}
|
| +// Starts in SearchBoundary; other members presumably use in-class initializers.
|
| +BackwardGraphemeBoundaryStateMachine::BackwardGraphemeBoundaryStateMachine()
|
| + : m_internalState(InternalState::SearchBoundary)
|
| +{
|
| +}
|
| +// Consumes one code unit preceding the offset; units arrive in backward order.
|
| +TextSegmentationMachineState
|
| +BackwardGraphemeBoundaryStateMachine::feedPrecedingCodeUnit(UChar codeUnit)
|
| +{
|
| + // TODO(nona): Extract common surrogate pair logic.
|
| + uint32_t codePoint = codeUnit;
|
| + if (U16_IS_LEAD(codeUnit)) {
|
| + if (m_trailSurrogate == 0) // Lead surrogate with no pending trail.
|
| + return finishWithBrokenSurrogatePair();
|
| + codePoint = U16_GET_SUPPLEMENTARY(codeUnit, m_trailSurrogate);
|
| + m_trailSurrogate = 0;
|
| + } else if (U16_IS_TRAIL(codeUnit)) {
|
| + if (m_trailSurrogate != 0) // The pending trail surrogate is unpaired.
|
| + return finishWithBrokenSurrogatePair();
|
| + m_trailSurrogate = codeUnit;
|
| + // Needs the lead surrogate to complete the pair.
|
| + return TextSegmentationMachineState::NeedMoreCodeUnit;
|
| + } else {
|
| + if (m_trailSurrogate != 0) // The pending trail surrogate is unpaired.
|
| + return finishWithBrokenSurrogatePair();
|
| + }
|
| +
|
| + if (m_internalState == InternalState::CountPrecedingRIS) {
|
| + if (Character::isRegionalIndicator(codePoint)) {
|
| + ++m_precedingRISCount;
|
| + return TextSegmentationMachineState::NeedMoreCodeUnit;
|
| + }
|
| +
|
| + // A non-RIS code point ends the run. If the number of preceding
|
| + // regional indicator symbols is even, the current offset is a grapheme
|
| + // boundary. If it is odd, the previous offset is the boundary.
|
| + if (m_precedingRISCount % 2 != 0)
|
| + m_BoundaryOffset -= 2; // Step back over one RIS (two code units).
|
| + return finish();
|
| + }
|
| +
|
| + DCHECK_EQ(m_internalState, InternalState::SearchBoundary);
|
| + if (m_prevCodePoint == 0) {
|
| + // The first code point has arrived. One more code point is needed to
|
| + // determine the grapheme boundary.
|
| + m_prevCodePoint = codePoint;
|
| + return TextSegmentationMachineState::NeedMoreCodeUnit;
|
| + }
|
| +
|
| + // Move the offset back over the previously fed code point.
|
| + m_BoundaryOffset -= U16_LENGTH(m_prevCodePoint);
|
| + m_nextCodePoint = m_prevCodePoint;
|
| + m_prevCodePoint = codePoint;
|
| +
|
| + if (Character::isRegionalIndicator(m_prevCodePoint)
|
| + && Character::isRegionalIndicator(m_nextCodePoint)) {
|
| + // If both the previous and next code points are regional indicator
|
| + // symbols, the preceding regional indicator symbols must be counted
|
| + // to determine the grapheme boundary.
|
| + DCHECK_EQ(m_precedingRISCount, 0);
|
| + m_precedingRISCount = 1; // Count from 1 since m_prevCodePoint is RIS.
|
| + return startCountPrecedingRIS();
|
| + }
|
| +
|
| + if (isGraphemeBreak(m_prevCodePoint, m_nextCodePoint))
|
| + return finish();
|
| + return TextSegmentationMachineState::NeedMoreCodeUnit;
|
| +}
|
| +// Signals that no more preceding code units exist and settles the boundary.
|
| +TextSegmentationMachineState
|
| +BackwardGraphemeBoundaryStateMachine::tellEndOfPrecedingText()
|
| +{
|
| + DCHECK_NE(m_internalState, InternalState::Finished);
|
| + if (m_trailSurrogate != 0) // A trail surrogate is still waiting for its lead.
|
| + return finishWithBrokenSurrogatePair();
|
| +
|
| + if (m_internalState == InternalState::CountPrecedingRIS) {
|
| + // Reached the start of text while counting preceding regional
|
| + // indicator symbols. Check the boundary here.
|
| + DCHECK_GT(m_precedingRISCount, 0);
|
| + if (m_precedingRISCount % 2 != 0) {
|
| + m_BoundaryOffset -= 2; // Step back over one RIS (two code units).
|
| + }
|
| + m_precedingRISCount = 0;
|
| + return finish();
|
| + }
|
| +
|
| + DCHECK_EQ(m_internalState, InternalState::SearchBoundary);
|
| + if (m_prevCodePoint == 0 && m_nextCodePoint == 0) {
|
| + // Haven't searched any preceding text. The offset stays 0.
|
| + DCHECK_EQ(m_BoundaryOffset, 0);
|
| + return finish();
|
| + }
|
| +
|
| + if (m_nextCodePoint == 0
|
| + || !isGraphemeBreak(m_prevCodePoint, m_nextCodePoint)) {
|
| + // Reached the start of text. Move the offset to the start of text.
|
| + m_BoundaryOffset -= U16_LENGTH(m_prevCodePoint);
|
| + }
|
| + return finish();
|
| +}
|
| +// Never expected to be called; hits NOTREACHED() and returns Invalid.
|
| +TextSegmentationMachineState
|
| +BackwardGraphemeBoundaryStateMachine::feedFollowingCodeUnit(UChar codeUnit)
|
| +{
|
| + NOTREACHED();
|
| + return TextSegmentationMachineState::Invalid;
|
| +}
|
| +// Returns the boundary offset, ending the preceding text first if unfinished.
|
| +int BackwardGraphemeBoundaryStateMachine::finalizeAndGetBoundaryOffset()
|
| +{
|
| + if (m_internalState != InternalState::Finished)
|
| + tellEndOfPrecedingText(); // Settles m_BoundaryOffset with what was fed.
|
| + return m_BoundaryOffset;
|
| +}
|
| +// Switches from SearchBoundary to CountPrecedingRIS and asks for more input.
|
| +TextSegmentationMachineState
|
| +BackwardGraphemeBoundaryStateMachine::startCountPrecedingRIS()
|
| +{
|
| + DCHECK_EQ(m_internalState, InternalState::SearchBoundary)
|
| + << "Unable to start RIS count from other than SearchBoundary";
|
| + m_internalState = InternalState::CountPrecedingRIS;
|
| + return TextSegmentationMachineState::NeedMoreCodeUnit;
|
| +}
|
| +// Moves the machine to the Finished state and reports Finished to the caller.
|
| +TextSegmentationMachineState BackwardGraphemeBoundaryStateMachine::finish()
|
| +{
|
| + DCHECK_NE(m_internalState, InternalState::Finished); // Must finish only once.
|
| + m_internalState = InternalState::Finished;
|
| + return TextSegmentationMachineState::Finished;
|
| +}
|
| +// Finishes the machine when an unpaired surrogate is encountered.
|
| +TextSegmentationMachineState
|
| +BackwardGraphemeBoundaryStateMachine::finishWithBrokenSurrogatePair()
|
| +{
|
| + DCHECK_NE(m_internalState, InternalState::Finished);
|
| + if (m_prevCodePoint == 0 && m_nextCodePoint == 0) {
|
| + // The immediately preceding code unit is an unpaired surrogate.
|
| + // Move the offset to just before that surrogate.
|
| + DCHECK_EQ(m_internalState, InternalState::SearchBoundary);
|
| + m_BoundaryOffset = -1;
|
| + return finish();
|
| + }
|
| + // Otherwise, treat the broken surrogate pair as the end of the preceding
|
| + // text.
|
| + m_trailSurrogate = 0; // Cleared so tellEndOfPrecedingText() does not re-enter.
|
| + return tellEndOfPrecedingText();
|
| +}
|
| +// Clears all accumulated state and returns the machine to SearchBoundary.
|
| +void BackwardGraphemeBoundaryStateMachine::reset()
|
| +{
|
| + m_trailSurrogate = 0;
|
| + m_nextCodePoint = 0;
|
| + m_prevCodePoint = 0;
|
| + m_BoundaryOffset = 0;
|
| + m_precedingRISCount = 0;
|
| + m_internalState = InternalState::SearchBoundary;
|
| +}
|
| +
|
| +} // namespace blink
|
|
|