Chromium Code Reviews| Index: Source/platform/fonts/ScriptRunIterator.cpp |
| diff --git a/Source/platform/fonts/ScriptRunIterator.cpp b/Source/platform/fonts/ScriptRunIterator.cpp |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..e68a30eb54b056719f65d2c4c04ca5c5a3ce5b72 |
| --- /dev/null |
| +++ b/Source/platform/fonts/ScriptRunIterator.cpp |
| @@ -0,0 +1,371 @@ |
| +// Copyright 2015 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "config.h" |
| +#include "ScriptRunIterator.h" |
| + |
| +#include "platform/Logging.h" |
| +#include "wtf/Threading.h" |
| + |
| +#include <ubidi_props.h> |
| + |
| +namespace blink { |
| + |
| +typedef ScriptData::PairedBracketType PairedBracketType; |
| + |
| +ScriptData::~ScriptData() |
| +{ |
| +} |
| + |
| +const int ScriptData::kMaxScriptCount; |
| + |
| +void ICUScriptData::getScripts(UChar32 ch, Vector<UScriptCode>& dst) const |
| +{ |
| + |
| + UErrorCode status = U_ZERO_ERROR; |
| + // Leave room to insert primary script. It's not strictly necessary but |
| + // it ensures that the result won't ever be greater than kMaxScriptCount, |
| + // which some client someday might expect. |
| + dst.resize(kMaxScriptCount - 1); |
| + // Note, ICU convention is to return the number of available items |
| + // regardless of the capacity passed to the call. So count can be greater |
| + // than dst->size(), if a later version of the unicode data has more |
| + // than kMaxScriptCount items. |
| + int count = uscript_getScriptExtensions( |
| + ch, &dst[0], dst.size(), &status); |
| + if (status == U_BUFFER_OVERFLOW_ERROR) { |
| + // Allow this, we'll just use what we have. |
| + WTF_LOG_ERROR("Exceeded maximum script count of %d for 0x%x", kMaxScriptCount, ch); |
| + count = dst.size(); |
| + status = U_ZERO_ERROR; |
| + } |
| + UScriptCode primaryScript = uscript_getScript(ch, &status); |
| + |
| + if (U_FAILURE(status)) { |
| + WTF_LOG_ERROR("Could not get icu script data: %d for 0x%x", status, ch); |
| + dst.clear(); |
| + return; |
| + } |
| + |
| + dst.resize(count); |
| + |
| + if (primaryScript == dst.at(0)) { |
| + // Only one script (might be common or inherited -- these are never in |
| + // the extensions unless they're the only script), or extensions are in |
| + // priority order already. |
| + return; |
| + } |
| + |
| + if (primaryScript > USCRIPT_INHERITED) { |
| + // Not common or primary, with extensions that are not in order. We know |
| + // the primary, so we insert it at the front and swap the previous front |
| + // to somewhere else in the list. |
| + auto it = std::find(dst.begin() + 1, dst.end(), primaryScript); |
| + if (it == dst.end()) { |
| + dst.append(primaryScript); |
| + } |
| + std::swap(*dst.begin(), *it); |
| + return; |
| + } |
| + |
| + if (primaryScript == USCRIPT_COMMON) { |
| + if (count == 1) { |
| + // Common with a preferred script. Keep common at head. |
| + dst.prepend(primaryScript); |
| + return; |
| + } |
| + |
| + // Ignore common. Find the preferred script of the multiple scripts that |
| + // remain, and ensure it is at the head. Just keep swapping them in, |
| + // there aren't likely to be many. |
| + for (size_t i = 1; i < dst.size(); ++i) { |
| + if (dst.at(0) == USCRIPT_LATIN || dst.at(i) < dst.at(0)) { |
| + std::swap(dst.at(0), dst.at(i)); |
| + } |
| + } |
| + return; |
| + } |
| + |
| + // The primary is inherited, and there are other scripts. Put inherited at |
| + // the front, the true primary next, and then the others in random order. |
| + dst.append(dst.at(0)); |
| + dst.at(0) = primaryScript; |
| + for (size_t i = 2; i < dst.size(); ++i) { |
| + if (dst.at(1) == USCRIPT_LATIN || dst.at(i) < dst.at(1)) { |
| + std::swap(dst.at(1), dst.at(i)); |
| + } |
| + } |
| +} |
| + |
| +UChar32 ICUScriptData::getPairedBracket(UChar32 ch) const |
| +{ |
| + return u_getBidiPairedBracket(ch); |
| +} |
| + |
| +PairedBracketType ICUScriptData::getPairedBracketType(UChar32 ch) const |
| +{ |
| + return static_cast<PairedBracketType>( |
| + u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE)); |
| +} |
| + |
| +const ICUScriptData* ICUScriptData::instance() |
| +{ |
| + AtomicallyInitializedStaticReference(const ICUScriptData, icuScriptDataInstance, (new ICUScriptData())); |
| + return &icuScriptDataInstance; |
| +} |
| + |
| +ScriptRunIterator::ScriptRunIterator(const UChar* text, size_t length, const ScriptData* data) |
| + : m_text(text) |
| + , m_length(length) |
| + , m_bracketsFixupDepth(0) |
| + , |
| + // The initial value of m_aheadCharacter is not used, use ffff to be distinctive. |
|
eae
2015/08/28 21:10:24
Why FFFF instead of 0?
|
| + m_aheadCharacter(0xffff) |
| + , m_aheadPos(0) |
| + , m_commonPreferred(USCRIPT_COMMON) |
| + , m_scriptData(data) |
| +{ |
| + ASSERT(text); |
| + ASSERT(data); |
| + |
| + if (m_aheadPos < m_length) { |
| + m_currentSet.clear(); |
| + m_currentSet.append(USCRIPT_COMMON); |
|
eae
2015/08/28 21:10:24
Why would we append commonn to the set here?
|
| + U16_NEXT(m_text, m_aheadPos, m_length, m_aheadCharacter); |
| + m_scriptData->getScripts(m_aheadCharacter, m_aheadSet); |
| + } |
| +} |
| + |
| +ScriptRunIterator::ScriptRunIterator(const UChar* text, size_t length) |
| + : ScriptRunIterator(text, length, ICUScriptData::instance()) |
| +{ |
| +} |
| + |
| +bool ScriptRunIterator::consume(unsigned& limit, UScriptCode& script) |
| +{ |
| + if (m_currentSet.isEmpty()) { |
| + return false; |
| + } |
| + |
| + size_t pos; |
| + UChar32 ch; |
| + while (fetch(&pos, &ch)) { |
| + PairedBracketType paired_type = m_scriptData->getPairedBracketType(ch); |
| + switch (paired_type) { |
| + case PairedBracketType::OPEN: |
| + openBracket(ch); |
| + break; |
| + case PairedBracketType::CLOSE: |
| + closeBracket(ch); |
| + break; |
| + default: |
| + break; |
| + } |
| + if (!mergeSets()) { |
| + limit = pos; |
| + script = resolveCurrentScript(); |
| + fixupStack(script); |
| + m_currentSet = m_nextSet; |
| + return true; |
| + } |
| + } |
| + |
| + limit = m_length; |
| + script = resolveCurrentScript(); |
| + m_currentSet.clear(); |
| + return true; |
| +} |
| + |
| +void ScriptRunIterator::openBracket(UChar32 ch) |
| +{ |
| + if (m_brackets.size() == kMaxBrackets) { |
| + m_brackets.removeFirst(); |
| + if (m_bracketsFixupDepth == kMaxBrackets) { |
| + --m_bracketsFixupDepth; |
| + } |
| + } |
| + m_brackets.append(BracketRec({ ch, USCRIPT_COMMON })); |
| + ++m_bracketsFixupDepth; |
| +} |
| + |
| +void ScriptRunIterator::closeBracket(UChar32 ch) |
| +{ |
| + if (m_brackets.size() > 0) { |
| + UChar32 target = m_scriptData->getPairedBracket(ch); |
| + for (auto it = m_brackets.rbegin(); it != m_brackets.rend(); ++it) { |
| + if (it->ch == target) { |
| + // Have a match, use open paren's resolved script. |
| + UScriptCode script = it->script; |
| + m_nextSet.clear(); |
| + m_nextSet.append(script); |
| + |
| + // And pop stack to this point. |
| + int num_popped = std::distance(m_brackets.rbegin(), it); |
| + // TODO: No resize operation in WTF::Deque? |
| + for (int i = 0; i < num_popped; ++i) |
| + m_brackets.removeLast(); |
| + m_bracketsFixupDepth = std::max(0ul, m_bracketsFixupDepth - num_popped); |
| + return; |
| + } |
| + } |
| + } |
| + // leave stack alone, no match |
| +} |
| + |
| +// Keep items in current_set that are in next_set. |
| +// |
| +// If the sets are disjoint, return false and leave current_set unchanged. Else |
| +// return true and make current set the intersection. Make sure to maintain |
| +// current priority script as priority if it remains, else retain next priority |
| +// script if it remains. |
| +// |
| +// Also maintain a common preferred script. If current and next are both |
| +// common, and there is no common preferred script and next has a preferred |
| +// script, set the common preferred script to that of next. |
| +bool ScriptRunIterator::mergeSets() |
| +{ |
| + if (m_nextSet.isEmpty() || m_currentSet.isEmpty()) { |
| + return false; |
| + } |
| + |
| + auto cur_it = m_currentSet.begin(); |
|
eae
2015/08/28 21:10:24
curIt, or better yet, currentScript
|
| + auto cur_end = m_currentSet.end(); |
| + // Most of the time, this is the only one. |
| + // Advance the current iterator, we won't need to check it again later. |
| + UScriptCode priority_script = *cur_it++; |
| + |
| + // If next is common or inherited, the only thing that might change |
| + // is the common preferred script. |
| + if (m_nextSet.at(0) <= USCRIPT_INHERITED) { |
| + if (m_nextSet.size() == 2 && priority_script <= USCRIPT_INHERITED && m_commonPreferred == USCRIPT_COMMON) { |
| + m_commonPreferred = m_nextSet.at(1); |
| + } |
| + return true; |
| + } |
| + |
| + // If current is common or inherited, use the next script set. |
| + if (priority_script <= USCRIPT_INHERITED) { |
| + m_currentSet = m_nextSet; |
| + return true; |
| + } |
| + |
| + // Neither is common or inherited. If current is a singleton, |
| + // just see if it exists in the next set. This is the common case. |
| + auto next_it = m_nextSet.begin(); |
| + auto next_end = m_nextSet.end(); |
| + if (cur_it == cur_end) { |
| + return std::find(next_it, next_end, priority_script) != next_end; |
| + } |
| + |
| + // Establish the priority script, if we have one. |
| + // First try current priority script. |
| + bool have_priority = std::find(next_it, next_end, priority_script) |
| + != next_end; |
| + if (!have_priority) { |
| + // So try next priority script. |
| + // Skip the first current script, we already know it's not there. |
| + // Advance the next iterator, later we won't need to check it again. |
| + priority_script = *next_it++; |
| + have_priority = std::find(cur_it, cur_end, priority_script) != cur_end; |
| + } |
| + |
| + // Note that we can never write more scripts into the current vector than |
| + // it already contains, so cur_write_it won't ever exceed the size/capacity. |
| + auto cur_write_it = m_currentSet.begin(); |
| + if (have_priority) { |
| + // keep the priority script. |
| + *cur_write_it++ = priority_script; |
| + } |
| + |
| + if (next_it != next_end) { |
| + // Iterate over the remaining current scripts, and keep them if |
| + // they occur in the remaining next scripts. |
| + while (cur_it != cur_end) { |
| + UScriptCode sc = *cur_it++; |
| + if (std::find(next_it, next_end, sc) != next_end) { |
| + *cur_write_it++ = sc; |
| + } |
| + } |
| + } |
| + |
| + // Only change current if the run continues. |
| + int written = std::distance(m_currentSet.begin(), cur_write_it); |
| + if (written > 0) { |
| + m_currentSet.resize(written); |
| + return true; |
| + } |
| + return false; |
| +} |
| + |
| +// When we hit the end of the run, and resolve the script, we now know the |
| +// resolved script of any open bracket that was pushed on the stack since |
| +// the start of the run. Fixup depth records how many of these there |
| +// were. We've maintained this count during pushes, and taken care to |
| +// adjust it if the stack got overfull and open brackets were pushed off |
| +// the bottom. This sets the script of the fixup_depth topmost entries of the |
| +// stack to the resolved script. |
| +void ScriptRunIterator::fixupStack(UScriptCode resolved_script) |
| +{ |
| + if (m_bracketsFixupDepth > 0) { |
| + if (m_bracketsFixupDepth > m_brackets.size()) { |
| + // Should never happen unless someone breaks the code. |
| + WTF_LOG_ERROR("Brackets fixup depth exceeds size of bracket vector."); |
| + m_bracketsFixupDepth = m_brackets.size(); |
| + } |
| + auto it = m_brackets.rbegin(); |
| + for (size_t i = 0; i < m_bracketsFixupDepth; ++i) { |
| + it->script = resolved_script; |
| + ++it; |
| + } |
| + m_bracketsFixupDepth = 0; |
| + } |
| +} |
| + |
| +bool ScriptRunIterator::fetch(size_t* pos, UChar32* ch) |
| +{ |
| + if (m_aheadPos > m_length) { |
| + return false; |
| + } |
| + *pos = m_aheadPos - (m_aheadCharacter >= 0x10000 ? 2 : 1); |
| + *ch = m_aheadCharacter; |
| + |
| + m_nextSet.swap(m_aheadSet); |
| + if (m_aheadPos == m_length) { |
| + // No more data to fetch, but last character still needs to be |
| + // processed. Advance m_aheadPos so that next time we will know |
| + // this has been done. |
| + m_aheadPos++; |
| + return true; |
| + } |
| + |
| + U16_NEXT(m_text, m_aheadPos, m_length, m_aheadCharacter); |
| + m_scriptData->getScripts(m_aheadCharacter, m_aheadSet); |
| + if (m_aheadSet.isEmpty()) { |
| + // No scripts for this character. This has already been logged, so |
| + // we just terminate processing this text. |
| + return false; |
| + } |
| + if (m_aheadSet[0] == USCRIPT_INHERITED && m_aheadSet.size() > 1) { |
| + if (m_nextSet[0] == USCRIPT_COMMON) { |
| + // Overwrite the next set with the non-inherited portion of the set. |
| + m_nextSet = m_aheadSet; |
| + m_nextSet.remove(0); |
| + // Discard the remaining values, we'll inherit. |
| + m_aheadSet.resize(1); |
| + } |
| + else { |
| + // Else, this applies to anything. |
| + m_aheadSet.resize(1); |
| + } |
| + } |
| + return true; |
| +} |
| + |
| +UScriptCode ScriptRunIterator::resolveCurrentScript() const |
| +{ |
| + UScriptCode result = m_currentSet.at(0); |
| + return result == USCRIPT_COMMON ? m_commonPreferred : result; |
| +} |
| + |
| +} // namespace blink |