 Chromium Code Reviews
 Chromium Code Reviews Issue 1397423004:
  Improve shaping segmentation for grapheme cluster based font fallback  (Closed) 
  Base URL: https://chromium.googlesource.com/chromium/src.git@master
    
  
    Issue 1397423004:
  Improve shaping segmentation for grapheme cluster based font fallback  (Closed) 
  Base URL: https://chromium.googlesource.com/chromium/src.git@master| Index: third_party/WebKit/Source/platform/fonts/shaping/HarfBuzzShaper.cpp | 
| diff --git a/third_party/WebKit/Source/platform/fonts/shaping/HarfBuzzShaper.cpp b/third_party/WebKit/Source/platform/fonts/shaping/HarfBuzzShaper.cpp | 
| index 6e47ccf7442173fb82c1ed7d5f1366239720b404..1877f0d29fcb520f263500fa5af9d34075cbedf1 100644 | 
| --- a/third_party/WebKit/Source/platform/fonts/shaping/HarfBuzzShaper.cpp | 
| +++ b/third_party/WebKit/Source/platform/fonts/shaping/HarfBuzzShaper.cpp | 
| @@ -34,12 +34,15 @@ | 
| #include "hb.h" | 
| #include "platform/LayoutUnit.h" | 
| +#include "platform/Logging.h" | 
| #include "platform/RuntimeEnabledFeatures.h" | 
| #include "platform/fonts/Character.h" | 
| #include "platform/fonts/Font.h" | 
| +#include "platform/fonts/FontFallbackIterator.h" | 
| #include "platform/fonts/GlyphBuffer.h" | 
| #include "platform/fonts/UTF16TextIterator.h" | 
| #include "platform/fonts/shaping/HarfBuzzFace.h" | 
| +#include "platform/fonts/shaping/RunSegmenter.h" | 
| #include "platform/text/TextBreakIterator.h" | 
| #include "wtf/Compiler.h" | 
| #include "wtf/MathExtras.h" | 
| @@ -779,203 +782,6 @@ void HarfBuzzShaper::setFontFeatures() | 
| } | 
| } | 
| -PassRefPtr<ShapeResult> HarfBuzzShaper::shapeResult() | 
| -{ | 
| - if (!createHarfBuzzRuns()) | 
| - return nullptr; | 
| - return shapeHarfBuzzRuns(); | 
| -} | 
| - | 
| -struct CandidateRun { | 
| - UChar32 character; | 
| - unsigned start; | 
| - unsigned end; | 
| - const SimpleFontData* fontData; | 
| - UScriptCode script; | 
| -}; | 
| - | 
| -static inline bool collectCandidateRuns(const UChar* normalizedBuffer, | 
| - size_t bufferLength, const Font* font, Vector<CandidateRun>* runs, bool isSpaceNormalize) | 
| -{ | 
| - UTF16TextIterator iterator(normalizedBuffer, bufferLength); | 
| - UChar32 character; | 
| - unsigned startIndexOfCurrentRun = 0; | 
| - | 
| - if (!iterator.consume(character)) | 
| - return false; | 
| - | 
| - const SimpleFontData* nextFontData = font->glyphDataForCharacter(character, false, isSpaceNormalize).fontData; | 
| - UErrorCode errorCode = U_ZERO_ERROR; | 
| - UScriptCode nextScript = uscript_getScript(character, &errorCode); | 
| - if (U_FAILURE(errorCode)) | 
| - return false; | 
| - | 
| - do { | 
| - const UChar* currentCharacterPosition = iterator.characters(); | 
| - const SimpleFontData* currentFontData = nextFontData; | 
| - UScriptCode currentScript = nextScript; | 
| - | 
| - UChar32 lastCharacter = character; | 
| - for (iterator.advance(); iterator.consume(character); iterator.advance()) { | 
| - if (Character::treatAsZeroWidthSpace(character)) | 
| - continue; | 
| - if ((U_GET_GC_MASK(character) & U_GC_M_MASK) | 
| - && (Character::isUnicodeVariationSelector(character) | 
| - || currentFontData->canRenderCombiningCharacterSequence( | 
| - currentCharacterPosition, | 
| - iterator.glyphEnd() - currentCharacterPosition))) | 
| - continue; | 
| - | 
| - nextFontData = font->glyphDataForCharacter(character, false, isSpaceNormalize).fontData; | 
| - nextScript = uscript_getScript(character, &errorCode); | 
| - if (U_FAILURE(errorCode)) | 
| - return false; | 
| - if (lastCharacter == zeroWidthJoinerCharacter) | 
| - currentFontData = nextFontData; | 
| - if ((nextFontData != currentFontData) || ((currentScript != nextScript) && (nextScript != USCRIPT_INHERITED) && (!uscript_hasScript(character, currentScript)))) | 
| - break; | 
| - currentCharacterPosition = iterator.characters(); | 
| - lastCharacter = character; | 
| - } | 
| - | 
| - CandidateRun run = { lastCharacter, startIndexOfCurrentRun, static_cast<unsigned>(iterator.offset()), currentFontData, currentScript }; | 
| - runs->append(run); | 
| - | 
| - startIndexOfCurrentRun = iterator.offset(); | 
| - } while (iterator.consume(character)); | 
| - | 
| - return true; | 
| -} | 
| - | 
| -static inline bool matchesAdjacentRun(UScriptCode* scriptExtensions, int length, | 
| - CandidateRun& adjacentRun) | 
| -{ | 
| - for (int i = 0; i < length; i++) { | 
| - if (scriptExtensions[i] == adjacentRun.script) | 
| - return true; | 
| - } | 
| - return false; | 
| -} | 
| - | 
| -static inline void resolveRunBasedOnScriptExtensions(Vector<CandidateRun>& runs, | 
| - CandidateRun& run, size_t i, size_t length, UScriptCode* scriptExtensions, | 
| - int extensionsLength, size_t& nextResolvedRun) | 
| -{ | 
| - // If uscript_getScriptExtensions returns 1 it only contains the script value, | 
| - // we only care about ScriptExtensions which is indicated by a value >= 2. | 
| - if (extensionsLength <= 1) | 
| - return; | 
| - | 
| - if (i > 0 && matchesAdjacentRun(scriptExtensions, extensionsLength, runs[i - 1])) { | 
| - run.script = runs[i - 1].script; | 
| - return; | 
| - } | 
| - | 
| - for (size_t j = i + 1; j < length; j++) { | 
| - if (runs[j].script != USCRIPT_COMMON | 
| - && runs[j].script != USCRIPT_INHERITED | 
| - && matchesAdjacentRun(scriptExtensions, extensionsLength, runs[j])) { | 
| - nextResolvedRun = j; | 
| - break; | 
| - } | 
| - } | 
| -} | 
| - | 
| -static inline void resolveRunBasedOnScriptValue(Vector<CandidateRun>& runs, | 
| - CandidateRun& run, size_t i, size_t length, size_t& nextResolvedRun) | 
| -{ | 
| - if (run.script != USCRIPT_COMMON) | 
| - return; | 
| - | 
| - if (i > 0 && runs[i - 1].script != USCRIPT_COMMON) { | 
| - run.script = runs[i - 1].script; | 
| - return; | 
| - } | 
| - | 
| - for (size_t j = i + 1; j < length; j++) { | 
| - if (runs[j].script != USCRIPT_COMMON | 
| - && runs[j].script != USCRIPT_INHERITED) { | 
| - nextResolvedRun = j; | 
| - break; | 
| - } | 
| - } | 
| -} | 
| - | 
| -static inline bool resolveCandidateRuns(Vector<CandidateRun>& runs) | 
| -{ | 
| - UScriptCode scriptExtensions[USCRIPT_CODE_LIMIT]; | 
| - UErrorCode errorCode = U_ZERO_ERROR; | 
| - size_t length = runs.size(); | 
| - for (size_t i = 0; i < length; i++) { | 
| - CandidateRun& run = runs[i]; | 
| - size_t nextResolvedRun = 0; | 
| - | 
| - if (run.script == USCRIPT_INHERITED) | 
| - run.script = i > 0 ? runs[i - 1].script : USCRIPT_COMMON; | 
| - | 
| - int extensionsLength = uscript_getScriptExtensions(run.character, | 
| - scriptExtensions, sizeof(scriptExtensions) / sizeof(scriptExtensions[0]), | 
| - &errorCode); | 
| - if (U_FAILURE(errorCode)) | 
| - return false; | 
| - | 
| - resolveRunBasedOnScriptExtensions(runs, run, i, length, | 
| - scriptExtensions, extensionsLength, nextResolvedRun); | 
| - resolveRunBasedOnScriptValue(runs, run, i, length, | 
| - nextResolvedRun); | 
| - for (size_t j = i; j < nextResolvedRun; j++) | 
| - runs[j].script = runs[nextResolvedRun].script; | 
| - | 
| - i = std::max(i, nextResolvedRun); | 
| - } | 
| - return true; | 
| -} | 
| - | 
| -// For ideographic (CJK) documents, 90-95% of calls from width() are one character length | 
| -// because most characters have break opportunities both before and after. | 
| -bool HarfBuzzShaper::createHarfBuzzRunsForSingleCharacter() | 
| -{ | 
| - ASSERT(m_normalizedBufferLength == 1); | 
| - UChar32 character = m_normalizedBuffer[0]; | 
| - if (!U16_IS_SINGLE(character)) | 
| - return false; | 
| - const SimpleFontData* fontData = m_font->glyphDataForCharacter(character, false, m_textRun.normalizeSpace()).fontData; | 
| - UErrorCode errorCode = U_ZERO_ERROR; | 
| - UScriptCode script = uscript_getScript(character, &errorCode); | 
| - if (U_FAILURE(errorCode)) | 
| - return false; | 
| - addHarfBuzzRun(0, 1, fontData, script); | 
| - return true; | 
| -} | 
| - | 
| -bool HarfBuzzShaper::createHarfBuzzRuns() | 
| -{ | 
| - if (m_normalizedBufferLength == 1) | 
| - return createHarfBuzzRunsForSingleCharacter(); | 
| - | 
| - Vector<CandidateRun> candidateRuns; | 
| - if (!collectCandidateRuns(m_normalizedBuffer.get(), | 
| - m_normalizedBufferLength, m_font, &candidateRuns, m_textRun.normalizeSpace())) | 
| - return false; | 
| - | 
| - if (!resolveCandidateRuns(candidateRuns)) | 
| - return false; | 
| - | 
| - size_t length = candidateRuns.size(); | 
| - for (size_t i = 0; i < length; ) { | 
| - CandidateRun& run = candidateRuns[i]; | 
| - CandidateRun lastMatchingRun = run; | 
| - for (i++; i < length; i++) { | 
| - if (candidateRuns[i].script != run.script | 
| - || candidateRuns[i].fontData != run.fontData) | 
| - break; | 
| - lastMatchingRun = candidateRuns[i]; | 
| - } | 
| - addHarfBuzzRun(run.start, lastMatchingRun.end, run.fontData, run.script); | 
| - } | 
| - return !m_harfBuzzRuns.isEmpty(); | 
| -} | 
| - | 
| // A port of hb_icu_script_to_script because harfbuzz on CrOS is built | 
| // without hb-icu. See http://crbug.com/356929 | 
| static inline hb_script_t ICUScriptToHBScript(UScriptCode script) | 
| @@ -992,22 +798,6 @@ static inline hb_direction_t TextDirectionToHBDirection(TextDirection dir, FontO | 
| return dir == RTL ? HB_DIRECTION_REVERSE(harfBuzzDirection) : harfBuzzDirection; | 
| } | 
| -void HarfBuzzShaper::addHarfBuzzRun(unsigned startCharacter, | 
| - unsigned endCharacter, const SimpleFontData* fontData, | 
| - UScriptCode script) | 
| -{ | 
| - ASSERT(endCharacter > startCharacter); | 
| - ASSERT(script != USCRIPT_INVALID_CODE); | 
| - | 
| - hb_direction_t direction = TextDirectionToHBDirection(m_textRun.direction(), | 
| - m_font->fontDescription().orientation(), fontData); | 
| - HarfBuzzRun harfBuzzRun = { | 
| - fontData, startCharacter, endCharacter - startCharacter, | 
| - direction, ICUScriptToHBScript(script) | 
| - }; | 
| - m_harfBuzzRuns.append(harfBuzzRun); | 
| -} | 
| - | 
| static const uint16_t* toUint16(const UChar* src) | 
| { | 
| // FIXME: This relies on undefined behavior however it works on the | 
| @@ -1021,8 +811,11 @@ static inline void addToHarfBuzzBufferInternal(hb_buffer_t* buffer, | 
| const FontDescription& fontDescription, const UChar* normalizedBuffer, | 
| unsigned startIndex, unsigned numCharacters) | 
| { | 
| - if (fontDescription.variant() == FontVariantSmallCaps | 
| - && u_islower(normalizedBuffer[startIndex])) { | 
| + // TODO: Revisit whether we can always fill the hb_buffer_t with the | 
| + // full run text, but only specify startIndex and numCharacters for the part | 
| + // to be shaped. Then simplify/change the complicated index computations in | 
| + // extractShapeResults(). | 
| + if (fontDescription.variant() == FontVariantSmallCaps) { | 
| String upperText = String(normalizedBuffer + startIndex, numCharacters) | 
| .upper(); | 
| // TextRun is 16 bit, therefore upperText is 16 bit, even after we call | 
| @@ -1036,72 +829,303 @@ static inline void addToHarfBuzzBufferInternal(hb_buffer_t* buffer, | 
| } | 
| } | 
| -PassRefPtr<ShapeResult> HarfBuzzShaper::shapeHarfBuzzRuns() | 
| +inline bool HarfBuzzShaper::shapeRange(hb_buffer_t* harfBuzzBuffer, | 
| + int startIndex, | 
| + int numCharacters, | 
| + const SimpleFontData* currentFont, | 
| + UScriptCode currentRunScript, | 
| + hb_language_t language) | 
| { | 
| - RefPtr<ShapeResult> result = ShapeResult::create(m_font, | 
| - m_normalizedBufferLength, m_textRun.direction()); | 
| - HarfBuzzScopedPtr<hb_buffer_t> harfBuzzBuffer(hb_buffer_create(), hb_buffer_destroy); | 
| + FontPlatformData* platformData = const_cast<FontPlatformData*>(&(currentFont->platformData())); | 
| 
eae
2015/10/14 08:12:00
Is this const_cast really needed? harfBuzzFace() i
 | 
| + HarfBuzzFace* face = platformData->harfBuzzFace(); | 
| + if (!face) { | 
| + WTF_LOG_ERROR("Could not create HarfBuzzFace from FontPlatformData."); | 
| + return false; | 
| + } | 
| - const FontDescription& fontDescription = m_font->fontDescription(); | 
| - const String& localeString = fontDescription.locale(); | 
| - CString locale = localeString.latin1(); | 
| - const hb_language_t language = hb_language_from_string(locale.data(), locale.length()); | 
| + hb_buffer_set_language(harfBuzzBuffer, language); | 
| + hb_buffer_set_script(harfBuzzBuffer, ICUScriptToHBScript(currentRunScript)); | 
| + hb_buffer_set_direction(harfBuzzBuffer, TextDirectionToHBDirection(m_textRun.direction(), | 
| + m_font->fontDescription().orientation(), currentFont)); | 
| - result->m_runs.resize(m_harfBuzzRuns.size()); | 
| - for (unsigned i = 0; i < m_harfBuzzRuns.size(); ++i) { | 
| - unsigned runIndex = m_textRun.rtl() ? m_harfBuzzRuns.size() - i - 1 : i; | 
| - const HarfBuzzRun* currentRun = &m_harfBuzzRuns[runIndex]; | 
| + // Add a space as pre-context to the buffer. This prevents showing dotted-circle | 
| + // for combining marks at the beginning of runs. | 
| + static const uint16_t preContext = spaceCharacter; | 
| + hb_buffer_add_utf16(harfBuzzBuffer, &preContext, 1, 1, 0); | 
| 
eae
2015/10/14 08:12:01
We don't need this any more, right? (If you want t
 | 
| - const SimpleFontData* currentFontData = currentRun->m_fontData; | 
| - FontPlatformData* platformData = const_cast<FontPlatformData*>(¤tFontData->platformData()); | 
| - HarfBuzzFace* face = platformData->harfBuzzFace(); | 
| - if (!face) | 
| - return nullptr; | 
| + addToHarfBuzzBufferInternal(harfBuzzBuffer, | 
| + m_font->fontDescription(), m_normalizedBuffer.get(), startIndex, | 
| + numCharacters); | 
| - hb_buffer_set_language(harfBuzzBuffer.get(), language); | 
| - hb_buffer_set_script(harfBuzzBuffer.get(), currentRun->m_script); | 
| - hb_buffer_set_direction(harfBuzzBuffer.get(), currentRun->m_direction); | 
| + HarfBuzzScopedPtr<hb_font_t> harfBuzzFont(face->createFont(), hb_font_destroy); | 
| + hb_shape(harfBuzzFont.get(), harfBuzzBuffer, m_features.isEmpty() ? 0 : m_features.data(), m_features.size()); | 
| - // Add a space as pre-context to the buffer. This prevents showing dotted-circle | 
| - // for combining marks at the beginning of runs. | 
| - static const uint16_t preContext = spaceCharacter; | 
| - hb_buffer_add_utf16(harfBuzzBuffer.get(), &preContext, 1, 1, 0); | 
| + return true; | 
| +} | 
| - addToHarfBuzzBufferInternal(harfBuzzBuffer.get(), | 
| - fontDescription, m_normalizedBuffer.get(), currentRun->m_startIndex, | 
| - currentRun->m_numCharacters); | 
| +bool HarfBuzzShaper::extractShapeResults(hb_buffer_t* harfBuzzBuffer, | 
| + ShapeResult* shapeResult, | 
| 
eae
2015/10/14 08:12:01
const ShapeResult?
 | 
| + Deque<HarfBuzzShaper::HolesQueueItem>* holesQueue, | 
| + bool& fontCycleQueued, const HolesQueueItem& currentQueueItem, | 
| + const SimpleFontData* currentFont, | 
| + UScriptCode currentRunScript, | 
| + bool isLastResort) | 
| +{ | 
| + enum ClusterResult { | 
| + Shaped, | 
| + NotDef, | 
| + Unknown | 
| + }; | 
| + ClusterResult currentClusterResult = Unknown; | 
| + ClusterResult previousClusterResult = Unknown; | 
| + unsigned previousCluster = 0; | 
| + unsigned currentCluster = 0; | 
| - HarfBuzzScopedPtr<hb_font_t> harfBuzzFont(face->createFont(), hb_font_destroy); | 
| - hb_shape(harfBuzzFont.get(), harfBuzzBuffer.get(), m_features.isEmpty() ? 0 : m_features.data(), m_features.size()); | 
| - shapeResult(result.get(), i, currentRun, harfBuzzBuffer.get()); | 
| + // Find first notdef glyph in harfBuzzBuffer. | 
| + int numGlyphs = hb_buffer_get_length(harfBuzzBuffer); | 
| 
eae
2015/10/14 08:12:01
unsigned?
 | 
| + hb_glyph_info_t* glyphInfo = hb_buffer_get_glyph_infos(harfBuzzBuffer, 0); | 
| - hb_buffer_reset(harfBuzzBuffer.get()); | 
| + int lastChangePosition = 0; | 
| 
eae
2015/10/14 08:12:00
unsigned
 | 
| + | 
| + if (!numGlyphs) { | 
| + WTF_LOG_ERROR("HarfBuzz returned empty glyph buffer after shaping."); | 
| + return false; | 
| } | 
| - // We should have consumed all expansion opportunities. | 
| - // Failures here means that our logic does not match to the one in expansionOpportunityCount(). | 
| - // FIXME: Ideally, we should ASSERT(!m_expansionOpportunityCount) here to ensure that, | 
| - // or unify the two logics (and the one in SimplePath too,) but there are some cases where our impl | 
| - // does not support justification very well yet such as U+3099, and it'll cause the ASSERT to fail. | 
| - // It's to be fixed because they're very rarely used, and a broken justification is still somewhat readable. | 
| + for (int j = 0; j <= numGlyphs; ++j) { | 
| 
eae
2015/10/14 08:12:00
unsigned glyphIndex or i?
 | 
| + // Iterating by clusters, check for when the state switches from shaped | 
| + // to non-shaped and vice versa. Taking into account the edge cases of | 
| + // beginning of the run and end of the run. | 
| + previousCluster = currentCluster; | 
| + currentCluster = glyphInfo[j].cluster; | 
| + | 
| + if (j < numGlyphs) { | 
| + // Still the same cluster, merge shaping status. | 
| + if (previousCluster == currentCluster && j != 0) { | 
| + if (glyphInfo[j].codepoint == 0) { | 
| + currentClusterResult = NotDef; | 
| + } else { | 
| + currentClusterResult = currentClusterResult == Shaped ? Shaped : NotDef; | 
| 
eae
2015/10/14 08:12:01
When would currentClusterResult be anything but Sh
 | 
| + } | 
| + continue; | 
| + } | 
| + // We've moved to a new cluster. | 
| + previousClusterResult = currentClusterResult; | 
| + currentClusterResult = glyphInfo[j].codepoint == 0 ? NotDef : Shaped; | 
| + } else { | 
| + previousClusterResult = currentClusterResult; | 
| + // Terminate explicitly at the end. | 
| + if (previousClusterResult == NotDef) { | 
| + currentClusterResult = Shaped; | 
| + } else { | 
| + currentClusterResult = NotDef; | 
| + } | 
| 
eae
2015/10/14 08:12:01
This logic is a little hard to follow. If the curr
 | 
| + } | 
| + | 
| + bool atChange = (previousClusterResult != currentClusterResult) && previousClusterResult != Unknown; | 
| + if (!atChange) | 
| + continue; | 
| - return result.release(); | 
| + // Compute the range indices of consecutive shaped or .notdef glyphs. | 
| + // Cluster information for RTL runs becomes reversed, e.g. character 0 | 
| + // has cluster index 5 in a run of 6 characters. | 
| + int numCharacters = 0; | 
| 
eae
2015/10/14 08:12:01
Can we use unsigned for these? Mixing signed and u
 | 
| + int numGlyphsToInsert = 0; | 
| + int startIndex = 0; | 
| + if (HB_DIRECTION_IS_FORWARD(hb_buffer_get_direction(harfBuzzBuffer))) { | 
| + startIndex = currentQueueItem.m_startIndex + glyphInfo[lastChangePosition].cluster; | 
| + if (j == numGlyphs) { | 
| + numCharacters = currentQueueItem.m_numCharacters - glyphInfo[lastChangePosition].cluster; | 
| + numGlyphsToInsert = numGlyphs - lastChangePosition; | 
| + } else { | 
| + numCharacters = glyphInfo[j].cluster - glyphInfo[lastChangePosition].cluster; | 
| + numGlyphsToInsert = j - lastChangePosition; | 
| + } | 
| + } else { | 
| 
eae
2015/10/14 08:12:01
// Direction Backwards
 | 
| + startIndex = currentQueueItem.m_startIndex + glyphInfo[j - 1].cluster; | 
| + if (lastChangePosition == 0) { | 
| + numCharacters = currentQueueItem.m_numCharacters - glyphInfo[j - 1].cluster; | 
| + } else { | 
| + numCharacters = glyphInfo[lastChangePosition - 1].cluster - glyphInfo[j - 1].cluster; | 
| + } | 
| + numGlyphsToInsert = j - lastChangePosition; | 
| + } | 
| + | 
| + if (currentClusterResult == Shaped && !isLastResort) { | 
| + // Now it's clear that we need to continue processing. | 
| + if (!fontCycleQueued) { | 
| + holesQueue->append(HolesQueueItem(HolesQueueNextFont, 0, 0)); | 
| + fontCycleQueued = true; | 
| + } | 
| + | 
| + // Here we need to put character positions. | 
| + ASSERT(numCharacters); | 
| + holesQueue->append(HolesQueueItem(HolesQueueRange, | 
| + startIndex, numCharacters)); | 
| + } | 
| + | 
| + // If numCharacters is 0, that means we hit a NotDef before shaping the | 
| + // whole grapheme. We do not append it here. For the next glyph we | 
| + // encounter, atChange will be true, and the characters corresponding to | 
| + // the grapheme will be added to the TODO queue again, attempting to | 
| + // shape the whole grapheme with the next font. | 
| + // When we're getting here with the last resort font, we have no other | 
| + // choice than adding boxes to the ShapeResult. | 
| + if ((currentClusterResult == NotDef && numCharacters != 0) || isLastResort) { | 
| 
eae
2015/10/14 08:12:01
&& numCharacters will do. No need to have the expl
 | 
| + // Here we need to specify glyph positions. | 
| + OwnPtr<ShapeResult::RunInfo> run = adoptPtr(new ShapeResult::RunInfo(currentFont, | 
| + TextDirectionToHBDirection(m_textRun.direction(), | 
| + m_font->fontDescription().orientation(), currentFont), | 
| + ICUScriptToHBScript(currentRunScript), | 
| + startIndex, | 
| + numGlyphsToInsert, numCharacters)); | 
| + insertRunIntoShapeResult(shapeResult, run.release(), lastChangePosition, numGlyphsToInsert, currentQueueItem.m_startIndex, harfBuzzBuffer); | 
| + } | 
| + lastChangePosition = j; | 
| + } | 
| + return true; | 
| } | 
| -void HarfBuzzShaper::shapeResult(ShapeResult* result, unsigned index, | 
| - const HarfBuzzRun* currentRun, hb_buffer_t* harfBuzzBuffer) | 
| +static inline const SimpleFontData* mangleFontDirection(const SimpleFontData* originalFont, | 
| 
eae
2015/10/14 08:12:01
The name makes it sound like this operates on the
 | 
| + FontOrientation runOrientation, | 
| + OrientationIterator::RenderOrientation renderOrientation) | 
| { | 
| - unsigned numGlyphs = hb_buffer_get_length(harfBuzzBuffer); | 
| - if (!numGlyphs) { | 
| - result->m_runs[index] = nullptr; | 
| - return; | 
| + if (!isVerticalBaseline(runOrientation)) | 
| + return originalFont; | 
| + | 
| + if (runOrientation == FontOrientation::VerticalRotated | 
| + || (runOrientation == FontOrientation::VerticalMixed && renderOrientation == OrientationIterator::OrientationRotateSideways)) | 
| + return originalFont->verticalRightOrientationFontData().get(); | 
| + | 
| + return originalFont; | 
| +} | 
| + | 
| +bool HarfBuzzShaper::buildHint(Vector<UChar32>& hint, bool needsList) | 
| +{ | 
| + if (!m_holesQueue.size()) | 
| + return false; | 
| + | 
| + hint.clear(); | 
| + | 
| + size_t numCharsAdded = 0; | 
| + for (auto it = m_holesQueue.begin(); it != m_holesQueue.end(); ++it) { | 
| + if (it->m_action == HolesQueueNextFont) | 
| + break; | 
| + | 
| + UChar32 hintChar; | 
| + UTF16TextIterator iterator(m_normalizedBuffer.get() + it->m_startIndex, it->m_numCharacters); | 
| 
eae
2015/10/14 08:12:01
Could we add an assert ensuring that the index is
 | 
| + while (iterator.consume(hintChar)) { | 
| + hint.append(hintChar); | 
| + numCharsAdded++; | 
| + if (!needsList) | 
| + break; | 
| + iterator.advance(); | 
| + } | 
| + } | 
| + return numCharsAdded > 0; | 
| +} | 
| + | 
| +PassRefPtr<ShapeResult> HarfBuzzShaper::shapeResult() | 
| +{ | 
| + RefPtr<ShapeResult> result = ShapeResult::create(m_font, | 
| + m_normalizedBufferLength, m_textRun.direction()); | 
| + HarfBuzzScopedPtr<hb_buffer_t> harfBuzzBuffer(hb_buffer_create(), hb_buffer_destroy); | 
| + | 
| + const FontDescription& fontDescription = m_font->fontDescription(); | 
| + const String& localeString = fontDescription.locale(); | 
| + CString locale = localeString.latin1(); | 
| + const hb_language_t language = hb_language_from_string(locale.data(), locale.length()); | 
| + | 
| + RunSegmenter::RunSegmenterRange segmentRange = {0, 0, USCRIPT_INVALID_CODE, OrientationIterator::OrientationInvalid, SmallCapsIterator::SmallCapsSameCase }; | 
| 
eae
2015/10/14 08:12:01
These two lines are really long at > 150, would yo
 | 
| + RunSegmenter runSegmenter(m_normalizedBuffer.get(), m_normalizedBufferLength, m_font->fontDescription().orientation(), fontDescription.variant()); | 
| + | 
| + Vector<UChar32> hint; | 
| + | 
| + // TODO: Check whether this treatAsZerowidthspace from the previous script | 
| + // segmentation plays a role here, does the new scriptRuniterator handle that correctly? | 
| + while (runSegmenter.consume(&segmentRange)) { | 
| + RefPtr<FontFallbackIterator> fallbackIterator = m_font->createFontFallbackIterator(); | 
| + | 
| + m_holesQueue.append(HolesQueueItem(HolesQueueNextFont, 0, 0)); | 
| 
eae
2015/10/14 08:12:01
Having a static helper for appending might make th
 | 
| + m_holesQueue.append(HolesQueueItem(HolesQueueRange, segmentRange.start, segmentRange.end - segmentRange.start)); | 
| + | 
| + const SimpleFontData* currentFont = nullptr; | 
| + | 
| + bool fontCycleQueued = false; | 
| + while (m_holesQueue.size()) { | 
| + HolesQueueItem currentQueueItem = m_holesQueue.takeFirst(); | 
| + | 
| + if (currentQueueItem.m_action == HolesQueueNextFont) { | 
| + // For now, we're building a character list with which we probe | 
| + // for needed fonts depending on the declared unicode-range of a | 
| + // segmented CSS font. Alternatively, we can build a fake font | 
| + // for the shaper and check whether any glyphs were found, or | 
| + // define a new API on the shaper which will give us coverage | 
| + // information? | 
| + if (!buildHint(hint, fallbackIterator->needsHintList())) { | 
| + // Give up shaping since we cannot retrieve a font fallback | 
| + // font without a hintlist. | 
| + m_holesQueue.clear(); | 
| + break; | 
| + } | 
| + | 
| + currentFont = fallbackIterator->next(hint); | 
| + if (!currentFont) { | 
| + ASSERT(!m_holesQueue.size()); | 
| + break; | 
| + } | 
| + fontCycleQueued = false; | 
| + continue; | 
| + } | 
| + | 
| + // TODO crbug.com/522964: Only use smallCapsFontData when the font does not support true smcp. The spec | 
| + // says: "To match the surrounding text, a font may provide alternate glyphs for caseless characters when | 
| + // these features are enabled but when a user agent simulates small capitals, it must not attempt to | 
| + // simulate alternates for codepoints which are considered caseless." | 
| + const SimpleFontData* smallcapsMangledFont = segmentRange.smallCapsBehavior == SmallCapsIterator::SmallCapsUppercaseNeeded | 
| + ? currentFont->smallCapsFontData(fontDescription).get() | 
| + : currentFont; | 
| + | 
| + // Compatibility with SimpleFontData approach of keeping a flag for overriding drawing direction. | 
| + // TODO: crbug.com/506224 This should go away in favor of storing that information elsewhere, for example in | 
| + // ShapeResult. | 
| + const SimpleFontData* directionAndSmallCapsMangledFont = mangleFontDirection(smallcapsMangledFont, | 
| + m_font->fontDescription().orientation(), | 
| + segmentRange.renderOrientation); | 
| + | 
| + if (!shapeRange(harfBuzzBuffer.get(), | 
| + currentQueueItem.m_startIndex, | 
| + currentQueueItem.m_numCharacters, | 
| + directionAndSmallCapsMangledFont, | 
| + segmentRange.script, | 
| + language)) | 
| + WTF_LOG_ERROR("Shaping range failed."); | 
| + | 
| + if (!extractShapeResults(harfBuzzBuffer.get(), | 
| + result.get(), | 
| + &m_holesQueue, | 
| + fontCycleQueued, | 
| + currentQueueItem, | 
| + directionAndSmallCapsMangledFont, | 
| + segmentRange.script, | 
| + !fallbackIterator->hasNext())) | 
| + WTF_LOG_ERROR("Shape result extraction failed."); | 
| + | 
| + hb_buffer_reset(harfBuzzBuffer.get()); | 
| + } | 
| } | 
| + return result.release(); | 
| +} | 
| - OwnPtr<ShapeResult::RunInfo> run = adoptPtr(new ShapeResult::RunInfo(currentRun->m_fontData, | 
| - currentRun->m_direction, currentRun->m_script, currentRun->m_startIndex, | 
| - numGlyphs, currentRun->m_numCharacters)); | 
| +// TODO crbug.com/542701: This should be a method on ShapeResult. | 
| +void HarfBuzzShaper::insertRunIntoShapeResult(ShapeResult* result, | 
| + PassOwnPtr<ShapeResult::RunInfo> runToInsert, int startGlyph, int numGlyphs, | 
| + int bufferStartCharIndex, hb_buffer_t* harfBuzzBuffer) | 
| +{ | 
| + ASSERT(numGlyphs > 0); | 
| + OwnPtr<ShapeResult::RunInfo> run(runToInsert); | 
| - const SimpleFontData* currentFontData = currentRun->m_fontData; | 
| + const SimpleFontData* currentFontData = run->m_fontData.get(); | 
| hb_glyph_info_t* glyphInfos = hb_buffer_get_glyph_infos(harfBuzzBuffer, 0); | 
| hb_glyph_position_t* glyphPositions = hb_buffer_get_glyph_positions(harfBuzzBuffer, 0); | 
| @@ -1111,21 +1135,29 @@ void HarfBuzzShaper::shapeResult(ShapeResult* result, unsigned index, | 
| float* directionOffset = m_font->fontDescription().isVerticalAnyUpright() ? &offsetY : &offsetX; | 
| // HarfBuzz returns result in visual order, no need to flip for RTL. | 
| - for (size_t i = 0; i < numGlyphs; ++i) { | 
| + for (int i = 0; i < numGlyphs; ++i) { | 
| 
eae
2015/10/14 08:12:01
unsigned, pretty please?
 | 
| bool runEnd = i + 1 == numGlyphs; | 
| - uint16_t glyph = glyphInfos[i].codepoint; | 
| - offsetX = harfBuzzPositionToFloat(glyphPositions[i].x_offset); | 
| - offsetY = -harfBuzzPositionToFloat(glyphPositions[i].y_offset); | 
| + uint16_t glyph = glyphInfos[startGlyph + i].codepoint; | 
| + offsetX = harfBuzzPositionToFloat(glyphPositions[startGlyph + i].x_offset); | 
| + offsetY = -harfBuzzPositionToFloat(glyphPositions[startGlyph + i].y_offset); | 
| // One out of x_advance and y_advance is zero, depending on | 
| // whether the buffer direction is horizontal or vertical. | 
| - float advance = harfBuzzPositionToFloat(glyphPositions[i].x_advance - glyphPositions[i].y_advance); | 
| - unsigned currentCharacterIndex = currentRun->m_startIndex + glyphInfos[i].cluster; | 
| + float advance = harfBuzzPositionToFloat(glyphPositions[startGlyph + i].x_advance - glyphPositions[startGlyph + i].y_advance); | 
| + unsigned currentCharacterIndex = bufferStartCharIndex + glyphInfos[startGlyph + i].cluster; | 
| RELEASE_ASSERT(m_normalizedBufferLength > currentCharacterIndex); | 
| - bool isClusterEnd = runEnd || glyphInfos[i].cluster != glyphInfos[i + 1].cluster; | 
| + bool isClusterEnd = runEnd || glyphInfos[startGlyph + i].cluster != glyphInfos[startGlyph + i + 1].cluster; | 
| float spacing = 0; | 
| - run->m_glyphData[i].characterIndex = glyphInfos[i].cluster; | 
| + // The characterIndex of one ShapeResult run is normalized to the run's | 
| + // startIndex and length. TODO crbug.com/542703: Consider changing that | 
| + // and instead pass the whole run to hb_buffer_t each time. | 
| + run->m_glyphData.resize(numGlyphs); | 
| + if (HB_DIRECTION_IS_FORWARD(hb_buffer_get_direction(harfBuzzBuffer))) { | 
| + run->m_glyphData[i].characterIndex = glyphInfos[startGlyph + i].cluster - glyphInfos[startGlyph].cluster; | 
| + } else { | 
| + run->m_glyphData[i].characterIndex = glyphInfos[startGlyph + i].cluster - glyphInfos[startGlyph + numGlyphs - 1].cluster; | 
| + } | 
| if (isClusterEnd) | 
| spacing += adjustSpacing(run.get(), i, currentCharacterIndex, *directionOffset, totalAdvance); | 
| @@ -1151,11 +1183,33 @@ void HarfBuzzShaper::shapeResult(ShapeResult* result, unsigned index, | 
| result->m_glyphBoundingBox.unite(glyphBounds); | 
| glyphOrigin += FloatSize(advance + offsetX, offsetY); | 
| } | 
| - | 
| run->m_width = std::max(0.0f, totalAdvance); | 
| result->m_width += run->m_width; | 
| result->m_numGlyphs += numGlyphs; | 
| - result->m_runs[index] = run.release(); | 
| + | 
| + // The runs are stored in result->m_runs in visual order. For LTR, we place | 
| + // the run to be inserted before the next run with a bigger character | 
| + // start index. For RTL, we place the run before the next run with a lower | 
| + // character index. Otherwise, for both directions, at the end. | 
| + if (HB_DIRECTION_IS_FORWARD(run->m_direction)) { | 
| + for (size_t pos = 0; pos < result->m_runs.size(); ++pos) { | 
| + if (result->m_runs.at(pos)->m_startIndex > run->m_startIndex) { | 
| + result->m_runs.insert(pos, run.release()); | 
| + break; | 
| + } | 
| + } | 
| + } else { | 
| + for (size_t pos = 0; pos < result->m_runs.size(); ++pos) { | 
| + if (result->m_runs.at(pos)->m_startIndex < run->m_startIndex) { | 
| + result->m_runs.insert(pos, run.release()); | 
| + break; | 
| + } | 
| + } | 
| + } | 
| + // If we didn't find an existing slot to place it, append. | 
| + if (run) { | 
| + result->m_runs.append(run.release()); | 
| + } | 
| } | 
| PassRefPtr<ShapeResult> ShapeResult::createForTabulationCharacters(const Font* font, | 
| @@ -1239,4 +1293,5 @@ float HarfBuzzShaper::adjustSpacing(ShapeResult::RunInfo* run, size_t glyphIndex | 
| return spacing; | 
| } | 
| + | 
| } // namespace blink |