Chromium Code Reviews| Index: src/pdf/SkPDFDevice.cpp |
| diff --git a/src/pdf/SkPDFDevice.cpp b/src/pdf/SkPDFDevice.cpp |
| index 608d284bf2befcc30ec8043a2014a8c7c5a9178a..c39e744cfa35e84cf50a279b1e2655f2b3688c0b 100644 |
| --- a/src/pdf/SkPDFDevice.cpp |
| +++ b/src/pdf/SkPDFDevice.cpp |
| @@ -7,6 +7,7 @@ |
| #include "SkPDFDevice.h" |
| +#include "SkAdvancedTypefaceMetrics.h" |
| #include "SkAnnotationKeys.h" |
| #include "SkBitmapDevice.h" |
| #include "SkBitmapKey.h" |
| @@ -37,6 +38,7 @@ |
| #include "SkTemplates.h" |
| #include "SkTextBlobRunIterator.h" |
| #include "SkTextFormatParams.h" |
| +#include "SkUtils.h" |
| #include "SkXfermodeInterpretation.h" |
| #define DPI_FOR_RASTER_SCALE_ONE 72 |
| @@ -922,8 +924,199 @@ private: |
| bool fInitialized = false; |
| const bool fDefaultPositioning; |
| }; |
| + |
| +/** Given the m-to-n glyph-to-character mapping data (as returned by |
| + harfbuzz), iterate over the clusters. |
| + |
| + fClusters[i] is the byte offset into fUtf8Text of the text that |
| + produced glyph i; consecutive glyphs sharing an offset form one |
| + cluster. next() hands the clusters out one at a time and returns |
| + an empty Cluster (fGlyphCount == 0) once the glyphs are exhausted |
| + or the cluster data is found to be inconsistent. */ |
| +class Clusterator { |
| +public: |
| + // No glyphs and no text: next() immediately yields an empty Cluster. |
| + Clusterator() : fClusters(nullptr), fGlyphCount(0), fTextByteLength(0), fUtf8Text(nullptr) {} |
| + // Glyphs without any text: next() yields all of them as one text-less Cluster. |
| + explicit Clusterator(uint32_t glyphCount) |
| + : fClusters(nullptr) |
| + , fGlyphCount(glyphCount) |
| + , fTextByteLength(0) |
| + , fUtf8Text(nullptr) {} |
| + // Glyphs with cluster offsets into textByteLength bytes of utf8Text. |
| + Clusterator(const uint32_t* clusters, |
| + uint32_t glyphCount, |
| + uint32_t textByteLength, |
| + const char* utf8Text) |
| + : fClusters(clusters) |
| + , fGlyphCount(glyphCount) |
| + , fTextByteLength(textByteLength) |
| + , fUtf8Text(utf8Text) { |
| + // This is a cheap heuristic for /ReversedChars which seems to |
| + // work for clusters produced by HarfBuzz |

tomhudson
2016/09/14 16:15:03
This is surprising? What does harfbuzz output that
hal.canary
2016/09/15 20:27:16
Done.
// This is a cheap heuristic for /R
|
| + fReversedChars = |
| + fUtf8Text && fClusters && fGlyphCount && fClusters[0] != 0; |
| + } |
| + // One run of glyphs together with the UTF-8 bytes it stands for. |
| + // Converts to false when it holds no glyphs (end of iteration). |
| + struct Cluster { |

tomhudson
2016/09/14 16:15:03
Nit: Clusterator constructor parameters are nearly
hal.canary
2016/09/15 20:27:17
Fixed. It is now in the same order, and I packed
|
| + const char* fUtf8Text; |
| + uint32_t fTextByteLength; |
| + uint32_t fGlyphIndex; |
| + uint32_t fGlyphCount; |
| + explicit operator bool() const { return fGlyphCount != 0; } |
| + }; |
| + // True if this looks like right-to-left text (first cluster offset is non-zero). |
| + bool reversedChars() const { return fReversedChars; } |

tomhudson
2016/09/14 16:15:02
What's the *meaning* of this bool? Is it context t
hal.canary
2016/09/15 20:27:17
Done. // True if this looks like right-to-left
|
| + // Returns the next cluster and advances past its glyphs. |
| + Cluster next() { |
| + if (!fUtf8Text || !fClusters) { |
| + // These glyphs have no text. Treat as one "cluster". |
| + uint32_t glyphCount = fGlyphCount; |
| + fGlyphCount = 0; |

tomhudson
2016/09/14 16:15:03
You want to do this test before you check for empt
hal.canary
2016/09/15 20:27:17
It works either way (check it yourself).
Done
tomhudson
2016/09/16 17:49:09
Only because you added the && fGlyphCount term?
|
| + return Cluster{nullptr, 0, 0, glyphCount}; |
| + } |
| + if (fGlyphCount == 0 || fTextByteLength == 0) { |
| + return Cluster{nullptr, 0, 0, 0}; // empty |
| + } |
| + uint32_t cluster = fClusters[0]; |
| + if (cluster >= fTextByteLength) { |
| + return Cluster{nullptr, 0, 0, 0}; // bad input. |
| + } |
| + uint32_t glyphsInCluster = 1; |
| + // Check the bound before indexing: fClusters only holds fGlyphCount |
| + // entries, so fClusters[fGlyphCount] must never be read. |
| + while (glyphsInCluster < fGlyphCount && |
| + fClusters[glyphsInCluster] == cluster) { |
| + ++glyphsInCluster; |
| + } |
| + SkASSERT(glyphsInCluster <= fGlyphCount); |
| + uint32_t textLength = 0; |
| + if (glyphsInCluster == fGlyphCount) { |
| + // consumes rest of glyphs and rest of text |
| + if (kInvalidCluster == fPreviousCluster) { // LTR text or single cluster |
| + textLength = fTextByteLength - cluster; |
| + } else { // RTL text; last cluster. |
| + SkASSERT(fPreviousCluster < fTextByteLength); |
| + if (fPreviousCluster <= cluster) { // bad input. |
| + return Cluster{nullptr, 0, 0, 0}; |
| + } |
| + textLength = fPreviousCluster - cluster; |
| + } |
| + fGlyphCount = 0; |
| + return Cluster{fUtf8Text + cluster, |
| + textLength, |
| + fGlyphIndex, |
| + glyphsInCluster}; |
| + } |
| + uint32_t nextCluster = fClusters[glyphsInCluster]; |
| + if (nextCluster >= fTextByteLength) { |
| + return Cluster{nullptr, 0, 0, 0}; // bad input. |
| + } |
| + if (nextCluster > cluster) { // LTR text |

tomhudson
2016/09/14 16:15:03
If this is the test for LTR vs RTL, what's the mea
hal.canary
2016/09/15 20:27:16
I wrote this before fReversedChars. It still work
tomhudson
2016/09/16 17:49:09
Yes, but now we have two different tests/flags for
|
| + // fPreviousCluster is only ever set on the RTL path, so a value |
| + // here means the offsets changed direction mid-run. |
| + if (kInvalidCluster != fPreviousCluster) { |
| + return Cluster{nullptr, 0, 0, 0}; // bad input. |
| + } |
| + textLength = nextCluster - cluster; |
| + } else { // RTL text |
| + SkASSERT(nextCluster < cluster); |
| + if (kInvalidCluster == fPreviousCluster) { // first cluster |
| + textLength = fTextByteLength - cluster; |
| + } else { // later cluster |
| + if (fPreviousCluster <= cluster) { |
| + return Cluster{nullptr, 0, 0, 0}; // bad input. |
| + } |
| + textLength = fPreviousCluster - cluster; |
| + } |
| + fPreviousCluster = cluster; |
| + } |
| + // Step past the glyphs just consumed; fClusters always points at |
| + // the first glyph that has not been returned yet. |
| + uint32_t glyphIndex = fGlyphIndex; |
| + fGlyphCount -= glyphsInCluster; |
| + fGlyphIndex += glyphsInCluster; |
| + fClusters += glyphsInCluster; |
| + return Cluster{fUtf8Text + cluster, |
| + textLength, |
| + glyphIndex, |
| + glyphsInCluster}; |
| + } |
| + |
| +private: |
| + static constexpr uint32_t kInvalidCluster = 0xFFFFFFFF; |
| + const uint32_t* fClusters; |
| + uint32_t fGlyphIndex = 0; |
| + uint32_t fGlyphCount; |
| + uint32_t fPreviousCluster = kInvalidCluster; |
| + uint32_t fTextByteLength; |
| + const char* fUtf8Text; |
| + bool fReversedChars = false; |
| +}; |
| + |
| +// Heap buffers filled in by make_clusterator(); the Clusterator it returns |
| +// points into them. |
| +struct TextStorage { |
| + // UTF-8 transcoding of UTF-16 / UTF-32 source text (not used for UTF-8 input). |
| + SkAutoTMalloc<char> fUtf8textStorage; |
| + // One entry per glyph: offset of the glyph's text within the UTF-8 text. |
| + SkAutoTMalloc<uint32_t> fClusterStorage; |
| + // Glyph IDs produced by SkPaint::textToGlyphs(). |
| + SkAutoTMalloc<SkGlyphID> fGlyphStorage; |
| +}; |
| } // namespace |
| +/** Given some unicode text (as passed to drawText()), convert to |
| + glyphs (via primitive shaping), while preserving |
| + glyph-to-character mapping information. |
| + |
| + @param sourceText      text in the paint's (non-glyph-ID) encoding |
| + @param sourceByteCount size of sourceText in bytes |
| + @param paint           supplies the text encoding and textToGlyphs() |
| + @param storage         receives the glyph, cluster and (for UTF-16 / |
| +                        UTF-32 input) UTF-8 buffers the result points into |
| + @param glyphCountOut   if non-null, receives the glyph count |
| + @return a Clusterator over the glyphs, or a default-constructed |
| +         Clusterator when the text produces no glyphs */ |
| +static Clusterator make_clusterator( |

tomhudson
2016/09/14 16:15:03
nit, for consistency: why not put this in the anon
hal.canary
2016/09/15 20:27:16
Skia style seems to prefer static functions over f
|
| + const void* sourceText, |
| + size_t sourceByteCount, |
| + const SkPaint& paint, |
| + TextStorage* storage, |
| + int* glyphCountOut) { |
| + SkASSERT(SkPaint::kGlyphID_TextEncoding != paint.getTextEncoding()); |
| + int glyphCount = paint.textToGlyphs(sourceText, sourceByteCount, nullptr); |
| + if (glyphCountOut) { |
| + *glyphCountOut = glyphCount; |
| + } |
| + if (!glyphCount) { |
| + return Clusterator(); |
| + } |
| + storage->fGlyphStorage.reset(SkToSizeT(glyphCount)); |
| + (void)paint.textToGlyphs(sourceText, sourceByteCount, storage->fGlyphStorage.get()); |
| + storage->fClusterStorage.reset(SkToSizeT(glyphCount)); |
| + // clusters[i] is the offset, within the UTF-8 text, of glyph i's character. |
| + uint32_t* clusters = storage->fClusterStorage.get(); |
| + uint32_t utf8ByteCount = 0; |
| + const char* utf8Text = nullptr; |
| + switch (paint.getTextEncoding()) { |
| + case SkPaint::kUTF8_TextEncoding: { |
| + // Already UTF-8: point at the source text directly, no copy. |
| + const char* txtPtr = (const char*)sourceText; |
| + for (int i = 0; i < glyphCount; ++i) { |
| + clusters[i] = SkToU32(txtPtr - (const char*)sourceText); |
| + txtPtr += SkUTF8_LeadByteToCount(*(const unsigned char*)txtPtr); |
| + SkASSERT(txtPtr <= (const char*)sourceText + sourceByteCount); |
| + } |
| + utf8ByteCount = SkToU32(sourceByteCount); |
| + utf8Text = (const char*)sourceText; |
| + break; |
| + } |
| + case SkPaint::kUTF16_TextEncoding: { |
| + const uint16_t* utf16ptr = (const uint16_t*)sourceText; |
| + int utf16count = SkToInt(sourceByteCount / sizeof(uint16_t)); |
| + // End of input measured in 16-bit units. Adding sourceByteCount |
| + // (a byte count) to a uint16_t* would land twice as far out and |
| + // run the loop below past the end of the source text. |
| + const uint16_t* utf16stop = utf16ptr + utf16count; |
| + utf8ByteCount = SkToU32(SkUTF16_ToUTF8(utf16ptr, utf16count)); |
| + storage->fUtf8textStorage.reset(utf8ByteCount); |
| + char* txtPtr = storage->fUtf8textStorage.get(); |
| + utf8Text = txtPtr; |
| + int clusterIndex = 0; |
| + while (utf16ptr < utf16stop) { |
| + clusters[clusterIndex++] = SkToU32(txtPtr - utf8Text); |

tomhudson
2016/09/14 16:15:02
You're storing a 32-bit cast of a pointer math res
hal.canary
2016/09/15 20:27:17
// The clusters[] array is an array of offsets int
tomhudson
2016/09/16 17:49:09
Slight preference for improved names over comments
|
| + SkUnichar uni = SkUTF16_NextUnichar(&utf16ptr); |
| + txtPtr += SkUTF8_FromUnichar(uni, txtPtr); |
| + } |
| + SkASSERT(clusterIndex == glyphCount); |
| + SkASSERT(txtPtr == storage->fUtf8textStorage.get() + utf8ByteCount); |
| + SkASSERT(utf16ptr == utf16stop); |
| + break; |
| + } |
| + case SkPaint::kUTF32_TextEncoding: { |
| + const SkUnichar* utf32 = (const SkUnichar*)sourceText; |
| + // First pass: measure the UTF-8 length so the buffer can be sized. |
| + for (size_t i = 0; i < sourceByteCount / sizeof(SkUnichar); ++i) { |
| + utf8ByteCount += SkToU32(SkUTF8_FromUnichar(utf32[i])); |
| + } |
| + storage->fUtf8textStorage.reset(SkToSizeT(utf8ByteCount)); |
| + char* txtPtr = storage->fUtf8textStorage.get(); |
| + utf8Text = txtPtr; |
| + // Second pass: transcode and record each character's offset. |
| + for (size_t i = 0; i < sourceByteCount / sizeof(SkUnichar); ++i) { |
| + clusters[i] = SkToU32(txtPtr - utf8Text); |
| + txtPtr += SkUTF8_FromUnichar(utf32[i], txtPtr); |
| + } |
| + break; |
| + } |
| + default: |
| + SkDEBUGFAIL(""); |
| + break; |
| + } |
| + return Clusterator(clusters, SkToU32(glyphCount), utf8ByteCount, utf8Text); |
| +} |
| + |
| static void draw_transparent_text(SkPDFDevice* device, |
| const SkDraw& d, |
| const void* text, size_t len, |
| @@ -965,6 +1158,10 @@ static void draw_transparent_text(SkPDFDevice* device, |
| } |
| } |
| +// Look up the Unicode value recorded for `glyph`; -1 when the table has no entry for it. |
| +static SkUnichar map_glyph(const SkTDArray<SkUnichar>& glyphToUnicode, SkGlyphID glyph) { |
| + const int glyphIndex = SkToInt(glyph); |
| + if (glyphIndex >= glyphToUnicode.count()) { |
| + return -1; |
| + } |
| + return glyphToUnicode[glyphIndex]; |
| +} |
| + |
| static void update_font(SkWStream* wStream, int fontIndex, SkScalar textSize) { |
| wStream->writeText("/"); |
| char prefix = SkPDFResourceDict::GetResourceTypePrefix(SkPDFResourceDict::kFont_ResourceType); |
| @@ -994,19 +1191,9 @@ void SkPDFDevice::internalDrawText( |
| // https://bug.skia.org/5665 |
| return; |
| } |
| - // TODO(halcanary): implement /ActualText with these values. |
| - (void)clusters; |
| - (void)textByteLength; |
| - (void)utf8Text; |
| - if (textByteLength > 0) { |
| - SkASSERT(clusters); |
| - SkASSERT(utf8Text); |
| - SkASSERT(srcPaint.getTextEncoding() == SkPaint::kGlyphID_TextEncoding); |
| - } else { |
| - SkASSERT(nullptr == clusters); |
| - SkASSERT(nullptr == utf8Text); |
| + if (0 == sourceByteCount || !sourceText) { |
| + return; |
| } |
| - |
| SkPaint paint = calculate_text_paint(srcPaint); |
| replace_srcmode_on_opaque_paint(&paint); |
| if (!paint.getTypeface()) { |
| @@ -1024,7 +1211,6 @@ void SkPDFDevice::internalDrawText( |
| return; |
| } |
| // TODO(halcanary): use metrics->fGlyphToUnicode to check Unicode mapping. |
| - const SkGlyphID maxGlyphID = metrics->fLastGlyphID; |
| if (!SkPDFFont::CanEmbedTypeface(typeface, fDocument->canon())) { |
| SkPath path; // https://bug.skia.org/3866 |
| paint.getTextPath(sourceText, sourceByteCount, |
| @@ -1035,22 +1221,38 @@ void SkPDFDevice::internalDrawText( |
| offset.x(), offset.y(), paint); |
| return; |
| } |
| - int glyphCount = paint.textToGlyphs(sourceText, sourceByteCount, nullptr); |
| - if (glyphCount <= 0) { |
| - return; |
| - } |
| - SkAutoSTMalloc<128, SkGlyphID> glyphStorage; |
| + |
| + // These three heap buffers are only used in the case where no glyphs |
| + // are passed to drawText() (most clients pass glyphs or a textblob). |
|
tomhudson
2016/09/14 16:15:03
So what *is* the use case where no glyphs are pass
hal.canary
2016/09/15 20:27:16
SkPaint::TextEncoding has been a part of our API f
|
| + TextStorage storage; |
| + int glyphCount = 0; |
| const SkGlyphID* glyphs = nullptr; |
| - if (paint.getTextEncoding() == SkPaint::kGlyphID_TextEncoding) { |
| + Clusterator clusterator; |
| + if (textByteLength > 0) { |
| + glyphCount = SkToInt(sourceByteCount / sizeof(SkGlyphID)); |
| glyphs = (const SkGlyphID*)sourceText; |
| - // validate input later. |
| + clusterator = Clusterator(clusters, SkToU32(glyphCount), textByteLength, utf8Text); |
| + SkASSERT(clusters); |
| + SkASSERT(utf8Text); |
| + SkASSERT(srcPaint.getTextEncoding() == SkPaint::kGlyphID_TextEncoding); |
| + SkASSERT(glyphCount == paint.textToGlyphs(sourceText, sourceByteCount, nullptr)); |
| + } else if (SkPaint::kGlyphID_TextEncoding == srcPaint.getTextEncoding()) { |
| + glyphCount = SkToInt(sourceByteCount / sizeof(SkGlyphID)); |
| + glyphs = (const SkGlyphID*)sourceText; |
| + clusterator = Clusterator(SkToU32(glyphCount)); |
| + SkASSERT(glyphCount == paint.textToGlyphs(sourceText, sourceByteCount, nullptr)); |
| + SkASSERT(nullptr == clusters); |
| + SkASSERT(nullptr == utf8Text); |
| } else { |
| - glyphStorage.reset(SkToSizeT(glyphCount)); |
| - (void)paint.textToGlyphs(sourceText, sourceByteCount, glyphStorage.get()); |
| - glyphs = glyphStorage.get(); |
| - paint.setTextEncoding(SkPaint::kGlyphID_TextEncoding); |
| + SkASSERT(nullptr == clusters); |
| + SkASSERT(nullptr == utf8Text); |
| + clusterator = make_clusterator(sourceText, sourceByteCount, srcPaint, |
| + &storage, &glyphCount); |
| + glyphs = storage.fGlyphStorage; |
| + } |
| + if (0 == glyphCount) { |
| + return; |
| } |
| - |
| bool defaultPositioning = (positioning == SkTextBlob::kDefault_Positioning); |
| paint.setHinting(SkPaint::kNo_Hinting); |
| SkAutoGlyphCache glyphCache(paint, nullptr, nullptr); |
| @@ -1072,51 +1274,91 @@ void SkPDFDevice::internalDrawText( |
| } |
| SkDynamicMemoryWStream* out = &content.entry()->fContent; |
| SkScalar textSize = paint.getTextSize(); |
| + const SkTDArray<SkUnichar>& glyphToUnicode = metrics->fGlyphToUnicode; |
| out->writeText("BT\n"); |
| SK_AT_SCOPE_EXIT(out->writeText("ET\n")); |
| + const SkGlyphID maxGlyphID = metrics->fLastGlyphID; |
|
tomhudson
2016/09/14 16:15:02
This should be the same as glyphToUnicode.count()?
hal.canary
2016/09/15 20:27:17
fGlyphToUnicode is not guaranteed to be populated.
tomhudson
2016/09/16 17:49:09
Acknowledged.
|
| bool multiByteGlyphs = SkPDFFont::IsMultiByte(SkPDFFont::FontType(*metrics)); |
| + if (clusterator.reversedChars()) { |
| + out->writeText("/ReversedChars BMC\n"); |
| + } |
| + SK_AT_SCOPE_EXIT(if (clusterator.reversedChars()) { out->writeText("EMC\n"); } ); |
| GlyphPositioner glyphPositioner(out, |
| paint.getTextSkewX(), |
| multiByteGlyphs, |
| defaultPositioning, |
| offset); |
| SkPDFFont* font = nullptr; |
| - for (int index = 0; index < glyphCount; ++index) { |
| - SkGlyphID gid = glyphs[index]; |
| - if (gid > maxGlyphID) { |
| - continue; // Skip this invalid glyphID. |
| - } |
| - if (!font || !font->hasGlyph(gid)) { |
| - // Either this is the first loop iteration or the current |
| - // PDFFont cannot encode this glyph. |
| - glyphPositioner.flush(); |
| - // Try to get a font which can encode the glyph. |
| - int fontIndex = this->getFontResourceIndex(typeface, gid); |
| - SkASSERT(fontIndex >= 0); |
| - if (fontIndex < 0) { return; } |
| - update_font(out, fontIndex, textSize); |
| - font = fFontResources[fontIndex]; |
| - SkASSERT(font); // All preconditions for SkPDFFont::GetFontResource are met. |
| - if (!font) { return; } |
| - SkASSERT(font->multiByteGlyphs() == multiByteGlyphs); |
| + |
| + while (Clusterator::Cluster c = clusterator.next()) { |
|
tomhudson
2016/09/14 16:15:02
Ugh - this looks like a nicely-separable chunk of
hal.canary
2016/09/15 20:27:17
Acknowledged.
|
| + int index = c.fGlyphIndex; |
| + int glyphLimit = index + c.fGlyphCount; |
| + |
| + bool actualText = false; |
| + SK_AT_SCOPE_EXIT(if (actualText) { glyphPositioner.flush(); out->writeText("EMC\n"); } ); |
| + if (c.fUtf8Text) { // real cluster |
| + // Check if `/ActualText` needed. |
| + const char* textPtr = c.fUtf8Text; |
| + // TODO(halcanary): validate utf8 input. |
| + SkUnichar unichar = SkUTF8_NextUnichar(&textPtr); |
| + const char* textEnd = c.fUtf8Text + c.fTextByteLength; |
| + if (textPtr < textEnd || // more characters left |
| + glyphLimit > index + 1 || // toUnicode wouldn't work |
| + unichar != map_glyph(glyphToUnicode, glyphs[index])) // test single Unichar map |
| + { |
| + glyphPositioner.flush(); |
| + out->writeText("/Span<</ActualText <"); |
| + SkPDFUtils::WriteUTF16beHex(out, 0xFEFF); // U+FEFF = BYTE ORDER MARK |
| + // the BOM marks this text as UTF-16BE, not PDFDocEncoding. |
| + SkPDFUtils::WriteUTF16beHex(out, unichar); // first char |
| + while (textPtr < textEnd) { |
| + unichar = SkUTF8_NextUnichar(&textPtr); |
| + SkPDFUtils::WriteUTF16beHex(out, unichar); |
| + } |
| + out->writeText("> >> BDC\n"); // begin marked-content sequence |
| + // with an associated property list. |
| + actualText = true; |
| + } |
| } |
| - font->noteGlyphUsage(gid); |
| - SkScalar advance{0.0f}; |
| - SkPoint xy{0.0f, 0.0f}; |
| - if (!defaultPositioning) { |
| - advance = glyphCache->getGlyphIDAdvance(gid).fAdvanceX; |
| - xy = SkTextBlob::kFull_Positioning == positioning |
| - ? SkPoint{pos[2 * index], pos[2 * index + 1]} |
| - : SkPoint{pos[index], 0}; |
| - if (alignment != SkPaint::kLeft_Align) { |
| - xy.offset(alignmentFactor * advance, 0); |
| + for (; index < glyphLimit; ++index) { |
| + SkGlyphID gid = glyphs[index]; |
| + if (gid > maxGlyphID) { |
| + continue; |
| + } |
| + if (!font || !font->hasGlyph(gid)) { |
| + // Not yet specified font or need to switch font. |
| + int fontIndex = this->getFontResourceIndex(typeface, gid); |
| + // All preconditions for SkPDFFont::GetFontResource are met. |
| + SkASSERT(fontIndex >= 0); |
| + if (fontIndex < 0) { |
| + return; |
| + } |
| + glyphPositioner.flush(); |
| + update_font(out, fontIndex, textSize); |
| + font = fFontResources[fontIndex]; |
| + SkASSERT(font); // All preconditions for SkPDFFont::GetFontResource are met. |
| + if (!font) { |
| + return; |
| + } |
| + SkASSERT(font->multiByteGlyphs() == multiByteGlyphs); |
| + } |
| + SkPoint xy{0, 0}; |
| + SkScalar advance{0}; |
| + if (!defaultPositioning) { |
| + advance = glyphCache->getGlyphIDAdvance(gid).fAdvanceX; |
| + xy = SkTextBlob::kFull_Positioning == positioning |
| + ? SkPoint{pos[2 * index], pos[2 * index + 1]} |
| + : SkPoint{pos[index], 0}; |
| + if (alignment != SkPaint::kLeft_Align) { |
| + xy.offset(alignmentFactor * advance, 0); |
| + } |
| } |
| + font->noteGlyphUsage(gid); |
| + SkGlyphID encodedGlyph = multiByteGlyphs ? gid : font->glyphToPDFFontEncoding(gid); |
| + glyphPositioner.writeGlyph(xy, advance, encodedGlyph); |
| } |
| - SkGlyphID encodedGlyph = |
| - multiByteGlyphs ? gid : font->glyphToPDFFontEncoding(gid); |
| - glyphPositioner.writeGlyph(xy, advance, encodedGlyph); |
| } |
| } |