| Index: src/pdf/SkPDFMakeToUnicodeCmap.cpp | 
| diff --git a/src/pdf/SkPDFMakeToUnicodeCmap.cpp b/src/pdf/SkPDFMakeToUnicodeCmap.cpp | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..6fd8b1ca16db2e36eeff0ac7605caf95bae89010 | 
| --- /dev/null | 
| +++ b/src/pdf/SkPDFMakeToUnicodeCmap.cpp | 
| @@ -0,0 +1,230 @@ | 
| +/* | 
| + * Copyright 2011 Google Inc. | 
| + * | 
| + * Use of this source code is governed by a BSD-style license that can be | 
| + * found in the LICENSE file. | 
| + */ | 
| + | 
| +#include "SkPDFMakeToUnicodeCmap.h" | 
| +#include "SkPDFUtils.h" | 
| +#include "SkUtils.h" | 
| + | 
| +// Writes the fixed prologue of a ToUnicode CMap to cmap, up to and including | 
| +// the codespace range section.  firstGlyphID and lastGlyphID are printed as | 
| +// the two four-digit hexadecimal bounds of that single range; the character | 
| +// mappings themselves are not written here. | 
| +static void append_tounicode_header(SkDynamicMemoryWStream* cmap, | 
| +                                    SkGlyphID firstGlyphID, | 
| +                                    SkGlyphID lastGlyphID) { | 
| +    // Opening of the CMap program.  12 in "12 dict begin" is the value Adobe | 
| +    // suggests; it shall not change, or old Adobe Readers may malfunction. | 
| +    cmap->writeText( | 
| +        "/CIDInit /ProcSet findresource begin\n" | 
| +        "12 dict begin\n" | 
| +        "begincmap\n"); | 
| + | 
| +    // This /CIDSystemInfo must be consistent with the one in | 
| +    // SkPDFFont::populateCIDFont().  The system info object cannot be passed | 
| +    // in and reused here because the format is different: this is not a | 
| +    // reference object. | 
| +    cmap->writeText( | 
| +        "/CIDSystemInfo\n" | 
| +        "<<  /Registry (Adobe)\n" | 
| +        "/Ordering (UCS)\n" | 
| +        "/Supplement 0\n" | 
| +        ">> def\n"); | 
| + | 
| +    // The CMapName must be consistent with the /CIDSystemInfo above, and | 
| +    // /CMapType 2 means ToUnicode.  The codespace range that follows just | 
| +    // tells the PDF processor which codes are valid. | 
| +    cmap->writeText( | 
| +        "/CMapName /Adobe-Identity-UCS def\n" | 
| +        "/CMapType 2 def\n" | 
| +        "1 begincodespacerange\n"); | 
| + | 
| +    // The range line itself, e.g. "<0000> <FFFF>\n". | 
| +    SkString codespace; | 
| +    codespace.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID); | 
| +    cmap->writeText(codespace.c_str()); | 
| + | 
| +    cmap->writeText("endcodespacerange\n"); | 
| +} | 
| + | 
| +// Writes the fixed text that ends the CMap program (no trailing newline). | 
| +static void append_cmap_footer(SkDynamicMemoryWStream* cmap) { | 
| +    cmap->writeText( | 
| +        "endcmap\n" | 
| +        "CMapName currentdict /CMap defineresource pop\n" | 
| +        "end\n" | 
| +        "end"); | 
| +} | 
| + | 
| +namespace { | 
| +struct BFChar {  // One bfchar entry: one source code and its Unicode value. | 
| +    SkGlyphID fGlyphId;  // Source code (glyph ID minus any single-byte offset). | 
| +    SkUnichar fUnicode;  // Unicode value the code maps to. | 
| +}; | 
| + | 
| +struct BFRange {  // One bfrange entry: consecutive codes fStart..fEnd. | 
| +    SkGlyphID fStart;    // First source code of the run. | 
| +    SkGlyphID fEnd;      // Last source code of the run (inclusive). | 
| +    SkUnichar fUnicode;  // Unicode value for fStart; later codes count up. | 
| +}; | 
| +}  // namespace | 
| + | 
| +static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) { | 
| +    uint16_t utf16[2] = {0, 0};  // utf32 as UTF-16 code units (not glyph IDs). | 
| +    const size_t len = SkUTF16_FromUnichar(utf32, utf16); | 
| +    SkASSERT(len == 1 || len == 2); | 
| +    SkPDFUtils::WriteUInt16BE(wStream, utf16[0]); | 
| +    if (len == 2) {  // Two units were produced: write the second one as well. | 
| +        SkPDFUtils::WriteUInt16BE(wStream, utf16[1]); | 
| +    } | 
| +} | 
| + | 
| +static void append_bfchar_section(const SkTDArray<BFChar>& bfchar, | 
| +                                  SkDynamicMemoryWStream* cmap) { | 
| +    // Emit chunks of at most 100 entries: the PDF spec caps every bf* list. | 
| +    for (int first = 0; first < bfchar.count(); first += 100) { | 
| +        const int chunkSize = SkMin32(bfchar.count() - first, 100); | 
| +        cmap->writeDecAsText(chunkSize); | 
| +        cmap->writeText(" beginbfchar\n"); | 
| +        for (int k = first; k < first + chunkSize; ++k) { | 
| +            const BFChar& entry = bfchar[k]; | 
| +            cmap->writeText("<"); | 
| +            SkPDFUtils::WriteUInt16BE(cmap, entry.fGlyphId); | 
| +            cmap->writeText("> <"); | 
| +            write_utf16be(cmap, entry.fUnicode); | 
| +            cmap->writeText(">\n"); | 
| +        } | 
| +        cmap->writeText("endbfchar\n"); | 
| +    } | 
| +} | 
| + | 
| +static void append_bfrange_section(const SkTDArray<BFRange>& bfrange, | 
| +                                   SkDynamicMemoryWStream* cmap) { | 
| +    // Emit chunks of at most 100 entries: the PDF spec caps every bf* list. | 
| +    for (int first = 0; first < bfrange.count(); first += 100) { | 
| +        const int chunkSize = SkMin32(bfrange.count() - first, 100); | 
| +        cmap->writeDecAsText(chunkSize); | 
| +        cmap->writeText(" beginbfrange\n"); | 
| +        for (int k = first; k < first + chunkSize; ++k) { | 
| +            const BFRange& entry = bfrange[k]; | 
| +            cmap->writeText("<"); | 
| +            SkPDFUtils::WriteUInt16BE(cmap, entry.fStart); | 
| +            cmap->writeText("> <"); | 
| +            SkPDFUtils::WriteUInt16BE(cmap, entry.fEnd); | 
| +            cmap->writeText("> <"); | 
| +            write_utf16be(cmap, entry.fUnicode); | 
| +            cmap->writeText(">\n"); | 
| +        } | 
| +        cmap->writeText("endbfrange\n"); | 
| +    } | 
| +} | 
| + | 
| +// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe | 
| +// Technote 5014. | 
| +// The function is not static so we can test it in unit tests. | 
| +// | 
| +// Current implementation guarantees bfchar and bfrange entries do not overlap. | 
| +// | 
| +// Current implementation does not attempt aggressive optimizations for the | 
| +// following case because the specification is not clear. | 
| +// | 
| +// 4 beginbfchar          1 beginbfchar | 
| +// <0003> <0013>          <0020> <0014> | 
| +// <0005> <0015>    to    endbfchar | 
| +// <0007> <0017>          1 beginbfrange | 
| +// <0020> <0014>          <0003> <0007> <0013> | 
| +// endbfchar              endbfrange | 
| +// | 
| +// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may | 
| +// overlap, but succeeding maps supersede preceding maps." | 
| +// | 
| +// When searching text in a PDF, bfrange will have higher precedence, so | 
| +// typing char id 0x0014 in the search box will get glyph id 0x0004 first. | 
| +// However, the spec does not say how this kind of conflict is resolved. | 
| +// | 
| +// For the worst case (having 65536 continuous unicode and we use every other | 
| +// one of them), the possible savings by aggressive optimization is 416KB | 
| +// pre-compressed and does not provide enough motivation for implementation. | 
| +void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode, | 
| +                             const SkPDFGlyphSet* subset, | 
| +                             SkDynamicMemoryWStream* cmap, | 
| +                             bool multiByteGlyphs, | 
| +                             SkGlyphID firstGlyphID, | 
| +                             SkGlyphID lastGlyphID) { | 
| +    if (glyphToUnicode.isEmpty()) {  // No mapping data: write nothing. | 
| +        return; | 
| +    } | 
| +    int glyphOffset = 0;  // Added to a code i to obtain its glyph ID. | 
| +    if (!multiByteGlyphs) {  // Rebase codes so firstGlyphID becomes code 1. | 
| +        glyphOffset = firstGlyphID - 1; | 
| +    } | 
| + | 
| +    SkTDArray<BFChar> bfcharEntries; | 
| +    SkTDArray<BFRange> bfrangeEntries; | 
| + | 
| +    BFRange currentRangeEntry = {0, 0, 0};  // The run being collected. | 
| +    bool rangeEmpty = true;  // True while no run is open. | 
| +    const int limit = | 
| +            SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset; | 
| +    // i == limit is an extra step that only flushes a run that is still open. | 
| +    for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) { | 
| +        bool inSubset = i < limit && | 
| +                        (subset == nullptr || subset->has(i + glyphOffset)); | 
| +        if (!rangeEmpty) { | 
| +            // The PDF spec requires that a bfrange does not change the high | 
| +            // byte, e.g. <1035> <10FF> <2222> is ok, but | 
| +            //            <1035> <1100> <2222> is no good. | 
| +            bool inRange = | 
| +                i == currentRangeEntry.fEnd + 1 && | 
| +                i >> 8 == currentRangeEntry.fStart >> 8 && | 
| +                i < limit && | 
| +                glyphToUnicode[i + glyphOffset] == | 
| +                    currentRangeEntry.fUnicode + i - currentRangeEntry.fStart; | 
| +            if (!inSubset || !inRange) {  // i cannot extend the run: flush it. | 
| +                if (currentRangeEntry.fEnd > currentRangeEntry.fStart) { | 
| +                    bfrangeEntries.push(currentRangeEntry);  // 2+ codes. | 
| +                } else {  // A single code is stored as a bfchar entry. | 
| +                    BFChar* entry = bfcharEntries.append(); | 
| +                    entry->fGlyphId = currentRangeEntry.fStart; | 
| +                    entry->fUnicode = currentRangeEntry.fUnicode; | 
| +                } | 
| +                rangeEmpty = true; | 
| +            } | 
| +        } | 
| +        if (inSubset) {  // Extend the open run, or start a new one at i. | 
| +            currentRangeEntry.fEnd = i; | 
| +            if (rangeEmpty) { | 
| +              currentRangeEntry.fStart = i; | 
| +              currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset]; | 
| +              rangeEmpty = false; | 
| +            } | 
| +        } | 
| +    } | 
| + | 
| +    // The spec requires that all bfchar entries for a font come before the | 
| +    // bfrange entries. | 
| +    append_bfchar_section(bfcharEntries, cmap); | 
| +    append_bfrange_section(bfrangeEntries, cmap); | 
| +} | 
| + | 
| +// Builds the complete ToUnicode CMap text and returns it wrapped in a stream. | 
| +sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap( | 
| +        const SkTDArray<SkUnichar>& glyphToUnicode, | 
| +        const SkPDFGlyphSet* subset, | 
| +        bool multiByteGlyphs, | 
| +        SkGlyphID firstGlyphID, | 
| +        SkGlyphID lastGlyphID) { | 
| +    // Single-byte fonts use codes rebased so that firstGlyphID becomes 1. | 
| +    const int codeShift = multiByteGlyphs ? 0 : firstGlyphID - 1; | 
| +    SkDynamicMemoryWStream cmap; | 
| +    append_tounicode_header(&cmap, firstGlyphID - codeShift, | 
| +                            lastGlyphID - codeShift); | 
| +    SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs, | 
| +                            firstGlyphID, lastGlyphID); | 
| +    append_cmap_footer(&cmap); | 
| +    return sk_make_sp<SkPDFStream>( | 
| +            std::unique_ptr<SkStreamAsset>(cmap.detachAsStream())); | 
| +} | 
|  |