| Index: src/pdf/SkPDFMakeToUnicodeCmap.cpp
|
| diff --git a/src/pdf/SkPDFMakeToUnicodeCmap.cpp b/src/pdf/SkPDFMakeToUnicodeCmap.cpp
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..6fd8b1ca16db2e36eeff0ac7605caf95bae89010
|
| --- /dev/null
|
| +++ b/src/pdf/SkPDFMakeToUnicodeCmap.cpp
|
| @@ -0,0 +1,230 @@
|
| +/*
|
| + * Copyright 2011 Google Inc.
|
| + *
|
| + * Use of this source code is governed by a BSD-style license that can be
|
| + * found in the LICENSE file.
|
| + */
|
| +
|
| +#include "SkPDFMakeToUnicodeCmap.h"
|
| +#include "SkPDFUtils.h"
|
| +#include "SkUtils.h"
|
| +
|
| +static void append_tounicode_header(SkDynamicMemoryWStream* cmap,  // Stream that receives the CMap preamble.
|
| + SkGlyphID firstGlyphID,  // Low end of the codespace range written below.
|
| + SkGlyphID lastGlyphID) {  // High end of the codespace range written below.
|
| + // 12 dict begin: 12 is an Adobe-suggested value. It must not be changed;
|
| + // it is there to prevent old versions of Adobe Reader from malfunctioning.
|
| + const char* kHeader =
|
| + "/CIDInit /ProcSet findresource begin\n"
|
| + "12 dict begin\n"
|
| + "begincmap\n";
|
| + cmap->writeText(kHeader);
|
| +
|
| + // The /CIDSystemInfo must be consistent with the one in
|
| + // SkPDFFont::populateCIDFont().
|
| + // We cannot pass the system info object itself here because the format is
|
| + // different. This is not a reference object.
|
| + const char* kSysInfo =
|
| + "/CIDSystemInfo\n"
|
| + "<< /Registry (Adobe)\n"
|
| + "/Ordering (UCS)\n"
|
| + "/Supplement 0\n"
|
| + ">> def\n";
|
| + cmap->writeText(kSysInfo);
|
| +
|
| + // The CMapName must be consistent with /CIDSystemInfo above.
|
| + // /CMapType 2 means ToUnicode.
|
| + // Codespace range just tells the PDF processor the valid range.
|
| + const char* kTypeInfoHeader =
|
| + "/CMapName /Adobe-Identity-UCS def\n"
|
| + "/CMapType 2 def\n"
|
| + "1 begincodespacerange\n";
|
| + cmap->writeText(kTypeInfoHeader);
|
| +
|
| + // The single range, as two 4-digit uppercase hex codes, e.g. "<0000> <FFFF>\n"
|
| + SkString range;
|
| + range.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID);
|
| + cmap->writeText(range.c_str());
|
| +
|
| + const char* kTypeInfoFooter = "endcodespacerange\n";
|
| + cmap->writeText(kTypeInfoFooter);
|
| +}
|
| +
|
| +static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {  // Writes the closing CMap text to |cmap|.
|
| + const char kFooter[] =
|
| + "endcmap\n"
|
| + "CMapName currentdict /CMap defineresource pop\n"
|
| + "end\n"
|
| + "end";  // Matches "begincmap" and the two "begin"s in append_tounicode_header; no trailing newline.
|
| + cmap->writeText(kFooter);
|
| +}
|
| +
|
| +namespace {
|
| +struct BFChar {  // One bfchar entry: a single code mapped to a single Unicode value.
|
| + SkGlyphID fGlyphId;  // Source code of the mapping.
|
| + SkUnichar fUnicode;  // Unicode value the code maps to.
|
| +};
|
| +
|
| +struct BFRange {  // One bfrange entry: consecutive codes mapped to consecutive Unicode values.
|
| + SkGlyphID fStart;  // First source code of the range.
|
| + SkGlyphID fEnd;  // Last source code of the range (inclusive).
|
| + SkUnichar fUnicode;  // Unicode value that fStart maps to.
|
| +};
|
| +} // namespace
|
| +
|
| +static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) {  // Writes |utf32| to |wStream| as one or two 16-bit units.
|
| + SkGlyphID utf16[2] = {0, 0};  // NOTE(review): SkGlyphID appears to serve only as a 16-bit buffer type here - confirm.
|
| + size_t len = SkUTF16_FromUnichar(utf32, utf16);  // Presumably the number of UTF-16 units stored in utf16 - confirm.
|
| + SkASSERT(len == 1 || len == 2);
|
| + SkPDFUtils::WriteUInt16BE(wStream, utf16[0]);
|
| + if (len == 2) {  // A second unit was produced (presumably a surrogate pair); write it too.
|
| + SkPDFUtils::WriteUInt16BE(wStream, utf16[1]);
|
| + }
|
| +}
|
| +
|
| +static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,  // Entries to emit, in order.
|
| + SkDynamicMemoryWStream* cmap) {  // Stream that receives the beginbfchar/endbfchar sections.
|
| + // PDF spec defines that every bf* list can have at most 100 entries.
|
| + for (int i = 0; i < bfchar.count(); i += 100) {  // One section per batch of up to 100 entries.
|
| + int count = bfchar.count() - i;  // Entries remaining from index i.
|
| + count = SkMin32(count, 100);  // Cap this batch at 100.
|
| + cmap->writeDecAsText(count);  // The section starts with its entry count.
|
| + cmap->writeText(" beginbfchar\n");
|
| + for (int j = 0; j < count; ++j) {  // Each entry line: <code> <unicode>
|
| + cmap->writeText("<");
|
| + SkPDFUtils::WriteUInt16BE(cmap, bfchar[i + j].fGlyphId);
|
| + cmap->writeText("> <");
|
| + write_utf16be(cmap, bfchar[i + j].fUnicode);
|
| + cmap->writeText(">\n");
|
| + }
|
| + cmap->writeText("endbfchar\n");
|
| + }
|
| +}
|
| +
|
| +static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,  // Entries to emit, in order.
|
| + SkDynamicMemoryWStream* cmap) {  // Stream that receives the beginbfrange/endbfrange sections.
|
| + // PDF spec defines that every bf* list can have at most 100 entries.
|
| + for (int i = 0; i < bfrange.count(); i += 100) {  // One section per batch of up to 100 entries.
|
| + int count = bfrange.count() - i;  // Entries remaining from index i.
|
| + count = SkMin32(count, 100);  // Cap this batch at 100.
|
| + cmap->writeDecAsText(count);  // The section starts with its entry count.
|
| + cmap->writeText(" beginbfrange\n");
|
| + for (int j = 0; j < count; ++j) {  // Each entry line: <start> <end> <unicode of start>
|
| + cmap->writeText("<");
|
| + SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fStart);
|
| + cmap->writeText("> <");
|
| + SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fEnd);
|
| + cmap->writeText("> <");
|
| + write_utf16be(cmap, bfrange[i + j].fUnicode);
|
| + cmap->writeText(">\n");
|
| + }
|
| + cmap->writeText("endbfrange\n");
|
| + }
|
| +}
|
| +
|
| +// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
|
| +// Technote 5014.
|
| +// The function is not static so we can test it in unit tests.
|
| +//
|
| +// Current implementation guarantees bfchar and bfrange entries do not overlap.
|
| +//
|
| +// Current implementation does not attempt aggressive optimizations for the
|
| +// following case because the specification is not clear.
|
| +//
|
| +// 4 beginbfchar 1 beginbfchar
|
| +// <0003> <0013> <0020> <0014>
|
| +// <0005> <0015> to endbfchar
|
| +// <0007> <0017> 1 beginbfrange
|
| +// <0020> <0014> <0003> <0007> <0013>
|
| +// endbfchar endbfrange
|
| +//
|
| +// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
|
| +// overlap, but succeeding maps supersede preceding maps."
|
| +//
|
| +// When searching text in a PDF, bfrange will have higher precedence, so
|
| +// typing char id 0x0014 in the search box will get glyph id 0x0004 first.
|
| +// However, the spec does not say how this kind of conflict is resolved.
|
| +//
|
| +// For the worst case (65536 contiguous Unicode values of which we use every
|
| +// other one), the possible saving from aggressive optimization is 416KB
|
| +// pre-compressed, which does not provide enough motivation to implement it.
|
| +void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,  // Unicode value per glyph ID (indexed by glyph ID).
|
| + const SkPDFGlyphSet* subset,  // Glyph IDs to include; nullptr means include all.
|
| + SkDynamicMemoryWStream* cmap,  // Receives the bfchar sections, then the bfrange sections.
|
| + bool multiByteGlyphs,  // If false, codes are rebased so that firstGlyphID becomes code 1.
|
| + SkGlyphID firstGlyphID,  // First glyph ID considered.
|
| + SkGlyphID lastGlyphID) {  // Last glyph ID considered (inclusive).
|
| + if (glyphToUnicode.isEmpty()) {  // No mapping table: write nothing.
|
| + return;
|
| + }
|
| + int glyphOffset = 0;  // Invariant below: glyph ID == code (i) + glyphOffset.
|
| + if (!multiByteGlyphs) {
|
| + glyphOffset = firstGlyphID - 1;
|
| + }
|
| +
|
| + SkTDArray<BFChar> bfcharEntries;
|
| + SkTDArray<BFRange> bfrangeEntries;
|
| +
|
| + BFRange currentRangeEntry = {0, 0, 0};  // Run of consecutive mappings currently being built.
|
| + bool rangeEmpty = true;  // True while no run is in progress.
|
| + const int limit =
|
| + SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset;  // Exclusive end code, clamped to the table size.
|
| +
|
| + for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {  // Runs one step past limit so the last run is flushed.
|
| + bool inSubset = i < limit &&  // Always false on the extra final iteration.
|
| + (subset == nullptr || subset->has(i + glyphOffset));
|
| + if (!rangeEmpty) {
|
| + // PDF spec requires bfrange not changing the higher byte,
|
| + // e.g. <1035> <10FF> <2222> is ok, but
|
| + // <1035> <1100> <2222> is no good
|
| + bool inRange =
|
| + i == currentRangeEntry.fEnd + 1 &&  // Directly follows the run's last code.
|
| + i >> 8 == currentRangeEntry.fStart >> 8 &&  // Same high byte as the run's first code.
|
| + i < limit &&  // Short-circuits before the table lookup below.
|
| + glyphToUnicode[i + glyphOffset] ==
|
| + currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;  // Unicode value continues the sequence.
|
| + if (!inSubset || !inRange) {  // The run cannot be extended by i: flush it.
|
| + if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {  // Two or more codes: emit as bfrange.
|
| + bfrangeEntries.push(currentRangeEntry);
|
| + } else {  // A single code: emit as bfchar.
|
| + BFChar* entry = bfcharEntries.append();
|
| + entry->fGlyphId = currentRangeEntry.fStart;
|
| + entry->fUnicode = currentRangeEntry.fUnicode;
|
| + }
|
| + rangeEmpty = true;
|
| + }
|
| + }
|
| + if (inSubset) {  // Extend the current run to i, or start a new run at i.
|
| + currentRangeEntry.fEnd = i;
|
| + if (rangeEmpty) {
|
| + currentRangeEntry.fStart = i;
|
| + currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset];
|
| + rangeEmpty = false;
|
| + }
|
| + }
|
| + }
|
| +
|
| + // The spec requires that all bfchar entries for a font come before the
|
| + // bfrange entries.
|
| + append_bfchar_section(bfcharEntries, cmap);
|
| + append_bfrange_section(bfrangeEntries, cmap);
|
| +}
|
| +
|
| +sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(  // Builds the full CMap text (header, mapping sections, footer) as a stream object.
|
| + const SkTDArray<SkUnichar>& glyphToUnicode,  // Forwarded to SkPDFAppendCmapSections.
|
| + const SkPDFGlyphSet* subset,  // Forwarded to SkPDFAppendCmapSections.
|
| + bool multiByteGlyphs,  // Selects the codespace range written in the header.
|
| + SkGlyphID firstGlyphID,
|
| + SkGlyphID lastGlyphID) {
|
| + SkDynamicMemoryWStream cmap;
|
| + if (multiByteGlyphs) {  // Codespace range is the glyph ID range itself.
|
| + append_tounicode_header(&cmap, firstGlyphID, lastGlyphID);
|
| + } else {  // Codespace range is rebased to start at 1.
|
| + append_tounicode_header(&cmap, 1, lastGlyphID - firstGlyphID + 1);
|
| + }
|
| + SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,  // Writes nothing if glyphToUnicode is empty.
|
| + firstGlyphID, lastGlyphID);
|
| + append_cmap_footer(&cmap);
|
| + return sk_make_sp<SkPDFStream>(  // Hand the accumulated stream contents to a new SkPDFStream.
|
| + std::unique_ptr<SkStreamAsset>(cmap.detachAsStream()));
|
| +}
|
|
|