Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(592)

Unified Diff: src/pdf/SkPDFMakeToUnicodeCmap.cpp

Issue 2221163002: SkPDF: SkPDFFont organization changes. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: 2016-08-09 (Tuesday) 15:24:15 EDT Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/pdf/SkPDFMakeToUnicodeCmap.h ('k') | tests/PDFGlyphsToUnicodeTest.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/pdf/SkPDFMakeToUnicodeCmap.cpp
diff --git a/src/pdf/SkPDFMakeToUnicodeCmap.cpp b/src/pdf/SkPDFMakeToUnicodeCmap.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6fd8b1ca16db2e36eeff0ac7605caf95bae89010
--- /dev/null
+++ b/src/pdf/SkPDFMakeToUnicodeCmap.cpp
@@ -0,0 +1,230 @@
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkPDFMakeToUnicodeCmap.h"
+#include "SkPDFUtils.h"
+#include "SkUtils.h"
+
+// Writes the opening part of a ToUnicode CMap to |cmap|: the CIDInit
+// prologue, the /CIDSystemInfo dictionary, the CMap name and type, and a
+// one-entry codespace range running from |firstGlyphID| to |lastGlyphID|,
+// each printed as at least four upper-case hex digits.
+static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
+                                    SkGlyphID firstGlyphID,
+                                    SkGlyphID lastGlyphID) {
+    // The 12 in "12 dict begin" is the Adobe-suggested value and shall not
+    // change; it keeps old versions of Adobe Reader from malfunctioning.
+    static const char kPrologue[] =
+        "/CIDInit /ProcSet findresource begin\n"
+        "12 dict begin\n"
+        "begincmap\n";
+    cmap->writeText(kPrologue);
+
+    // This /CIDSystemInfo has to stay consistent with the one in
+    // SkPDFFont::populateCIDFont(). The system info object cannot be passed
+    // in here because the format is different: this is not a reference
+    // object.
+    static const char kSystemInfo[] =
+        "/CIDSystemInfo\n"
+        "<< /Registry (Adobe)\n"
+        "/Ordering (UCS)\n"
+        "/Supplement 0\n"
+        ">> def\n";
+    cmap->writeText(kSystemInfo);
+
+    // The CMapName has to be consistent with the /CIDSystemInfo above, and
+    // /CMapType 2 means ToUnicode. The codespace range only tells the PDF
+    // processor which codes are valid.
+    static const char kNameAndType[] =
+        "/CMapName /Adobe-Identity-UCS def\n"
+        "/CMapType 2 def\n"
+        "1 begincodespacerange\n";
+    cmap->writeText(kNameAndType);
+
+    // e.g. "<0000> <FFFF>\n"
+    SkString codespace;
+    codespace.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID);
+    cmap->writeText(codespace.c_str());
+
+    cmap->writeText("endcodespacerange\n");
+}
+
+// Writes the fixed trailer that closes the CMap to |cmap|. The final "end" is
+// intentionally written without a trailing newline.
+static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
+    cmap->writeText("endcmap\n"
+                    "CMapName currentdict /CMap defineresource pop\n"
+                    "end\n"
+                    "end");
+}
+
+namespace {
+struct BFChar {  // One entry of a "beginbfchar" section: a single code mapped to one Unicode value.
+ SkGlyphID fGlyphId;  // Source code, written between '<' and '>' by append_bfchar_section.
+ SkUnichar fUnicode;  // Destination value, written through write_utf16be.
+};
+
+struct BFRange {  // One entry of a "beginbfrange" section: a run of consecutive codes.
+ SkGlyphID fStart;  // First source code of the run.
+ SkGlyphID fEnd;  // Last source code of the run (inclusive).
+ SkUnichar fUnicode;  // Unicode value that fStart maps to; later codes map to consecutive values.
+};
+} // namespace
+
+// Writes |utf32| to |wStream| as the one or two 16-bit units produced by
+// SkUTF16_FromUnichar, each emitted with SkPDFUtils::WriteUInt16BE in order.
+static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) {
+    // These are UTF-16 code units, not glyph IDs, so the buffer is declared
+    // with the plain 16-bit integer type rather than SkGlyphID.
+    uint16_t utf16[2] = {0, 0};
+    const size_t len = SkUTF16_FromUnichar(utf32, utf16);
+    SkASSERT(len == 1 || len == 2);
+    SkPDFUtils::WriteUInt16BE(wStream, utf16[0]);
+    if (len == 2) {
+        SkPDFUtils::WriteUInt16BE(wStream, utf16[1]);
+    }
+}
+
+// Emits every entry of |bfchar| to |cmap| as "<code> <unicode>" lines wrapped
+// in "N beginbfchar" / "endbfchar". The PDF spec allows at most 100 entries in
+// any bf* list, so the entries are written in chunks of up to 100.
+static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
+                                  SkDynamicMemoryWStream* cmap) {
+    const int kMaxEntriesPerList = 100;
+    const int total = bfchar.count();
+    for (int begin = 0; begin < total; begin += kMaxEntriesPerList) {
+        const int end = SkMin32(begin + kMaxEntriesPerList, total);
+        cmap->writeDecAsText(end - begin);
+        cmap->writeText(" beginbfchar\n");
+        for (int k = begin; k < end; ++k) {
+            const BFChar& entry = bfchar[k];
+            cmap->writeText("<");
+            SkPDFUtils::WriteUInt16BE(cmap, entry.fGlyphId);
+            cmap->writeText("> <");
+            write_utf16be(cmap, entry.fUnicode);
+            cmap->writeText(">\n");
+        }
+        cmap->writeText("endbfchar\n");
+    }
+}
+
+// Emits every entry of |bfrange| to |cmap| as "<start> <end> <unicode>" lines
+// wrapped in "N beginbfrange" / "endbfrange". The PDF spec allows at most 100
+// entries in any bf* list, so the entries are written in chunks of up to 100.
+static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
+                                   SkDynamicMemoryWStream* cmap) {
+    const int kMaxEntriesPerList = 100;
+    const int total = bfrange.count();
+    for (int begin = 0; begin < total; begin += kMaxEntriesPerList) {
+        const int end = SkMin32(begin + kMaxEntriesPerList, total);
+        cmap->writeDecAsText(end - begin);
+        cmap->writeText(" beginbfrange\n");
+        for (int k = begin; k < end; ++k) {
+            const BFRange& entry = bfrange[k];
+            cmap->writeText("<");
+            SkPDFUtils::WriteUInt16BE(cmap, entry.fStart);
+            cmap->writeText("> <");
+            SkPDFUtils::WriteUInt16BE(cmap, entry.fEnd);
+            cmap->writeText("> <");
+            write_utf16be(cmap, entry.fUnicode);
+            cmap->writeText(">\n");
+        }
+        cmap->writeText("endbfrange\n");
+    }
+}
+
+// Generates the bfchar and bfrange tables according to PDF spec 1.4 and Adobe
+// Technote 5014, and appends them to |cmap|.
+// The function is not static so that it can be exercised by unit tests.
+//
+//   glyphToUnicode   Unicode value per glyph, indexed by glyph ID. Nothing is
+//                    written when it is empty.
+//   subset           When non-null, only glyphs for which subset->has() is
+//                    true are mapped; when null, every glyph in range is.
+//   cmap             Stream that receives the sections.
+//   multiByteGlyphs  When true the emitted codes equal the glyph IDs; when
+//                    false they are re-based so firstGlyphID becomes code 1.
+//   firstGlyphID,
+//   lastGlyphID      Inclusive glyph ID range to consider, clipped to
+//                    glyphToUnicode.count().
+//
+// The current implementation guarantees that bfchar and bfrange entries do
+// not overlap.
+//
+// The current implementation does not attempt aggressive optimizations for
+// the following case, because the specification is not clear:
+//
+//   4 beginbfchar           1 beginbfchar
+//   <0003> <0013>           <0020> <0014>
+//   <0005> <0015>    to     endbfchar
+//   <0007> <0017>           1 beginbfrange
+//   <0020> <0014>           <0003> <0007> <0013>
+//   endbfchar               endbfrange
+//
+// Adobe Technote 5014 says: "Code mappings (unlike codespace ranges) may
+// overlap, but succeeding maps supersede preceding maps."
+//
+// When searching text in a PDF, bfrange will have higher precedence, so
+// typing char id 0x0014 in the search box will get glyph id 0x0004 first.
+// However, the spec does not say how this kind of conflict is resolved.
+//
+// In the worst case (65536 contiguous Unicode values of which we use every
+// other one), the possible saving from aggressive optimization is 416KB
+// pre-compression, which does not provide enough motivation to implement it.
+void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,
+                             const SkPDFGlyphSet* subset,
+                             SkDynamicMemoryWStream* cmap,
+                             bool multiByteGlyphs,
+                             SkGlyphID firstGlyphID,
+                             SkGlyphID lastGlyphID) {
+    if (glyphToUnicode.isEmpty()) {
+        return;
+    }
+    // For single-byte fonts the emitted code is (glyph ID - glyphOffset), so
+    // that firstGlyphID is written as code 1.
+    int glyphOffset = 0;
+    if (!multiByteGlyphs) {
+        glyphOffset = firstGlyphID - 1;
+    }
+
+    SkTDArray<BFChar> bfcharEntries;
+    SkTDArray<BFRange> bfrangeEntries;
+
+    // The run currently being accumulated; only meaningful when !rangeEmpty.
+    BFRange currentRangeEntry = {0, 0, 0};
+    bool rangeEmpty = true;
+    // One past the last code to consider, clipped to the table size.
+    const int limit =
+            SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset;
+
+    // The loop deliberately runs one step past the end (i == limit); that
+    // final iteration is never in the subset and only flushes a pending run.
+    for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
+        bool inSubset = i < limit &&
+                        (subset == nullptr || subset->has(i + glyphOffset));
+        if (!rangeEmpty) {
+            // PDF spec requires bfrange not changing the higher byte of the
+            // source code, e.g. <1035> <10FF> <2222> is ok, but
+            //                   <1035> <1100> <2222> is no good.
+            bool continuesRun =
+                i == currentRangeEntry.fEnd + 1 &&
+                i >> 8 == currentRangeEntry.fStart >> 8 &&
+                i < limit &&
+                glyphToUnicode[i + glyphOffset] ==
+                    currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
+            // The spec also requires that incrementing the last byte of the
+            // destination string never carries past 255 (the mapping is
+            // undefined otherwise), so a run must stay inside one 256-value
+            // block of Unicode values. The low byte of the value is also the
+            // low byte of the last UTF-16 unit written, surrogates included.
+            // continuesRun already implies i < limit, so the index is valid.
+            bool inRange =
+                continuesRun &&
+                (glyphToUnicode[i + glyphOffset] & ~0xFF) ==
+                    (currentRangeEntry.fUnicode & ~0xFF);
+            if (!inSubset || !inRange) {
+                // Flush the pending run: more than one code becomes a
+                // bfrange entry, a single code becomes a bfchar entry.
+                if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
+                    bfrangeEntries.push(currentRangeEntry);
+                } else {
+                    BFChar* entry = bfcharEntries.append();
+                    entry->fGlyphId = currentRangeEntry.fStart;
+                    entry->fUnicode = currentRangeEntry.fUnicode;
+                }
+                rangeEmpty = true;
+            }
+        }
+        if (inSubset) {
+            // Either extend the pending run to include i, or start a new run
+            // at i if the previous one was just flushed.
+            currentRangeEntry.fEnd = i;
+            if (rangeEmpty) {
+                currentRangeEntry.fStart = i;
+                currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset];
+                rangeEmpty = false;
+            }
+        }
+    }
+
+    // The spec requires all bfchar entries for a font must come before bfrange
+    // entries.
+    append_bfchar_section(bfcharEntries, cmap);
+    append_bfrange_section(bfrangeEntries, cmap);
+}
+
+// Builds a complete ToUnicode CMap (header, bfchar/bfrange sections, footer)
+// for the given glyph range and returns it wrapped in an SkPDFStream.
+// For multi-byte glyphs the codespace range is [firstGlyphID, lastGlyphID];
+// otherwise it is re-based to [1, lastGlyphID - firstGlyphID + 1].
+sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
+        const SkTDArray<SkUnichar>& glyphToUnicode,
+        const SkPDFGlyphSet* subset,
+        bool multiByteGlyphs,
+        SkGlyphID firstGlyphID,
+        SkGlyphID lastGlyphID) {
+    SkDynamicMemoryWStream cmap;
+    append_tounicode_header(
+            &cmap,
+            multiByteGlyphs ? firstGlyphID : 1,
+            multiByteGlyphs ? lastGlyphID : lastGlyphID - firstGlyphID + 1);
+    SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
+                            firstGlyphID, lastGlyphID);
+    append_cmap_footer(&cmap);
+    return sk_make_sp<SkPDFStream>(
+            std::unique_ptr<SkStreamAsset>(cmap.detachAsStream()));
+}
« no previous file with comments | « src/pdf/SkPDFMakeToUnicodeCmap.h ('k') | tests/PDFGlyphsToUnicodeTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698