OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright 2011 Google Inc. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. |
| 6 */ |
| 7 |
| 8 #include "SkPDFMakeToUnicodeCmap.h" |
| 9 #include "SkPDFUtils.h" |
| 10 #include "SkUtils.h" |
| 11 |
| 12 static void append_tounicode_header(SkDynamicMemoryWStream* cmap, |
| 13 SkGlyphID firstGlyphID, |
| 14 SkGlyphID lastGlyphID) { |
| 15 // 12 dict begin: 12 is an Adobe-suggested value. Shall not change. |
| 16 // It's there to prevent old version Adobe Readers from malfunctioning. |
| 17 const char* kHeader = |
| 18 "/CIDInit /ProcSet findresource begin\n" |
| 19 "12 dict begin\n" |
| 20 "begincmap\n"; |
| 21 cmap->writeText(kHeader); |
| 22 |
| 23 // The /CIDSystemInfo must be consistent to the one in |
| 24 // SkPDFFont::populateCIDFont(). |
| 25 // We can not pass over the system info object here because the format is |
| 26 // different. This is not a reference object. |
| 27 const char* kSysInfo = |
| 28 "/CIDSystemInfo\n" |
| 29 "<< /Registry (Adobe)\n" |
| 30 "/Ordering (UCS)\n" |
| 31 "/Supplement 0\n" |
| 32 ">> def\n"; |
| 33 cmap->writeText(kSysInfo); |
| 34 |
| 35 // The CMapName must be consistent to /CIDSystemInfo above. |
| 36 // /CMapType 2 means ToUnicode. |
| 37 // Codespace range just tells the PDF processor the valid range. |
| 38 const char* kTypeInfoHeader = |
| 39 "/CMapName /Adobe-Identity-UCS def\n" |
| 40 "/CMapType 2 def\n" |
| 41 "1 begincodespacerange\n"; |
| 42 cmap->writeText(kTypeInfoHeader); |
| 43 |
| 44 // e.g. "<0000> <FFFF>\n" |
| 45 SkString range; |
| 46 range.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID); |
| 47 cmap->writeText(range.c_str()); |
| 48 |
| 49 const char* kTypeInfoFooter = "endcodespacerange\n"; |
| 50 cmap->writeText(kTypeInfoFooter); |
| 51 } |
| 52 |
| 53 static void append_cmap_footer(SkDynamicMemoryWStream* cmap) { |
| 54 const char kFooter[] = |
| 55 "endcmap\n" |
| 56 "CMapName currentdict /CMap defineresource pop\n" |
| 57 "end\n" |
| 58 "end"; |
| 59 cmap->writeText(kFooter); |
| 60 } |
| 61 |
| 62 namespace { |
| 63 struct BFChar { |
| 64 SkGlyphID fGlyphId; |
| 65 SkUnichar fUnicode; |
| 66 }; |
| 67 |
| 68 struct BFRange { |
| 69 SkGlyphID fStart; |
| 70 SkGlyphID fEnd; |
| 71 SkUnichar fUnicode; |
| 72 }; |
| 73 } // namespace |
| 74 |
| 75 static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) { |
| 76 SkGlyphID utf16[2] = {0, 0}; |
| 77 size_t len = SkUTF16_FromUnichar(utf32, utf16); |
| 78 SkASSERT(len == 1 || len == 2); |
| 79 SkPDFUtils::WriteUInt16BE(wStream, utf16[0]); |
| 80 if (len == 2) { |
| 81 SkPDFUtils::WriteUInt16BE(wStream, utf16[1]); |
| 82 } |
| 83 } |
| 84 |
| 85 static void append_bfchar_section(const SkTDArray<BFChar>& bfchar, |
| 86 SkDynamicMemoryWStream* cmap) { |
| 87 // PDF spec defines that every bf* list can have at most 100 entries. |
| 88 for (int i = 0; i < bfchar.count(); i += 100) { |
| 89 int count = bfchar.count() - i; |
| 90 count = SkMin32(count, 100); |
| 91 cmap->writeDecAsText(count); |
| 92 cmap->writeText(" beginbfchar\n"); |
| 93 for (int j = 0; j < count; ++j) { |
| 94 cmap->writeText("<"); |
| 95 SkPDFUtils::WriteUInt16BE(cmap, bfchar[i + j].fGlyphId); |
| 96 cmap->writeText("> <"); |
| 97 write_utf16be(cmap, bfchar[i + j].fUnicode); |
| 98 cmap->writeText(">\n"); |
| 99 } |
| 100 cmap->writeText("endbfchar\n"); |
| 101 } |
| 102 } |
| 103 |
| 104 static void append_bfrange_section(const SkTDArray<BFRange>& bfrange, |
| 105 SkDynamicMemoryWStream* cmap) { |
| 106 // PDF spec defines that every bf* list can have at most 100 entries. |
| 107 for (int i = 0; i < bfrange.count(); i += 100) { |
| 108 int count = bfrange.count() - i; |
| 109 count = SkMin32(count, 100); |
| 110 cmap->writeDecAsText(count); |
| 111 cmap->writeText(" beginbfrange\n"); |
| 112 for (int j = 0; j < count; ++j) { |
| 113 cmap->writeText("<"); |
| 114 SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fStart); |
| 115 cmap->writeText("> <"); |
| 116 SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fEnd); |
| 117 cmap->writeText("> <"); |
| 118 write_utf16be(cmap, bfrange[i + j].fUnicode); |
| 119 cmap->writeText(">\n"); |
| 120 } |
| 121 cmap->writeText("endbfrange\n"); |
| 122 } |
| 123 } |
| 124 |
| 125 // Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe |
| 126 // Technote 5014. |
| 127 // The function is not static so we can test it in unit tests. |
| 128 // |
| 129 // Current implementation guarantees bfchar and bfrange entries do not overlap. |
| 130 // |
| 131 // Current implementation does not attempt aggresive optimizations against |
| 132 // following case because the specification is not clear. |
| 133 // |
| 134 // 4 beginbfchar 1 beginbfchar |
| 135 // <0003> <0013> <0020> <0014> |
| 136 // <0005> <0015> to endbfchar |
| 137 // <0007> <0017> 1 beginbfrange |
| 138 // <0020> <0014> <0003> <0007> <0013> |
| 139 // endbfchar endbfrange |
| 140 // |
| 141 // Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may |
| 142 // overlap, but succeeding maps supersede preceding maps." |
| 143 // |
| 144 // In case of searching text in PDF, bfrange will have higher precedence so |
| 145 // typing char id 0x0014 in search box will get glyph id 0x0004 first. However, |
| 146 // the spec does not mention how will this kind of conflict being resolved. |
| 147 // |
| 148 // For the worst case (having 65536 continuous unicode and we use every other |
| 149 // one of them), the possible savings by aggressive optimization is 416KB |
| 150 // pre-compressed and does not provide enough motivation for implementation. |
| 151 void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode, |
| 152 const SkPDFGlyphSet* subset, |
| 153 SkDynamicMemoryWStream* cmap, |
| 154 bool multiByteGlyphs, |
| 155 SkGlyphID firstGlyphID, |
| 156 SkGlyphID lastGlyphID) { |
| 157 if (glyphToUnicode.isEmpty()) { |
| 158 return; |
| 159 } |
| 160 int glyphOffset = 0; |
| 161 if (!multiByteGlyphs) { |
| 162 glyphOffset = firstGlyphID - 1; |
| 163 } |
| 164 |
| 165 SkTDArray<BFChar> bfcharEntries; |
| 166 SkTDArray<BFRange> bfrangeEntries; |
| 167 |
| 168 BFRange currentRangeEntry = {0, 0, 0}; |
| 169 bool rangeEmpty = true; |
| 170 const int limit = |
| 171 SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset; |
| 172 |
| 173 for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) { |
| 174 bool inSubset = i < limit && |
| 175 (subset == nullptr || subset->has(i + glyphOffset)); |
| 176 if (!rangeEmpty) { |
| 177 // PDF spec requires bfrange not changing the higher byte, |
| 178 // e.g. <1035> <10FF> <2222> is ok, but |
| 179 // <1035> <1100> <2222> is no good |
| 180 bool inRange = |
| 181 i == currentRangeEntry.fEnd + 1 && |
| 182 i >> 8 == currentRangeEntry.fStart >> 8 && |
| 183 i < limit && |
| 184 glyphToUnicode[i + glyphOffset] == |
| 185 currentRangeEntry.fUnicode + i - currentRangeEntry.fStart; |
| 186 if (!inSubset || !inRange) { |
| 187 if (currentRangeEntry.fEnd > currentRangeEntry.fStart) { |
| 188 bfrangeEntries.push(currentRangeEntry); |
| 189 } else { |
| 190 BFChar* entry = bfcharEntries.append(); |
| 191 entry->fGlyphId = currentRangeEntry.fStart; |
| 192 entry->fUnicode = currentRangeEntry.fUnicode; |
| 193 } |
| 194 rangeEmpty = true; |
| 195 } |
| 196 } |
| 197 if (inSubset) { |
| 198 currentRangeEntry.fEnd = i; |
| 199 if (rangeEmpty) { |
| 200 currentRangeEntry.fStart = i; |
| 201 currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset]; |
| 202 rangeEmpty = false; |
| 203 } |
| 204 } |
| 205 } |
| 206 |
| 207 // The spec requires all bfchar entries for a font must come before bfrange |
| 208 // entries. |
| 209 append_bfchar_section(bfcharEntries, cmap); |
| 210 append_bfrange_section(bfrangeEntries, cmap); |
| 211 } |
| 212 |
| 213 sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap( |
| 214 const SkTDArray<SkUnichar>& glyphToUnicode, |
| 215 const SkPDFGlyphSet* subset, |
| 216 bool multiByteGlyphs, |
| 217 SkGlyphID firstGlyphID, |
| 218 SkGlyphID lastGlyphID) { |
| 219 SkDynamicMemoryWStream cmap; |
| 220 if (multiByteGlyphs) { |
| 221 append_tounicode_header(&cmap, firstGlyphID, lastGlyphID); |
| 222 } else { |
| 223 append_tounicode_header(&cmap, 1, lastGlyphID - firstGlyphID + 1); |
| 224 } |
| 225 SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs, |
| 226 firstGlyphID, lastGlyphID); |
| 227 append_cmap_footer(&cmap); |
| 228 return sk_make_sp<SkPDFStream>( |
| 229 std::unique_ptr<SkStreamAsset>(cmap.detachAsStream())); |
| 230 } |
OLD | NEW |