Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(400)

Side by Side Diff: src/pdf/SkPDFMakeToUnicodeCmap.cpp

Issue 2322403002: SkPDF: Implement /ActualText to make text extraction correct. (Closed)
Patch Set: asserts, bounds check before read, not after (Created 4 years, 3 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/pdf/SkPDFDevice.cpp ('k') | src/pdf/SkPDFUtils.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2011 Google Inc. 2 * Copyright 2011 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkPDFMakeToUnicodeCmap.h" 8 #include "SkPDFMakeToUnicodeCmap.h"
9 #include "SkPDFUtils.h" 9 #include "SkPDFUtils.h"
10 #include "SkUtils.h" 10 #include "SkUtils.h"
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
62 SkUnichar fUnicode; 62 SkUnichar fUnicode;
63 }; 63 };
64 64
65 struct BFRange { 65 struct BFRange {
66 SkGlyphID fStart; 66 SkGlyphID fStart;
67 SkGlyphID fEnd; 67 SkGlyphID fEnd;
68 SkUnichar fUnicode; 68 SkUnichar fUnicode;
69 }; 69 };
70 } // namespace 70 } // namespace
71 71
72 static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) {
73 SkGlyphID utf16[2] = {0, 0};
74 size_t len = SkUTF16_FromUnichar(utf32, utf16);
75 SkASSERT(len == 1 || len == 2);
76 SkPDFUtils::WriteUInt16BE(wStream, utf16[0]);
77 if (len == 2) {
78 SkPDFUtils::WriteUInt16BE(wStream, utf16[1]);
79 }
80 }
81
82 static void write_glyph(SkDynamicMemoryWStream* cmap, 72 static void write_glyph(SkDynamicMemoryWStream* cmap,
83 bool multiByte, 73 bool multiByte,
84 SkGlyphID gid) { 74 SkGlyphID gid) {
85 if (multiByte) { 75 if (multiByte) {
86 SkPDFUtils::WriteUInt16BE(cmap, gid); 76 SkPDFUtils::WriteUInt16BE(cmap, gid);
87 } else { 77 } else {
88 SkPDFUtils::WriteUInt8(cmap, SkToU8(gid)); 78 SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
89 } 79 }
90 } 80 }
91 81
92 static void append_bfchar_section(const SkTDArray<BFChar>& bfchar, 82 static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
93 bool multiByte, 83 bool multiByte,
94 SkDynamicMemoryWStream* cmap) { 84 SkDynamicMemoryWStream* cmap) {
95 // PDF spec defines that every bf* list can have at most 100 entries. 85 // PDF spec defines that every bf* list can have at most 100 entries.
96 for (int i = 0; i < bfchar.count(); i += 100) { 86 for (int i = 0; i < bfchar.count(); i += 100) {
97 int count = bfchar.count() - i; 87 int count = bfchar.count() - i;
98 count = SkMin32(count, 100); 88 count = SkMin32(count, 100);
99 cmap->writeDecAsText(count); 89 cmap->writeDecAsText(count);
100 cmap->writeText(" beginbfchar\n"); 90 cmap->writeText(" beginbfchar\n");
101 for (int j = 0; j < count; ++j) { 91 for (int j = 0; j < count; ++j) {
102 cmap->writeText("<"); 92 cmap->writeText("<");
103 write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId); 93 write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
104 cmap->writeText("> <"); 94 cmap->writeText("> <");
105 write_utf16be(cmap, bfchar[i + j].fUnicode); 95 SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
106 cmap->writeText(">\n"); 96 cmap->writeText(">\n");
107 } 97 }
108 cmap->writeText("endbfchar\n"); 98 cmap->writeText("endbfchar\n");
109 } 99 }
110 } 100 }
111 101
112 static void append_bfrange_section(const SkTDArray<BFRange>& bfrange, 102 static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
113 bool multiByte, 103 bool multiByte,
114 SkDynamicMemoryWStream* cmap) { 104 SkDynamicMemoryWStream* cmap) {
115 // PDF spec defines that every bf* list can have at most 100 entries. 105 // PDF spec defines that every bf* list can have at most 100 entries.
116 for (int i = 0; i < bfrange.count(); i += 100) { 106 for (int i = 0; i < bfrange.count(); i += 100) {
117 int count = bfrange.count() - i; 107 int count = bfrange.count() - i;
118 count = SkMin32(count, 100); 108 count = SkMin32(count, 100);
119 cmap->writeDecAsText(count); 109 cmap->writeDecAsText(count);
120 cmap->writeText(" beginbfrange\n"); 110 cmap->writeText(" beginbfrange\n");
121 for (int j = 0; j < count; ++j) { 111 for (int j = 0; j < count; ++j) {
122 cmap->writeText("<"); 112 cmap->writeText("<");
123 write_glyph(cmap, multiByte, bfrange[i + j].fStart); 113 write_glyph(cmap, multiByte, bfrange[i + j].fStart);
124 cmap->writeText("> <"); 114 cmap->writeText("> <");
125 write_glyph(cmap, multiByte, bfrange[i + j].fEnd); 115 write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
126 cmap->writeText("> <"); 116 cmap->writeText("> <");
127 write_utf16be(cmap, bfrange[i + j].fUnicode); 117 SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
128 cmap->writeText(">\n"); 118 cmap->writeText(">\n");
129 } 119 }
130 cmap->writeText("endbfrange\n"); 120 cmap->writeText("endbfrange\n");
131 } 121 }
132 } 122 }
133 123
134 // Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe 124 // Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
135 // Technote 5014. 125 // Technote 5014.
136 // The function is not static so we can test it in unit tests. 126 // The function is not static so we can test it in unit tests.
137 // 127 //
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
226 SkGlyphID firstGlyphID, 216 SkGlyphID firstGlyphID,
227 SkGlyphID lastGlyphID) { 217 SkGlyphID lastGlyphID) {
228 SkDynamicMemoryWStream cmap; 218 SkDynamicMemoryWStream cmap;
229 append_tounicode_header(&cmap, multiByteGlyphs); 219 append_tounicode_header(&cmap, multiByteGlyphs);
230 SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs, 220 SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
231 firstGlyphID, lastGlyphID); 221 firstGlyphID, lastGlyphID);
232 append_cmap_footer(&cmap); 222 append_cmap_footer(&cmap);
233 return sk_make_sp<SkPDFStream>( 223 return sk_make_sp<SkPDFStream>(
234 std::unique_ptr<SkStreamAsset>(cmap.detachAsStream())); 224 std::unique_ptr<SkStreamAsset>(cmap.detachAsStream()));
235 } 225 }
OLD | NEW
« no previous file with comments | « src/pdf/SkPDFDevice.cpp ('k') | src/pdf/SkPDFUtils.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698