Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(63)

Side by Side Diff: src/pdf/SkPDFMetadata.cpp

Issue 1394263003: SkPDF: Optionally output PDF/A-2b archive format. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: works on windows Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright 2015 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkPDFMetadata.h"
9 #include "SkMD5.h"
10 #include "SkPDFTypes.h"
11
12 SkPDFMetadata::UUID SkPDFMetadata::uuid() const {
13 SkMD5 md5;
14 const char uuidNamespace[] = "org.skia.pdf\n";
tomhudson 2015/10/09 15:31:37 The require newline-termination of the namespace?
hal.canary 2015/10/09 19:13:27 // The main requirement is for the UUID to be uniq
15 md5.write(uuidNamespace, strlen(uuidNamespace));
16 SkMSec msec = SkTime::GetMSecs();
17 md5.write(&msec, sizeof(msec));
18 SkTime::DateTime dateTime;
19 SkTime::GetDateTime(&dateTime);
20 md5.write(&dateTime, sizeof(dateTime));
21 if (fCreation) {
22 md5.write(fCreation.get(), sizeof(fCreation));
23 }
24 if (fModified) {
25 md5.write(fModified.get(), sizeof(fModified));
26 }
27 for (const auto& kv : fInfo) {
28 md5.write(kv.fKey.c_str(), kv.fKey.size());
29 md5.write("\037", 1);
30 md5.write(kv.fValue.c_str(), kv.fValue.size());
31 md5.write("\036", 1);
32 }
33 SkMD5::Digest digest;
34 md5.finish(digest);
35 digest.data[6] = (digest.data[6] & 0x0F) | 0x40;
36 digest.data[8] = (digest.data[6] & 0x3F) | 0x80;
tomhudson 2015/10/09 15:31:37 Where do these magic numbers come from, the spec?
hal.canary 2015/10/09 19:13:27 // See RFC 4122, page 6-7.
tomhudson 2015/10/09 19:21:37 Acknowledged.
37 static_assert(sizeof(digest) == sizeof(UUID), "uuid_size");
38 SkPDFMetadata::UUID uuid;
39 memcpy(&uuid, &digest, sizeof(digest));
40 return uuid;
41 }
42
43 SkPDFObject* SkPDFMetadata::CreatePdfId(const UUID& doc, const UUID& instance) {
44 // /ID [ <81b14aafa313db63dbd6f981e49f94f4>
45 // <81b14aafa313db63dbd6f981e49f94f4> ]
46 SkAutoTUnref<SkPDFArray> array(new SkPDFArray);
47 static_assert(sizeof(UUID) == 16, "uuid_size");
48 array->appendString(
49 SkString(reinterpret_cast<const char*>(&doc), sizeof(UUID)));
50 array->appendString(
51 SkString(reinterpret_cast<const char*>(&instance), sizeof(UUID)));
52 return array.detach();
53 }
54
55 static SkString pdf_date(const SkTime::DateTime& dt) {
56 int timeZoneMinutes = SkToInt(dt.fTimeZoneMinutes);
57 char timezoneSign = timeZoneMinutes >= 0 ? '+' : '-';
58 int timeZoneHours = SkTAbs(timeZoneMinutes) / 60;
59 timeZoneMinutes = SkTAbs(timeZoneMinutes) % 60;
60 return SkStringPrintf(
61 "D:%04u%02u%02u%02u%02u%02u%c%02d'%02d'",
62 static_cast<unsigned>(dt.fYear), static_cast<unsigned>(dt.fMonth),
63 static_cast<unsigned>(dt.fDay), static_cast<unsigned>(dt.fHour),
64 static_cast<unsigned>(dt.fMinute),
65 static_cast<unsigned>(dt.fSecond), timezoneSign, timeZoneHours,
66 timeZoneMinutes);
67 }
68
69 SkPDFObject* SkPDFMetadata::createDocumentInformationDict() const {
70 SkAutoTUnref<SkPDFDict> dict(new SkPDFDict);
71 static const char* keys[] = {
72 "Title", "Author", "Subject", "Keywords", "Creator" };
73 for (const char* key : keys) {
74 for (const SkDocument::Attribute& keyValue : fInfo) {
75 if (keyValue.fKey.equals(key)) {
76 dict->insertString(key, keyValue.fValue);
77 }
78 }
79 }
80 dict->insertString("Producer", "Skia/PDF");
81 if (fCreation) {
82 dict->insertString("CreationDate", pdf_date(*fCreation.get()));
83 }
84 if (fModified) {
85 dict->insertString("ModDate", pdf_date(*fModified.get()));
86 }
87 return dict.detach();
88 }
89
90 #ifdef SK_PDF_GENERATE_PDFA
91 // Improvement on SkStringPrintf to allow for arbitrarily long output.
92 // TODO: replace SkStringPrintf.
93 static SkString sk_string_printf(const char* format, ...) {
94 #ifdef SK_BUILD_FOR_WIN
95 va_list args;
96 va_start(args, format);
97 char buffer[1024];
98 int length = _vsnprintf_s(buffer, sizeof(buffer), _TRUNCATE, format, args);
99 va_end(args);
100 if (length >= 0 && length < (int)sizeof(buffer)) {
101 return SkString(buffer, length);
102 }
103 va_start(args, format);
104 length = _vscprintf(format, args);
105 va_end(args);
106
107 SkString string((size_t)length);
108 va_start(args, format);
109 SkDEBUGCODE(int check =)
110 _vsnprintf_s(string.writable_str(), length + 1, _TRUNCATE, format, args);
111 va_end(args);
112 SkASSERT(check == length);
113 SkASSERT(string[length] == '\0');
114 return skstd::move(string);
115 #else // C99/C++11 standard vsnprintf
116 // TODO: When all compilers support this, remove windows-specific code.
tomhudson 2015/10/09 15:31:37 Nit: how far are we from being able to get rid of
hal.canary 2015/10/09 19:13:27 We have C++11 *semantics* everywhere. The version
tomhudson 2015/10/09 19:21:37 Oh, ouch.
117 va_list args;
118 va_start(args, format);
119 char buffer[1024];
120 int length = vsnprintf(buffer, sizeof(buffer), format, args);
121 va_end(args);
122 if (length < 0) {
123 return SkString();
124 }
125 if (length < (int)sizeof(buffer)) {
126 return SkString(buffer, length);
127 }
128 SkString string((size_t)length);
129 va_start(args, format);
130 SkDEBUGCODE(int check =)
131 vsnprintf(string.writable_str(), length + 1, format, args);
132 va_end(args);
133 SkASSERT(check == length);
134 SkASSERT(string[length] == '\0');
135 return skstd::move(string);
136 #endif
137 }
138
139 static const SkString get(const SkTArray<SkDocument::Attribute>& info,
140 const char* key) {
141 for (const auto& keyValue : info) {
142 if (keyValue.fKey.equals(key)) {
143 return keyValue.fValue;
144 }
145 }
146 return SkString();
147 }
148
149 #define HEXIFY(INPUT_PTR, OUTPUT_PTR, HEX_STRING, BYTE_COUNT) \
150 do { \
151 for (int i = 0; i < (BYTE_COUNT); ++i) { \
152 uint8_t value = *(INPUT_PTR)++; \
153 *(OUTPUT_PTR)++ = (HEX_STRING)[value >> 4]; \
154 *(OUTPUT_PTR)++ = (HEX_STRING)[value & 0xF]; \
155 } \
156 } while (false)
tomhudson 2015/10/09 15:31:37 Nit: if you already have a for() statement with ev
hal.canary 2015/10/09 19:13:27 It means nothing, but allows me to put a semi-colo
tomhudson 2015/10/09 19:21:37 Acknowledged.
157 static SkString uuid_to_string(const SkPDFMetadata::UUID& uuid) {
158 // 8-4-4-4-12
159 char buffer[36]; // [32 + 4]
160 static const char gHex[] = "0123456789abcdef";
161 SkASSERT(strlen(gHex) == 16);
162 char* ptr = buffer;
163 const uint8_t* data = uuid.fData;
164 HEXIFY(data, ptr, gHex, 4);
165 *ptr++ = '-';
166 HEXIFY(data, ptr, gHex, 2);
167 *ptr++ = '-';
168 HEXIFY(data, ptr, gHex, 2);
169 *ptr++ = '-';
170 HEXIFY(data, ptr, gHex, 2);
171 *ptr++ = '-';
172 HEXIFY(data, ptr, gHex, 6);
173 SkASSERT(ptr == buffer + 36);
174 SkASSERT(data == uuid.fData + 16);
175 return SkString(buffer, 36);
176 }
177 #undef HEXIFY
178
179 namespace {
180 class PDFXMLObject : public SkPDFObject {
181 public:
182 PDFXMLObject(SkString xml) : fXML(skstd::move(xml)) {}
183 void emitObject(SkWStream* stream,
184 const SkPDFObjNumMap& omap,
185 const SkPDFSubstituteMap& smap) const override {
186 SkPDFDict dict("Metadata");
187 dict.insertName("Subtype", "XML");
188 dict.insertInt("Length", fXML.size());
189 dict.emitObject(stream, omap, smap);
190 static const char streamBegin[] = " stream\n";
191 stream->write(streamBegin, strlen(streamBegin));
192 // Do not compress this.
tomhudson 2015/10/09 15:31:37 WHY, OH, WHY? (And why is compression even a topi
hal.canary 2015/10/09 19:13:27 The standard requires that a program that does not
tomhudson 2015/10/09 19:21:37 Just trying to understand why the comment was ther
hal.canary 2015/10/09 19:28:49 Comment fixed: // Do not compress this. The
193 stream->write(fXML.c_str(), fXML.size());
194 static const char streamEnd[] = "\nendstream";
195 stream->write(streamEnd, strlen(streamEnd));
196 }
197 private:
198 const SkString fXML;
199 };
200 } // namespace
201
202 static int count_xml_escape_size(const SkString& input) {
203 int extra = 0;
204 for (size_t i = 0; i < input.size(); ++i) {
205 if (input[i] == '&') {
206 extra += 4; // strlen("&amp;") - strlen("&")
207 } else if (input[i] == '<') {
208 extra += 3; // strlen("&lt;") - strlen("<")
tomhudson 2015/10/09 15:31:37 Nit: not obvious why we don't also have to escape
hal.canary 2015/10/09 19:13:27 <dc:description> <rdf:Alt> <rdf:li>
tomhudson 2015/10/09 19:21:37 I think the key question is: XML never has user-ge
hal.canary 2015/10/09 19:28:49 We don't use that aspect of XML here everywhere w
209 }
210 }
211 return extra;
212 }
213
214 const SkString escape_xml(const SkString& input,
215 const char* before = nullptr,
216 const char* after = nullptr) {
217 if (input.size() == 0) {
218 return input;
219 }
220 // "&" --> "&amp;" and "<" --> "&lt;"
221 // text is assumed to be in UTF-8
222 // all strings are xml content, not attribute values.
223 size_t beforeLen = before ? strlen(before) : 0;
224 size_t afterLen = after ? strlen(after) : 0;
225 int extra = count_xml_escape_size(input);
226 SkString output(input.size() + extra + beforeLen + afterLen);
227 char* out = output.writable_str();
228 if (before) {
229 strncpy(out, before, beforeLen);
230 out += beforeLen;
231 }
232 static const char kAmp[] = "&amp;";
233 static const char kLt[] = "&lt;";
234 for (size_t i = 0; i < input.size(); ++i) {
235 if (input[i] == '&') {
236 strncpy(out, kAmp, strlen(kAmp));
237 out += strlen(kAmp);
238 } else if (input[i] == '<') {
239 strncpy(out, kLt, strlen(kLt));
240 out += strlen(kLt);
241 } else {
242 *out++ = input[i];
243 }
244 }
245 if (after) {
246 strncpy(out, after, afterLen);
247 out += afterLen;
248 }
249 // Validate that we haven't written outside of our string.
250 SkASSERT(out == &output.writable_str()[output.size()]);
251 *out = '\0';
252 return skstd::move(output);
253 }
254
255 SkPDFObject* SkPDFMetadata::createXMPObject(const UUID& doc,
256 const UUID& instance) const {
257 // A PDF-1a file should have
258 // the integer value 1 for pdfaid:part and
tomhudson 2015/10/09 15:31:37 Yet you write '2' and 'B' below in templateString?
hal.canary 2015/10/09 19:13:27 The spec was confusing me. But I think I have it
259 // the value "A" for pdfaid:conformance.
260 static const char templateString[] =
261 "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n"
262 "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n"
263 " x:xmptk=\"Adobe XMP Core 5.4-c005 78.147326, 2012/08/23-13:03:03\">\n"
264 "<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n"
265 "<rdf:Description rdf:about=\"\"\n"
266 " xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\"\n"
267 " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n"
268 " xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\"\n"
269 " xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\"\n"
270 " xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n"
271 "<pdfaid:part>2</pdfaid:part>\n"
272 "<pdfaid:conformance>B</pdfaid:conformance>\n"
273 "%s" // ModifyDate
274 "%s" // CreateDate
275 "%s" // MetadataDate
276 "%s" // xmp:CreatorTool
277 "<dc:format>application/pdf</dc:format>\n"
278 "%s" // dc:title
279 "%s" // dc:description
280 "%s" // author
281 "%s" // keywords
282 "<xmpMM:DocumentID>uuid:%s</xmpMM:DocumentID>\n"
283 "<xmpMM:InstanceID>uuid:%s</xmpMM:InstanceID>\n"
284 "<pdf:Producer>Skia/PDF</pdf:Producer>\n"
285 "%s" // pdf:Keywords
286 "</rdf:Description>\n"
287 "</rdf:RDF>\n"
288 "</x:xmpmeta>\n" // Note: the standard suggests 4k of padding.
289 "<?xpacket end=\"w\"?>\n";
290
291 SkString creationDate;
292 SkString modificationDate;
293 SkString metadataDate;
294 if (fCreation) {
295 SkString tmp;
296 fCreation->toISO8601(&tmp);
297 SkASSERT(0 == count_xml_escape_size(tmp));
298 // YYYY-mm-ddTHH:MM:SS[+|-]ZZ:ZZ; no need to escape
299 creationDate = sk_string_printf("<xmp:CreateDate>%s</xmp:CreateDate>\n",
300 tmp.c_str());
301 }
302 if (fModified) {
303 SkString tmp;
304 fModified->toISO8601(&tmp);
305 SkASSERT(0 == count_xml_escape_size(tmp));
306 modificationDate =
307 sk_string_printf("<xmp:ModifyDate>%s</xmp:ModifyDate>\n",
308 tmp.c_str());
309 metadataDate = sk_string_printf("<xmp:MetadataDate>%s</xmp:MetadataDate> \n",
310 tmp.c_str());
311 }
312
313 SkString title = escape_xml(get(fInfo, "Title"),
314 "<dc:title><rdf:Alt><rdf:li>",
315 "</rdf:li></rdf:Alt></dc:title>\n");
316 SkString author = escape_xml(get(fInfo, "Author"),
317 "<dc:creator><rdf:Bag><rdf:li>",
318 "</rdf:li></rdf:Bag></dc:creator>\n");
319 // TODO: in theory, XMP can support multiple authors. Split on a delimiter?
320 SkString subject = escape_xml(get(fInfo, "Subject"),
321 "<dc:description><rdf:Alt><rdf:li>",
322 "</rdf:li></rdf:Alt></dc:description>\n");
323 SkString keywords1 = escape_xml(get(fInfo, "Keywords"),
324 "<dc:subject><rdf:Bag><rdf:li>",
325 "</rdf:li></rdf:Bag></dc:subject>\n");
326 SkString keywords2 = escape_xml(get(fInfo, "Keywords"),
327 "<pdf:Keywords>",
328 "</pdf:Keywords>\n");
329
330 // TODO: in theory, keywords can be a list too.
331 SkString creator = escape_xml(get(fInfo, "Creator"),
332 "<xmp:CreatorTool>", "</xmp:CreatorTool>\n");
333 SkString documentID = uuid_to_string(doc); // no need to escape
334 SkASSERT(0 == count_xml_escape_size(documentID));
335 SkString instanceID = uuid_to_string(instance);
336 SkASSERT(0 == count_xml_escape_size(instanceID));
337 return new PDFXMLObject(sk_string_printf(templateString,
338 modificationDate.c_str(),
339 creationDate.c_str(),
340 metadataDate.c_str(),
341 creator.c_str(),
342 title.c_str(),
343 subject.c_str(),
344 author.c_str(),
345 keywords1.c_str(),
346 documentID.c_str(),
347 instanceID.c_str(),
348 keywords2.c_str()));
349 }
350
351 #endif // SK_PDF_GENERATE_PDFA
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698