OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright 2013 Google Inc. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license that can be | |
5 * found in the LICENSE file. | |
6 */ | |
7 | |
8 #include "SkPdfConfig.h" | |
9 #include "SkPdfDiffEncoder.h" | |
10 #include "SkPdfNativeObject.h" | |
11 #include "SkPdfNativeTokenizer.h" | |
12 #include "SkPdfUtils.h" | |
13 | |
14 // TODO(edisonn): mac builder does not find the header ... but from headers is o
k | |
15 //#include "SkPdfStreamCommonDictionary_autogen.h" | |
16 //#include "SkPdfImageDictionary_autogen.h" | |
17 #include "SkPdfHeaders_autogen.h" | |
18 | |
19 | |
20 // TODO(edisonn): Perf, Make this function run faster. | |
21 // There could be 0s between start and end. | |
22 // needle will not contain 0s. | |
23 static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) { | |
24 size_t needleLen = strlen(needle); | |
25 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needl
eLen == hayEnd)) && | |
26 strncmp(hayStart, needle, needleLen) == 0) { | |
27 return hayStart; | |
28 } | |
29 | |
30 hayStart++; | |
31 | |
32 while (hayStart < hayEnd) { | |
33 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) && | |
34 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || | |
35 (hayStart+needleLen == hayEnd)) && | |
36 strncmp(hayStart, needle, needleLen) == 0) { | |
37 return hayStart; | |
38 } | |
39 hayStart++; | |
40 } | |
41 return NULL; | |
42 } | |
43 | |
44 const unsigned char* skipPdfWhiteSpaces(const unsigned char* start, const unsign
ed char* end) { | |
45 while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDeli
miter)) { | |
46 TRACE_COMMENT(*start); | |
47 if (*start == kComment_PdfDelimiter) { | |
48 // skip the comment until end of line | |
49 while (start < end && !isPdfEOL(*start)) { | |
50 start++; | |
51 TRACE_COMMENT(*start); | |
52 } | |
53 } else { | |
54 start++; | |
55 } | |
56 } | |
57 return start; | |
58 } | |
59 | |
60 const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned ch
ar* end) { | |
61 SkASSERT(!isPdfWhiteSpace(*start)); | |
62 | |
63 if (start < end && isPdfDelimiter(*start)) { | |
64 TRACE_TK(*start); | |
65 start++; | |
66 return start; | |
67 } | |
68 | |
69 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) { | |
70 TRACE_TK(*start); | |
71 start++; | |
72 } | |
73 return start; | |
74 } | |
75 | |
76 // The parsing should end with a ]. | |
77 static const unsigned char* readArray(const unsigned char* start, const unsigned
char* end, | |
78 SkPdfNativeObject* array, | |
79 SkPdfAllocator* allocator, SkPdfNativeDoc*
doc) { | |
80 SkPdfNativeObject::makeEmptyArray(array); | |
81 // PUT_TRACK_STREAM(array, start, start) | |
82 | |
83 if (allocator == NULL) { | |
84 // TODO(edisonn): report/warning error/assert | |
85 return end; | |
86 } | |
87 | |
88 while (start < end) { | |
89 // skip white spaces | |
90 start = skipPdfWhiteSpaces(start, end); | |
91 | |
92 const unsigned char* endOfToken = endOfPdfToken(start, end); | |
93 | |
94 if (endOfToken == start) { | |
95 // TODO(edisonn): report error in pdf file (end of stream with ] for
end of aray | |
96 return start; | |
97 } | |
98 | |
99 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimit
er) { | |
100 return endOfToken; | |
101 } | |
102 | |
103 SkPdfNativeObject* newObj = allocator->allocObject(); | |
104 start = nextObject(start, end, newObj, allocator, doc); | |
105 // TODO(edisonn): perf/memory: put the variables on the stack, and flush
them on the array | |
106 // only when we are sure they are not references! | |
107 if (newObj->isKeywordReference() && array->size() >= 2 && | |
108 array->objAtAIndex(SkToInt(array->size() - 1))->isInteger() && | |
109 array->objAtAIndex(SkToInt(array->size() - 2))->isInteger()) { | |
110 SkPdfNativeObject* gen = array->removeLastInArray(); | |
111 SkPdfNativeObject* id = array->removeLastInArray(); | |
112 | |
113 SkPdfNativeObject::resetAndMakeReference((unsigned int)id->intValue(
), | |
114 (unsigned int)gen->intValue
(), newObj); | |
115 // newObj PUT_TRACK_PARAMETERS_OBJ2(id, newObj) - store end, as now | |
116 } | |
117 array->appendInArray(newObj); | |
118 } | |
119 // TODO(edisonn): report not reached, we should never get here | |
120 // TODO(edisonn): there might be a bug here, enable an assert and run it on
files | |
121 // or it might be that the files were actually corrupted | |
122 return start; | |
123 } | |
124 | |
125 static const unsigned char* readString(const unsigned char* start, const unsigne
d char* end, | |
126 unsigned char* out) { | |
127 const unsigned char* in = start; | |
128 bool hasOut = (out != NULL); | |
129 | |
130 int openRoundBrackets = 1; | |
131 while (in < end) { | |
132 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter); | |
133 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter); | |
134 if (openRoundBrackets == 0) { | |
135 in++; // consumed ) | |
136 break; | |
137 } | |
138 | |
139 if (*in == kEscape_PdfSpecial) { | |
140 if (in + 1 < end) { | |
141 switch (in[1]) { | |
142 case 'n': | |
143 if (hasOut) { *out = kLF_PdfWhiteSpace; } | |
144 out++; | |
145 in += 2; | |
146 break; | |
147 | |
148 case 'r': | |
149 if (hasOut) { *out = kCR_PdfWhiteSpace; } | |
150 out++; | |
151 in += 2; | |
152 break; | |
153 | |
154 case 't': | |
155 if (hasOut) { *out = kHT_PdfWhiteSpace; } | |
156 out++; | |
157 in += 2; | |
158 break; | |
159 | |
160 case 'b': | |
161 // TODO(edisonn): any special meaning to backspace? | |
162 if (hasOut) { *out = kBackspace_PdfSpecial; } | |
163 out++; | |
164 in += 2; | |
165 break; | |
166 | |
167 case 'f': | |
168 if (hasOut) { *out = kFF_PdfWhiteSpace; } | |
169 out++; | |
170 in += 2; | |
171 break; | |
172 | |
173 case kOpenedRoundBracket_PdfDelimiter: | |
174 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; } | |
175 out++; | |
176 in += 2; | |
177 break; | |
178 | |
179 case kClosedRoundBracket_PdfDelimiter: | |
180 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; } | |
181 out++; | |
182 in += 2; | |
183 break; | |
184 | |
185 case kEscape_PdfSpecial: | |
186 if (hasOut) { *out = kEscape_PdfSpecial; } | |
187 out++; | |
188 in += 2; | |
189 break; | |
190 | |
191 case '0': | |
192 case '1': | |
193 case '2': | |
194 case '3': | |
195 case '4': | |
196 case '5': | |
197 case '6': | |
198 case '7': { | |
199 //read octals | |
200 in++; // consume backslash | |
201 | |
202 int code = 0; | |
203 int i = 0; | |
204 while (in < end && *in >= '0' && *in < '8') { | |
205 code = (code << 3) + ((*in) - '0'); // code * 8
+ d | |
206 i++; | |
207 in++; | |
208 if (i == 3) { | |
209 if (hasOut) { *out = code & 0xff; } | |
210 out++; | |
211 i = 0; | |
212 } | |
213 } | |
214 if (i > 0) { | |
215 if (hasOut) { *out = code & 0xff; } | |
216 out++; | |
217 } | |
218 } | |
219 break; | |
220 | |
221 default: | |
222 // Per spec, backslash is ignored if escaped ch is unkno
wn | |
223 in++; | |
224 break; | |
225 } | |
226 } else { | |
227 in++; | |
228 } | |
229 } else { | |
230 if (hasOut) { *out = *in; } | |
231 in++; | |
232 out++; | |
233 } | |
234 } | |
235 | |
236 if (hasOut) { | |
237 return in; // consumed already ) at the end of the string | |
238 } else { | |
239 // return where the string would end if we reuse the string | |
240 return start + (out - (const unsigned char*)NULL); | |
241 } | |
242 } | |
243 | |
244 static size_t readStringLength(const unsigned char* start, const unsigned char*
end) { | |
245 return readString(start, end, NULL) - start; | |
246 } | |
247 | |
248 static const unsigned char* readString(const unsigned char* start, const unsigne
d char* end, | |
249 SkPdfNativeObject* str, SkPdfAllocator* a
llocator) { | |
250 if (!allocator) { | |
251 // TODO(edisonn): report error/warn/assert | |
252 return end; | |
253 } | |
254 | |
255 size_t outLength = readStringLength(start, end); | |
256 unsigned char* out = (unsigned char*)allocator->alloc(outLength); | |
257 const unsigned char* now = readString(start, end, out); | |
258 SkPdfNativeObject::makeString(out, out + outLength, str); | |
259 // PUT_TRACK_STREAM(str, start, now) | |
260 TRACE_STRING(out, out + outLength); | |
261 return now; // consumed already ) at the end of the string | |
262 } | |
263 | |
264 static const unsigned char* readHexString(const unsigned char* start, const unsi
gned char* end, | |
265 unsigned char* out) { | |
266 bool hasOut = (out != NULL); | |
267 const unsigned char* in = start; | |
268 | |
269 unsigned char code = 0; | |
270 | |
271 while (in < end) { | |
272 while (in < end && isPdfWhiteSpace(*in)) { | |
273 in++; | |
274 } | |
275 | |
276 if (*in == kClosedInequityBracket_PdfDelimiter) { | |
277 in++; // consume > | |
278 // normal exit | |
279 break; | |
280 } | |
281 | |
282 if (in >= end) { | |
283 // end too soon | |
284 break; | |
285 } | |
286 | |
287 switch (*in) { | |
288 case '0': | |
289 case '1': | |
290 case '2': | |
291 case '3': | |
292 case '4': | |
293 case '5': | |
294 case '6': | |
295 case '7': | |
296 case '8': | |
297 case '9': | |
298 code = (*in - '0') << 4; | |
299 break; | |
300 | |
301 case 'a': | |
302 case 'b': | |
303 case 'c': | |
304 case 'd': | |
305 case 'e': | |
306 case 'f': | |
307 code = (*in - 'a' + 10) << 4; | |
308 break; | |
309 | |
310 case 'A': | |
311 case 'B': | |
312 case 'C': | |
313 case 'D': | |
314 case 'E': | |
315 case 'F': | |
316 code = (*in - 'A' + 10) << 4; | |
317 break; | |
318 | |
319 // TODO(edisonn): spec does not say how to handle this error | |
320 default: | |
321 break; | |
322 } | |
323 | |
324 in++; // advance | |
325 | |
326 while (in < end && isPdfWhiteSpace(*in)) { | |
327 in++; | |
328 } | |
329 | |
330 // TODO(edisonn): report error | |
331 if (in >= end) { | |
332 if (hasOut) { *out = code; } | |
333 out++; | |
334 break; | |
335 } | |
336 | |
337 if (*in == kClosedInequityBracket_PdfDelimiter) { | |
338 if (hasOut) { *out = code; } | |
339 out++; | |
340 in++; | |
341 break; | |
342 } | |
343 | |
344 switch (*in) { | |
345 case '0': | |
346 case '1': | |
347 case '2': | |
348 case '3': | |
349 case '4': | |
350 case '5': | |
351 case '6': | |
352 case '7': | |
353 case '8': | |
354 case '9': | |
355 code += (*in - '0'); | |
356 break; | |
357 | |
358 case 'a': | |
359 case 'b': | |
360 case 'c': | |
361 case 'd': | |
362 case 'e': | |
363 case 'f': | |
364 code += (*in - 'a' + 10); | |
365 break; | |
366 | |
367 case 'A': | |
368 case 'B': | |
369 case 'C': | |
370 case 'D': | |
371 case 'E': | |
372 case 'F': | |
373 code += (*in - 'A' + 10); | |
374 break; | |
375 | |
376 // TODO(edisonn): spec does not say how to handle this error | |
377 default: | |
378 break; | |
379 } | |
380 | |
381 if (hasOut) { *out = code; } | |
382 out++; | |
383 in++; | |
384 } | |
385 | |
386 if (hasOut) { | |
387 return in; // consumed already ) at the end of the string | |
388 } else { | |
389 // return where the string would end if we reuse the string | |
390 return start + (out - (const unsigned char*)NULL); | |
391 } | |
392 } | |
393 | |
394 static size_t readHexStringLength(const unsigned char* start, const unsigned cha
r* end) { | |
395 return readHexString(start, end, NULL) - start; | |
396 } | |
397 | |
398 static const unsigned char* readHexString(const unsigned char* start, const unsi
gned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) { | |
399 if (!allocator) { | |
400 // TODO(edisonn): report error/warn/assert | |
401 return end; | |
402 } | |
403 size_t outLength = readHexStringLength(start, end); | |
404 unsigned char* out = (unsigned char*)allocator->alloc(outLength); | |
405 const unsigned char* now = readHexString(start, end, out); | |
406 SkPdfNativeObject::makeHexString(out, out + outLength, str); | |
407 // str PUT_TRACK_STREAM(start, now) | |
408 TRACE_HEXSTRING(out, out + outLength); | |
409 return now; // consumed already > at the end of the string | |
410 } | |
411 | |
412 // TODO(edisonn): add version parameter, before PDF 1.2 name could not have spec
ial characters. | |
413 static const unsigned char* readName(const unsigned char* start, const unsigned
char* end, | |
414 unsigned char* out) { | |
415 bool hasOut = (out != NULL); | |
416 const unsigned char* in = start; | |
417 | |
418 unsigned char code = 0; | |
419 | |
420 while (in < end) { | |
421 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) { | |
422 break; | |
423 } | |
424 | |
425 if (*in == '#' && in + 2 < end) { | |
426 in++; | |
427 switch (*in) { | |
428 case '0': | |
429 case '1': | |
430 case '2': | |
431 case '3': | |
432 case '4': | |
433 case '5': | |
434 case '6': | |
435 case '7': | |
436 case '8': | |
437 case '9': | |
438 code = (*in - '0') << 4; | |
439 break; | |
440 | |
441 case 'a': | |
442 case 'b': | |
443 case 'c': | |
444 case 'd': | |
445 case 'e': | |
446 case 'f': | |
447 code = (*in - 'a' + 10) << 4; | |
448 break; | |
449 | |
450 case 'A': | |
451 case 'B': | |
452 case 'C': | |
453 case 'D': | |
454 case 'E': | |
455 case 'F': | |
456 code = (*in - 'A' + 10) << 4; | |
457 break; | |
458 | |
459 // TODO(edisonn): spec does not say how to handle this error | |
460 default: | |
461 break; | |
462 } | |
463 | |
464 in++; // advance | |
465 | |
466 switch (*in) { | |
467 case '0': | |
468 case '1': | |
469 case '2': | |
470 case '3': | |
471 case '4': | |
472 case '5': | |
473 case '6': | |
474 case '7': | |
475 case '8': | |
476 case '9': | |
477 code += (*in - '0'); | |
478 break; | |
479 | |
480 case 'a': | |
481 case 'b': | |
482 case 'c': | |
483 case 'd': | |
484 case 'e': | |
485 case 'f': | |
486 code += (*in - 'a' + 10); | |
487 break; | |
488 | |
489 case 'A': | |
490 case 'B': | |
491 case 'C': | |
492 case 'D': | |
493 case 'E': | |
494 case 'F': | |
495 code += (*in - 'A' + 10); | |
496 break; | |
497 | |
498 // TODO(edisonn): spec does not say how to handle this error | |
499 default: | |
500 break; | |
501 } | |
502 | |
503 if (hasOut) { *out = code; } | |
504 out++; | |
505 in++; | |
506 } else { | |
507 if (hasOut) { *out = *in; } | |
508 out++; | |
509 in++; | |
510 } | |
511 } | |
512 | |
513 if (hasOut) { | |
514 return in; // consumed already ) at the end of the string | |
515 } else { | |
516 // return where the string would end if we reuse the string | |
517 return start + (out - (const unsigned char*)NULL); | |
518 } | |
519 } | |
520 | |
521 static size_t readNameLength(const unsigned char* start, const unsigned char* en
d) { | |
522 return readName(start, end, NULL) - start; | |
523 } | |
524 | |
525 static const unsigned char* readName(const unsigned char* start, const unsigned
char* end, | |
526 SkPdfNativeObject* name, SkPdfAllocator* al
locator) { | |
527 if (!allocator) { | |
528 // TODO(edisonn): report error/warn/assert | |
529 return end; | |
530 } | |
531 size_t outLength = readNameLength(start, end); | |
532 unsigned char* out = (unsigned char*)allocator->alloc(outLength); | |
533 const unsigned char* now = readName(start, end, out); | |
534 SkPdfNativeObject::makeName(out, out + outLength, name); | |
535 //PUT_TRACK_STREAM(start, now) | |
536 TRACE_NAME(out, out + outLength); | |
537 return now; | |
538 } | |
539 | |
540 // TODO(edisonn): pdf spec let Length to be an indirect object define after the
stream | |
541 // that makes for an interesting scenario, where the stream itself contains ends
tream, together | |
542 // with a reference object with the length, but the real length object would be
somewhere else | |
543 // it could confuse the parser | |
544 /*example: | |
545 | |
546 7 0 obj | |
547 << /length 8 0 R>> | |
548 stream | |
549 ............... | |
550 endstream | |
551 8 0 obj #we are in stream actually, not a real object | |
552 << 10 >> #we are in stream actually, not a real object | |
553 endobj | |
554 endstream | |
555 8 0 obj #real obj | |
556 << 100 >> #real obj | |
557 endobj | |
558 and it could get worse, with multiple object like this | |
559 */ | |
560 | |
561 // right now implement the silly algorithm that assumes endstream is finishing t
he stream | |
562 | |
563 static const unsigned char* readStream(const unsigned char* start, const unsigne
d char* end, | |
564 SkPdfNativeObject* dict, SkPdfNativeDoc*
doc) { | |
565 start = skipPdfWhiteSpaces(start, end); | |
566 if (!( start[0] == 's' && | |
567 start[1] == 't' && | |
568 start[2] == 'r' && | |
569 start[3] == 'e' && | |
570 start[4] == 'a' && | |
571 start[5] == 'm')) { | |
572 // no stream. return. | |
573 return start; | |
574 } | |
575 | |
576 start += 6; // strlen("stream") | |
577 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) { | |
578 start += 2; | |
579 } else if (start[0] == kLF_PdfWhiteSpace) { | |
580 start += 1; | |
581 } else if (isPdfWhiteSpace(start[0])) { | |
582 start += 1; | |
583 } else { | |
584 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ? | |
585 } | |
586 | |
587 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict; | |
588 // TODO(edisonn): load Length | |
589 int64_t length = -1; | |
590 | |
591 // TODO(edisonn): very basic implementation | |
592 if (stream->has_Length() && stream->Length(doc) > 0) { | |
593 length = stream->Length(doc); | |
594 } | |
595 | |
596 // TODO(edisonn): load external streams | |
597 // TODO(edisonn): look at the last filter, to determine how to deal with pos
sible parsing | |
598 // issues. The last filter can have special rules to terminate a stream, whi
ch we could | |
599 // use to determine end of stream. | |
600 | |
601 if (length >= 0) { | |
602 const unsigned char* endstream = start + length; | |
603 | |
604 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpa
ce) { | |
605 endstream += 2; | |
606 } else if (endstream[0] == kLF_PdfWhiteSpace) { | |
607 endstream += 1; | |
608 } | |
609 | |
610 if (strncmp((const char*)endstream, "endstream", strlen("endstream")) !=
0) { | |
611 length = -1; | |
612 } | |
613 } | |
614 | |
615 if (length < 0) { | |
616 // scan the buffer, until we find first endstream | |
617 // TODO(edisonn): all buffers must have a 0 at the end now, | |
618 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)s
tart, (char*)end, | |
619 "endstre
am"); | |
620 | |
621 if (endstream) { | |
622 length = endstream - start; | |
623 if (*(endstream-1) == kLF_PdfWhiteSpace) length--; | |
624 if (*(endstream-2) == kCR_PdfWhiteSpace) length--; | |
625 } | |
626 } | |
627 if (length >= 0) { | |
628 const unsigned char* endstream = start + length; | |
629 | |
630 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpa
ce) { | |
631 endstream += 2; | |
632 } else if (endstream[0] == kLF_PdfWhiteSpace) { | |
633 endstream += 1; | |
634 } | |
635 | |
636 // TODO(edisonn): verify the next bytes are "endstream" | |
637 | |
638 endstream += strlen("endstream"); | |
639 // TODO(edisonn): Assert? report error/warning? | |
640 dict->addStream(start, (size_t)length); | |
641 return endstream; | |
642 } | |
643 return start; | |
644 } | |
645 | |
646 static const unsigned char* readInlineImageStream(const unsigned char* start, | |
647 const unsigned char* end, | |
648 SkPdfImageDictionary* inlineIm
age, | |
649 SkPdfNativeDoc* doc) { | |
650 // We already processed ID keyword, and we should be positioned immediately
after it | |
651 | |
652 // TODO(edisonn): security: either make all streams to have extra 2 bytes at
the end, | |
653 // instead of this if. | |
654 //if (end - start <= 2) { | |
655 // // TODO(edisonn): warning? | |
656 // return end; // but can we have a pixel image encoded in 1-2 bytes? | |
657 //} | |
658 | |
659 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) { | |
660 start += 2; | |
661 } else if (start[0] == kLF_PdfWhiteSpace) { | |
662 start += 1; | |
663 } else if (isPdfWhiteSpace(start[0])) { | |
664 start += 1; | |
665 } else { | |
666 SkASSERT(isPdfDelimiter(start[0])); | |
667 // TODO(edisonn): warning? | |
668 } | |
669 | |
670 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start
, (char*)end, "EI"); | |
671 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strl
en("EI") | |
672 | |
673 if (endstream) { | |
674 size_t length = endstream - start; | |
675 if (*(endstream-1) == kLF_PdfWhiteSpace) length--; | |
676 if (*(endstream-2) == kCR_PdfWhiteSpace) length--; | |
677 inlineImage->addStream(start, (size_t)length); | |
678 } else { | |
679 // TODO(edisonn): report error in inline image stream (ID-EI) section | |
680 // TODO(edisonn): based on filter, try to ignore a missing EI, and read
data properly | |
681 return end; | |
682 } | |
683 return endEI; | |
684 } | |
685 | |
686 static const unsigned char* readDictionary(const unsigned char* start, const uns
igned char* end, | |
687 SkPdfNativeObject* dict, | |
688 SkPdfAllocator* allocator, SkPdfNativ
eDoc* doc) { | |
689 if (allocator == NULL) { | |
690 // TODO(edisonn): report/warning error | |
691 return end; | |
692 } | |
693 SkPdfNativeObject::makeEmptyDictionary(dict); | |
694 // PUT_TRACK_STREAM(dict, start, start) | |
695 | |
696 start = skipPdfWhiteSpaces(start, end); | |
697 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them
after set. | |
698 | |
699 while (start < end && *start == kNamed_PdfDelimiter) { | |
700 SkPdfNativeObject key; | |
701 //*start = '\0'; | |
702 start++; | |
703 start = readName(start, end, &key, &tmpStorage); | |
704 start = skipPdfWhiteSpaces(start, end); | |
705 | |
706 if (start < end) { | |
707 SkPdfNativeObject* value = allocator->allocObject(); | |
708 start = nextObject(start, end, value, allocator, doc); | |
709 | |
710 start = skipPdfWhiteSpaces(start, end); | |
711 | |
712 if (start < end) { | |
713 // We should have an indirect reference | |
714 if (isPdfDigit(*start)) { | |
715 SkPdfNativeObject generation; | |
716 start = nextObject(start, end, &generation, allocator, doc); | |
717 | |
718 SkPdfNativeObject keywordR; | |
719 start = nextObject(start, end, &keywordR, allocator, doc); | |
720 | |
721 if (value->isInteger() && generation.isInteger() && | |
722 keywordR.isKeywordReference()) { | |
723 int64_t id = value->intValue(); | |
724 SkPdfNativeObject::resetAndMakeReference( | |
725 (unsigned int)id, | |
726 (unsigned int)generation.intValue(), | |
727 value); | |
728 // PUT_TRACK_PARAMETERS_OBJ2(value, &generation) | |
729 dict->set(&key, value); | |
730 } else { | |
731 // TODO(edisonn) error?, ignore it for now. | |
732 dict->set(&key, value); | |
733 } | |
734 } else { | |
735 // next elem is not a digit, but it might not be / either! | |
736 dict->set(&key, value); | |
737 } | |
738 } else { | |
739 // /key >> | |
740 dict->set(&key, value); | |
741 return end; | |
742 } | |
743 start = skipPdfWhiteSpaces(start, end); | |
744 } else { | |
745 dict->set(&key, &SkPdfNativeObject::kNull); | |
746 return end; | |
747 } | |
748 } | |
749 | |
750 // now we should expect >> | |
751 start = skipPdfWhiteSpaces(start, end); | |
752 if (*start != kClosedInequityBracket_PdfDelimiter) { | |
753 // TODO(edisonn): report/warning | |
754 } | |
755 | |
756 start++; // skip > | |
757 if (*start != kClosedInequityBracket_PdfDelimiter) { | |
758 // TODO(edisonn): report/warning | |
759 } | |
760 | |
761 start++; // skip > | |
762 | |
763 //STORE_TRACK_PARAMETER_OFFSET_END(dict,start); | |
764 | |
765 start = readStream(start, end, dict, doc); | |
766 | |
767 return start; | |
768 } | |
769 | |
770 const unsigned char* nextObject(const unsigned char* start, const unsigned char*
end, | |
771 SkPdfNativeObject* token, | |
772 SkPdfAllocator* allocator, SkPdfNativeDoc* doc)
{ | |
773 const unsigned char* current; | |
774 | |
775 // skip white spaces | |
776 start = skipPdfWhiteSpaces(start, end); | |
777 | |
778 if (start >= end) { | |
779 return end; | |
780 } | |
781 | |
782 current = endOfPdfToken(start, end); | |
783 | |
784 // no token, len would be 0 | |
785 if (current == start || current == end) { | |
786 return end; | |
787 } | |
788 | |
789 size_t tokenLen = current - start; | |
790 | |
791 if (tokenLen == 1) { | |
792 // start array | |
793 switch (*start) { | |
794 case kOpenedSquareBracket_PdfDelimiter: | |
795 return readArray(current, end, token, allocator, doc); | |
796 | |
797 case kOpenedRoundBracket_PdfDelimiter: | |
798 return readString(start + 1, end, token, allocator); | |
799 | |
800 case kOpenedInequityBracket_PdfDelimiter: | |
801 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDel
imiter) { | |
802 // TODO(edisonn): pass here the length somehow? | |
803 return readDictionary(start + 2, end, token, allocator, doc)
; // skip << | |
804 } else { | |
805 return readHexString(start + 1, end, token, allocator); //
skip < | |
806 } | |
807 | |
808 case kNamed_PdfDelimiter: | |
809 return readName(start + 1, end, token, allocator); | |
810 | |
811 // TODO(edisonn): what to do curly brackets? | |
812 case kOpenedCurlyBracket_PdfDelimiter: | |
813 default: | |
814 break; | |
815 } | |
816 | |
817 SkASSERT(!isPdfWhiteSpace(*start)); | |
818 if (isPdfDelimiter(*start)) { | |
819 // TODO(edisonn): how unexpected stream ] } > ) will be handled? | |
820 // for now ignore, and it will become a keyword to be ignored | |
821 } | |
822 } | |
823 | |
824 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' &
& start[3] == 'l') { | |
825 SkPdfNativeObject::makeNull(token); | |
826 // PUT_TRACK_STREAM(start, start + 4) | |
827 return current; | |
828 } | |
829 | |
830 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' &
& start[3] == 'e') { | |
831 SkPdfNativeObject::makeBoolean(true, token); | |
832 // PUT_TRACK_STREAM(start, start + 4) | |
833 return current; | |
834 } | |
835 | |
836 // TODO(edisonn): again, make all buffers have 5 extra bytes | |
837 if (tokenLen == 5 && start[0] == 'f' && | |
838 start[1] == 'a' && | |
839 start[2] == 'l' && | |
840 start[3] == 's' && | |
841 start[4] == 'e') { | |
842 SkPdfNativeObject::makeBoolean(false, token); | |
843 // PUT_TRACK_STREAM(start, start + 5) | |
844 return current; | |
845 } | |
846 | |
847 if (isPdfNumeric(*start)) { | |
848 SkPdfNativeObject::makeNumeric(start, current, token); | |
849 // PUT_TRACK_STREAM(start, current) | |
850 } else { | |
851 SkPdfNativeObject::makeKeyword(start, current, token); | |
852 // PUT_TRACK_STREAM(start, current) | |
853 } | |
854 return current; | |
855 } | |
856 | |
857 SkPdfNativeObject* SkPdfAllocator::allocBlock() { | |
858 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfNativeObject); | |
859 return new SkPdfNativeObject[BUFFER_SIZE]; | |
860 } | |
861 | |
862 SkPdfAllocator::~SkPdfAllocator() { | |
863 for (int i = 0 ; i < fHandles.count(); i++) { | |
864 free(fHandles[i]); | |
865 } | |
866 for (int i = 0 ; i < fHistory.count(); i++) { | |
867 for (int j = 0 ; j < BUFFER_SIZE; j++) { | |
868 fHistory[i][j].reset(); | |
869 } | |
870 delete[] fHistory[i]; | |
871 } | |
872 for (int j = 0 ; j < BUFFER_SIZE; j++) { | |
873 fCurrent[j].reset(); | |
874 } | |
875 delete[] fCurrent; | |
876 } | |
877 | |
878 SkPdfNativeObject* SkPdfAllocator::allocObject() { | |
879 if (fCurrentUsed >= BUFFER_SIZE) { | |
880 fHistory.push(fCurrent); | |
881 fCurrent = allocBlock(); | |
882 fCurrentUsed = 0; | |
883 fSizeInBytes += sizeof(SkPdfNativeObject*); | |
884 } | |
885 fCurrentUsed++; | |
886 return &fCurrent[fCurrentUsed - 1]; | |
887 } | |
888 | |
889 // TODO(edisonn): perf: do no copy the buffers, but reuse them, and mark cache t
he result, | |
890 // so there is no need of a second pass | |
891 SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, | |
892 SkPdfAllocator* allocator, | |
893 SkPdfNativeDoc* doc) | |
894 : fDoc(doc) | |
895 , fAllocator(allocator) | |
896 , fUncompressedStream(NULL) | |
897 , fUncompressedStreamEnd(NULL) | |
898 , fEmpty(false) | |
899 , fHasPutBack(false) { | |
900 const unsigned char* buffer = NULL; | |
901 size_t len = 0; | |
902 objWithStream->GetFilteredStreamRef(&buffer, &len); | |
903 // TODO(edisonn): really bad hack, find end of object (endobj might be in a
comment!) | |
904 // we need to do now for perf, and our generated pdfs do not have comments, | |
905 // but we need to remove this hack for pdfs in the wild | |
906 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj"); | |
907 if (endobj) { | |
908 len = endobj - (char*)buffer + strlen("endobj"); | |
909 } | |
910 fUncompressedStreamStart = fUncompressedStream = buffer; | |
911 fUncompressedStreamEnd = fUncompressedStream + len; | |
912 } | |
913 | |
914 SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, | |
915 SkPdfAllocator* allocator, | |
916 SkPdfNativeDoc* doc) : fDoc(doc) | |
917 , fAllocator(all
ocator) | |
918 , fEmpty(false) | |
919 , fHasPutBack(fa
lse) { | |
920 // TODO(edisonn): really bad hack, find end of object (endobj might be in a
comment!) | |
921 // we need to do now for perf, and our generated pdfs do not have comments, | |
922 // but we need to remove this hack for pdfs in the wild | |
923 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj"); | |
924 if (endobj) { | |
925 len = SkToInt(endobj - (char*)buffer + strlen("endobj")); | |
926 } | |
927 fUncompressedStreamStart = fUncompressedStream = buffer; | |
928 fUncompressedStreamEnd = fUncompressedStream + len; | |
929 } | |
930 | |
931 SkPdfNativeTokenizer::~SkPdfNativeTokenizer() { | |
932 } | |
933 | |
934 bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) { | |
935 #ifdef PDF_TRACE_READ_TOKEN | |
936 static int read_op = 0; | |
937 #endif | |
938 | |
939 token->fKeyword = NULL; | |
940 token->fObject = NULL; | |
941 | |
942 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedS
treamEnd); | |
943 if (fUncompressedStream >= fUncompressedStreamEnd) { | |
944 fEmpty = true; | |
945 return false; | |
946 } | |
947 | |
948 SkPdfNativeObject obj; | |
949 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd
, &obj, fAllocator, fDoc); | |
950 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart) | |
951 | |
952 // If it is a keyword, we will only get the pointer of the string. | |
953 if (obj.type() == SkPdfNativeObject::kKeyword_PdfObjectType) { | |
954 token->fKeyword = obj.c_str(); | |
955 token->fKeywordLength = obj.lenstr(); | |
956 token->fType = kKeyword_TokenType; | |
957 } else { | |
958 SkPdfNativeObject* pobj = fAllocator->allocObject(); | |
959 *pobj = obj; | |
960 token->fObject = pobj; | |
961 token->fType = kObject_TokenType; | |
962 } | |
963 | |
964 #ifdef PDF_TRACE_READ_TOKEN | |
965 read_op++; | |
966 #if 0 | |
967 if (548 == read_op) { | |
968 printf("break;\n"); | |
969 } | |
970 #endif | |
971 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Key
word" : "Object", | |
972 token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_
str() : | |
973 token->fObject->toString().c_str()); | |
974 #endif | |
975 | |
976 return true; | |
977 } | |
978 | |
979 void SkPdfNativeTokenizer::PutBack(PdfToken token) { | |
980 SkASSERT(!fHasPutBack); | |
981 fHasPutBack = true; | |
982 fPutBack = token; | |
983 #ifdef PDF_TRACE_READ_TOKEN | |
984 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "
Object", | |
985 token.fKeyword ? SkString(token.fKeyword, token.fKeywordLength).c_str
() : | |
986 token.fObject->toString().c_str()); | |
987 #endif | |
988 } | |
989 | |
990 bool SkPdfNativeTokenizer::readToken(PdfToken* token, bool writeDiff) { | |
991 if (fHasPutBack) { | |
992 *token = fPutBack; | |
993 fHasPutBack = false; | |
994 #ifdef PDF_TRACE_READ_TOKEN | |
995 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keywor
d" : "Object", | |
996 token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength
).c_str() : | |
997 token->fObject->toString().c_str()); | |
998 #endif | |
999 if (writeDiff) { | |
1000 SkPdfDiffEncoder::WriteToFile(token); | |
1001 } | |
1002 return true; | |
1003 } | |
1004 | |
1005 if (fEmpty) { | |
1006 #ifdef PDF_TRACE_READ_TOKEN | |
1007 printf("EMPTY TOKENIZER\n"); | |
1008 #endif | |
1009 return false; | |
1010 } | |
1011 | |
1012 const bool result = readTokenCore(token); | |
1013 if (result && writeDiff) { | |
1014 SkPdfDiffEncoder::WriteToFile(token); | |
1015 } | |
1016 return result; | |
1017 } | |
1018 | |
1019 #define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName) | |
1020 | |
1021 // keys | |
1022 DECLARE_PDF_NAME(BitsPerComponent); | |
1023 DECLARE_PDF_NAME(ColorSpace); | |
1024 DECLARE_PDF_NAME(Decode); | |
1025 DECLARE_PDF_NAME(DecodeParms); | |
1026 DECLARE_PDF_NAME(Filter); | |
1027 DECLARE_PDF_NAME(Height); | |
1028 DECLARE_PDF_NAME(ImageMask); | |
1029 DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abBreviations? | |
1030 DECLARE_PDF_NAME(Interpolate); | |
1031 DECLARE_PDF_NAME(Width); | |
1032 | |
1033 // values | |
1034 DECLARE_PDF_NAME(DeviceGray); | |
1035 DECLARE_PDF_NAME(DeviceRGB); | |
1036 DECLARE_PDF_NAME(DeviceCMYK); | |
1037 DECLARE_PDF_NAME(Indexed); | |
1038 DECLARE_PDF_NAME(ASCIIHexDecode); | |
1039 DECLARE_PDF_NAME(ASCII85Decode); | |
1040 DECLARE_PDF_NAME(LZWDecode); | |
1041 DECLARE_PDF_NAME(FlateDecode); // PDF 1.2 | |
1042 DECLARE_PDF_NAME(RunLengthDecode); | |
1043 DECLARE_PDF_NAME(CCITTFaxDecode); | |
1044 DECLARE_PDF_NAME(DCTDecode); | |
1045 | |
// Makes the enclosing function return the address of the global |longName|
// when |obj| is a name spelled |shortName|; otherwise falls through.
//
// The body is wrapped in do { } while (0) and |obj| is parenthesized so the
// macro behaves as a single statement: it cannot capture a following `else`
// and it accepts any pointer expression. Call sites must end with `;`.
#define HANDLE_NAME_ABBR(obj, longName, shortName) \
    do {                                           \
        if ((obj)->isName(#shortName)) {           \
            return &longName;                      \
        }                                          \
    } while (0)
1047 | |
1048 | |
1049 static SkPdfNativeObject* inlineImageKeyAbbreviationExpand(SkPdfNativeObject* ke
y) { | |
1050 if (!key || !key->isName()) { | |
1051 return key; | |
1052 } | |
1053 | |
1054 // TODO(edisonn): use autogenerated code! | |
1055 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC); | |
1056 HANDLE_NAME_ABBR(key, ColorSpace, CS); | |
1057 HANDLE_NAME_ABBR(key, Decode, D); | |
1058 HANDLE_NAME_ABBR(key, DecodeParms, DP); | |
1059 HANDLE_NAME_ABBR(key, Filter, F); | |
1060 HANDLE_NAME_ABBR(key, Height, H); | |
1061 HANDLE_NAME_ABBR(key, ImageMask, IM); | |
1062 // HANDLE_NAME_ABBR(key, Intent, ); | |
1063 HANDLE_NAME_ABBR(key, Interpolate, I); | |
1064 HANDLE_NAME_ABBR(key, Width, W); | |
1065 | |
1066 return key; | |
1067 } | |
1068 | |
1069 static SkPdfNativeObject* inlineImageValueAbbreviationExpand(SkPdfNativeObject*
value) { | |
1070 if (!value || !value->isName()) { | |
1071 return value; | |
1072 } | |
1073 | |
1074 // TODO(edisonn): use autogenerated code! | |
1075 HANDLE_NAME_ABBR(value, DeviceGray, G); | |
1076 HANDLE_NAME_ABBR(value, DeviceRGB, RGB); | |
1077 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK); | |
1078 HANDLE_NAME_ABBR(value, Indexed, I); | |
1079 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx); | |
1080 HANDLE_NAME_ABBR(value, ASCII85Decode, A85); | |
1081 HANDLE_NAME_ABBR(value, LZWDecode, LZW); | |
1082 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2) | |
1083 HANDLE_NAME_ABBR(value, RunLengthDecode, RL); | |
1084 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF); | |
1085 HANDLE_NAME_ABBR(value, DCTDecode, DCT); | |
1086 | |
1087 return value; | |
1088 } | |
1089 | |
1090 SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() { | |
1091 // BI already processed | |
1092 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedS
treamEnd); | |
1093 if (fUncompressedStream >= fUncompressedStreamEnd) { | |
1094 return NULL; | |
1095 } | |
1096 | |
1097 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->alloc
Object(); | |
1098 SkPdfNativeObject::makeEmptyDictionary(inlineImage); | |
1099 // PUT_TRACK_STREAM_ARGS_EXPL(fStreamId, fUncompressedStream - fUncompresse
dStreamStart, | |
1100 // fUncompressedStream - fUncompressedStreamStar
t) | |
1101 | |
1102 while (fUncompressedStream < fUncompressedStreamEnd) { | |
1103 SkPdfNativeObject* key = fAllocator->allocObject(); | |
1104 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStrea
mEnd, key, | |
1105 fAllocator, fDoc); | |
1106 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s | |
1107 | |
1108 if (key->isKeyword() && key->lenstr() == 2 && | |
1109 key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID | |
1110 fUncompressedStream = readInlineImageStream(fUncompressedStream, fUn
compressedStreamEnd, | |
1111 inlineImage, fDoc); | |
1112 return inlineImage; | |
1113 } else { | |
1114 SkPdfNativeObject* obj = fAllocator->allocObject(); | |
1115 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedS
treamEnd, obj, | |
1116 fAllocator, fDoc); | |
1117 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)
s | |
1118 // TODO(edisonn): perf maybe we should not expand abBreviation like
this | |
1119 inlineImage->set(inlineImageKeyAbbreviationExpand(key), | |
1120 inlineImageValueAbbreviationExpand(obj)); | |
1121 } | |
1122 } | |
1123 // TODO(edisonn): report end of data with inline image without an EI | |
1124 return inlineImage; | |
1125 } | |
OLD | NEW |