OLD | NEW |
1 | 1 |
2 #include "SkPdfNativeTokenizer.h" | 2 #include "SkPdfNativeTokenizer.h" |
3 #include "SkPdfObject.h" | 3 #include "SkPdfObject.h" |
4 #include "SkPdfConfig.h" | 4 #include "SkPdfConfig.h" |
5 | 5 |
6 #include "SkPdfStreamCommonDictionary_autogen.h" | 6 #include "SkPdfStreamCommonDictionary_autogen.h" |
7 | 7 |
8 static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* en
d) { | 8 static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* en
d) { |
9 while (start < end && isPdfWhiteSpace(*start)) { | 9 while (start < end && isPdfWhiteSpace(*start)) { |
10 if (*start == kComment_PdfDelimiter) { | 10 if (*start == kComment_PdfDelimiter) { |
(...skipping 22 matching lines...) Expand all Loading... |
33 return start; | 33 return start; |
34 } | 34 } |
35 | 35 |
36 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) { | 36 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) { |
37 start++; | 37 start++; |
38 } | 38 } |
39 return start; | 39 return start; |
40 } | 40 } |
41 | 41 |
42 // last elem has to be ] | 42 // last elem has to be ] |
43 static unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfO
bject* array, SkPdfAllocator* allocator) { | 43 static unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfO
bject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) { |
44 while (start < end) { | 44 while (start < end) { |
45 // skip white spaces | 45 // skip white spaces |
46 start = skipPdfWhiteSpaces(start, end); | 46 start = skipPdfWhiteSpaces(start, end); |
47 | 47 |
48 unsigned char* endOfToken = endOfPdfToken(start, end); | 48 unsigned char* endOfToken = endOfPdfToken(start, end); |
49 | 49 |
50 if (endOfToken == start) { | 50 if (endOfToken == start) { |
51 // TODO(edisonn): report error in pdf file (end of stream with ] for
end of array | 51 // TODO(edisonn): report error in pdf file (end of stream with ] for
end of array |
52 return start; | 52 return start; |
53 } | 53 } |
54 | 54 |
55 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimit
er) { | 55 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimit
er) { |
56 return endOfToken; | 56 return endOfToken; |
57 } | 57 } |
58 | 58 |
59 SkPdfObject* newObj = allocator->allocObject(); | 59 SkPdfObject* newObj = allocator->allocObject(); |
60 start = nextObject(start, end, newObj, allocator); | 60 start = nextObject(start, end, newObj, allocator, doc); |
61 // TODO(edisonn): perf/memory: put the variables on the stack, and flush
them on the array only when | 61 // TODO(edisonn): perf/memory: put the variables on the stack, and flush
them on the array only when |
62 // we are sure they are not references! | 62 // we are sure they are not references! |
63 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAI
ndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->i
sInteger()) { | 63 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAI
ndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->i
sInteger()) { |
64 SkPdfObject* gen = array->removeLastInArray(); | 64 SkPdfObject* gen = array->removeLastInArray(); |
65 SkPdfObject* id = array->removeLastInArray(); | 65 SkPdfObject* id = array->removeLastInArray(); |
66 newObj->reset(); | 66 newObj->reset(); |
67 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned i
nt)gen->intValue(), newObj); | 67 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned i
nt)gen->intValue(), newObj); |
68 } | 68 } |
69 array->appendInArray(newObj); | 69 array->appendInArray(newObj); |
70 } | 70 } |
(...skipping 365 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
436 endstream | 436 endstream |
437 8 0 obj #real obj | 437 8 0 obj #real obj |
438 << 100 >> #real obj | 438 << 100 >> #real obj |
439 endobj | 439 endobj |
440 and it could get worse, with multiple objects like this | 440 and it could get worse, with multiple objects like this |
441 */ | 441 */ |
442 | 442 |
443 // right now implement the silly algorithm that assumes endstream is finishing t
he stream | 443 // right now implement the silly algorithm that assumes endstream is finishing t
he stream |
444 | 444 |
445 | 445 |
446 static unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdf
Object* dict) { | 446 static unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdf
Object* dict, SkNativeParsedPDF* doc) { |
447 start = skipPdfWhiteSpaces(start, end); | 447 start = skipPdfWhiteSpaces(start, end); |
448 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == '
e' && start[4] == 'a' && start[5] == 'm')) { | 448 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == '
e' && start[4] == 'a' && start[5] == 'm')) { |
449 // no stream. return. | 449 // no stream. return. |
450 return start; | 450 return start; |
451 } | 451 } |
452 | 452 |
453 start += 6; // strlen("stream") | 453 start += 6; // strlen("stream") |
454 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) { | 454 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) { |
455 start += 2; | 455 start += 2; |
456 } else if (start[0] == kLF_PdfWhiteSpace) { | 456 } else if (start[0] == kLF_PdfWhiteSpace) { |
457 start += 1; | 457 start += 1; |
458 } | 458 } |
459 | 459 |
460 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict; | 460 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict; |
461 // TODO(edisonn): load Length | 461 // TODO(edisonn): load Length |
462 int64_t length = -1; | 462 int64_t length = -1; |
463 | 463 |
464 // TODO(edisonn): very basic implementation | 464 // TODO(edisonn): very basic implementation |
465 if (stream->has_Length() && stream->Length(NULL) > 0) { | 465 if (stream->has_Length() && stream->Length(doc) > 0) { |
466 length = stream->Length(NULL); | 466 length = stream->Length(doc); |
467 } | 467 } |
468 | 468 |
469 // TODO(edisonn): load external streams | 469 // TODO(edisonn): load external streams |
470 // TODO(edisonn): look at the last filter, to determine how to deal with po
ssible issue | 470 // TODO(edisonn): look at the last filter, to determine how to deal with po
ssible issue |
471 | 471 |
472 if (length < 0) { | 472 if (length < 0) { |
473 // scan the buffer, until we find first endstream | 473 // scan the buffer, until we find first endstream |
474 // TODO(edisonn): all buffers must have a 0 at the end now, | 474 // TODO(edisonn): all buffers must have a 0 at the end now, |
475 // TODO(edisonn): hack (mark end of content with 0) | 475 // TODO(edisonn): hack (mark end of content with 0) |
476 unsigned char lastCh = *end; | 476 unsigned char lastCh = *end; |
(...skipping 20 matching lines...) Expand all Loading... |
497 // TODO(edisonn): verify the next bytes are "endstream" | 497 // TODO(edisonn): verify the next bytes are "endstream" |
498 | 498 |
499 endstream += strlen("endstream"); | 499 endstream += strlen("endstream"); |
500 // TODO(edisonn): Assert? report error/warning? | 500 // TODO(edisonn): Assert? report error/warning? |
501 dict->addStream(start, (size_t)length); | 501 dict->addStream(start, (size_t)length); |
502 return endstream; | 502 return endstream; |
503 } | 503 } |
504 return start; | 504 return start; |
505 } | 505 } |
506 | 506 |
507 static unsigned char* readDictionary(unsigned char* start, unsigned char* end, S
kPdfObject* dict, SkPdfAllocator* allocator) { | 507 static unsigned char* readDictionary(unsigned char* start, unsigned char* end, S
kPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) { |
508 SkPdfObject::makeEmptyDictionary(dict); | 508 SkPdfObject::makeEmptyDictionary(dict); |
509 | 509 |
510 start = skipPdfWhiteSpaces(start, end); | 510 start = skipPdfWhiteSpaces(start, end); |
511 | 511 |
512 while (start < end && *start == kNamed_PdfDelimiter) { | 512 while (start < end && *start == kNamed_PdfDelimiter) { |
513 SkPdfObject key; | 513 SkPdfObject key; |
514 *start = '\0'; | 514 *start = '\0'; |
515 start++; | 515 start++; |
516 start = readName(start, end, &key); | 516 start = readName(start, end, &key); |
517 start = skipPdfWhiteSpaces(start, end); | 517 start = skipPdfWhiteSpaces(start, end); |
518 | 518 |
519 if (start < end) { | 519 if (start < end) { |
520 SkPdfObject* value = allocator->allocObject(); | 520 SkPdfObject* value = allocator->allocObject(); |
521 start = nextObject(start, end, value, allocator); | 521 start = nextObject(start, end, value, allocator, doc); |
522 | 522 |
523 start = skipPdfWhiteSpaces(start, end); | 523 start = skipPdfWhiteSpaces(start, end); |
524 | 524 |
525 if (start < end) { | 525 if (start < end) { |
526 // seems we have an indirect reference | 526 // seems we have an indirect reference |
527 if (isPdfDigit(*start)) { | 527 if (isPdfDigit(*start)) { |
528 SkPdfObject generation; | 528 SkPdfObject generation; |
529 start = nextObject(start, end, &generation, allocator); | 529 start = nextObject(start, end, &generation, allocator, doc); |
530 | 530 |
531 SkPdfObject keywordR; | 531 SkPdfObject keywordR; |
532 start = nextObject(start, end, &keywordR, allocator); | 532 start = nextObject(start, end, &keywordR, allocator, doc); |
533 | 533 |
534 if (value->isInteger() && generation.isInteger() && keywordR
.isKeywordReference()) { | 534 if (value->isInteger() && generation.isInteger() && keywordR
.isKeywordReference()) { |
535 int64_t id = value->intValue(); | 535 int64_t id = value->intValue(); |
536 value->reset(); | 536 value->reset(); |
537 SkPdfObject::makeReference((unsigned int)id, (unsigned i
nt)generation.intValue(), value); | 537 SkPdfObject::makeReference((unsigned int)id, (unsigned i
nt)generation.intValue(), value); |
538 dict->set(&key, value); | 538 dict->set(&key, value); |
539 } else { | 539 } else { |
540 // error, ignore | 540 // error, ignore |
541 dict->set(&key, value); | 541 dict->set(&key, value); |
542 } | 542 } |
(...skipping 16 matching lines...) Expand all Loading... |
559 // TODO(edisonn): options to ignore these errors | 559 // TODO(edisonn): options to ignore these errors |
560 | 560 |
561 // now we should expect >> | 561 // now we should expect >> |
562 start = skipPdfWhiteSpaces(start, end); | 562 start = skipPdfWhiteSpaces(start, end); |
563 start = endOfPdfToken(start, end); // > | 563 start = endOfPdfToken(start, end); // > |
564 start = endOfPdfToken(start, end); // > | 564 start = endOfPdfToken(start, end); // > |
565 | 565 |
566 // TODO(edisonn): read stream ... put dict and stream in a struct, and have
a pointer to struct ... | 566 // TODO(edisonn): read stream ... put dict and stream in a struct, and have
a pointer to struct ... |
567 // or allocate 2 objects, and if there is no stream, free it to be used by so
meone else? or just leave it ? | 567 // or allocate 2 objects, and if there is no stream, free it to be used by so
meone else? or just leave it ? |
568 | 568 |
569 start = readStream(start, end, dict); | 569 start = readStream(start, end, dict, doc); |
570 | 570 |
571 return start; | 571 return start; |
572 } | 572 } |
573 | 573 |
574 unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject*
token, SkPdfAllocator* allocator) { | 574 unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject*
token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) { |
575 unsigned char* current; | 575 unsigned char* current; |
576 | 576 |
577 // skip white spaces | 577 // skip white spaces |
578 start = skipPdfWhiteSpaces(start, end); | 578 start = skipPdfWhiteSpaces(start, end); |
579 | 579 |
580 current = endOfPdfToken(start, end); | 580 current = endOfPdfToken(start, end); |
581 | 581 |
582 // no token, len would be 0 | 582 // no token, len would be 0 |
583 if (current == start) { | 583 if (current == start) { |
584 return NULL; | 584 return NULL; |
585 } | 585 } |
586 | 586 |
587 int tokenLen = current - start; | 587 int tokenLen = current - start; |
588 | 588 |
589 if (tokenLen == 1) { | 589 if (tokenLen == 1) { |
590 // start array | 590 // start array |
591 switch (*start) { | 591 switch (*start) { |
592 case kOpenedSquareBracket_PdfDelimiter: | 592 case kOpenedSquareBracket_PdfDelimiter: |
593 *start = '\0'; | 593 *start = '\0'; |
594 SkPdfObject::makeEmptyArray(token); | 594 SkPdfObject::makeEmptyArray(token); |
595 return readArray(current, end, token, allocator); | 595 return readArray(current, end, token, allocator, doc); |
596 | 596 |
597 case kOpenedRoundBracket_PdfDelimiter: | 597 case kOpenedRoundBracket_PdfDelimiter: |
598 *start = '\0'; | 598 *start = '\0'; |
599 return readString(start, end, token); | 599 return readString(start, end, token); |
600 | 600 |
601 case kOpenedInequityBracket_PdfDelimiter: | 601 case kOpenedInequityBracket_PdfDelimiter: |
602 *start = '\0'; | 602 *start = '\0'; |
603 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDel
imiter) { | 603 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDel
imiter) { |
604 // TODO(edisonn): pass here the length somehow? | 604 // TODO(edisonn): pass here the length somehow? |
605 return readDictionary(start + 2, end, token, allocator); //
skip << | 605 return readDictionary(start + 2, end, token, allocator, doc)
; // skip << |
606 } else { | 606 } else { |
607 return readHexString(start + 1, end, token); // skip < | 607 return readHexString(start + 1, end, token); // skip < |
608 } | 608 } |
609 | 609 |
610 case kNamed_PdfDelimiter: | 610 case kNamed_PdfDelimiter: |
611 *start = '\0'; | 611 *start = '\0'; |
612 return readName(start + 1, end, token); | 612 return readName(start + 1, end, token); |
613 | 613 |
614 // TODO(edisonn): what to do curly brackets? read spec! | 614 // TODO(edisonn): what to do curly brackets? read spec! |
615 case kOpenedCurlyBracket_PdfDelimiter: | 615 case kOpenedCurlyBracket_PdfDelimiter: |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
671 if (fCurrentUsed >= BUFFER_SIZE) { | 671 if (fCurrentUsed >= BUFFER_SIZE) { |
672 fHistory.push(fCurrent); | 672 fHistory.push(fCurrent); |
673 fCurrent = allocBlock(); | 673 fCurrent = allocBlock(); |
674 fCurrentUsed = 0; | 674 fCurrentUsed = 0; |
675 } | 675 } |
676 fCurrentUsed++; | 676 fCurrentUsed++; |
677 return &fCurrent[fCurrentUsed - 1]; | 677 return &fCurrent[fCurrentUsed - 1]; |
678 } | 678 } |
679 | 679 |
680 // TODO(edisonn): perf: do not copy the buffers, but use them, and mark cache the
result, so there is no need of a second pass | 680 // TODO(edisonn): perf: do not copy the buffers, but use them, and mark cache the
result, so there is no need of a second pass |
681 SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkP
dfMapper* mapper, SkPdfAllocator* allocator) : fMapper(mapper), fAllocator(alloc
ator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), f
HasPutBack(false) { | 681 SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkP
dfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc)
, fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompress
edStreamEnd(NULL), fEmpty(false), fHasPutBack(false) { |
682 unsigned char* buffer = NULL; | 682 unsigned char* buffer = NULL; |
683 size_t len = 0; | 683 size_t len = 0; |
684 objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator); | 684 objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator); |
685 // TODO(edisonn): hack, find end of object | 685 // TODO(edisonn): hack, find end of object |
686 char* endobj = strstr((char*)buffer, "endobj"); | 686 char* endobj = strstr((char*)buffer, "endobj"); |
687 if (endobj) { | 687 if (endobj) { |
688 len = endobj - (char*)buffer + strlen("endobj"); | 688 len = endobj - (char*)buffer + strlen("endobj"); |
689 } | 689 } |
690 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator-
>alloc(len); | 690 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator-
>alloc(len); |
691 fUncompressedStreamEnd = fUncompressedStream + len; | 691 fUncompressedStreamEnd = fUncompressedStream + len; |
692 memcpy(fUncompressedStream, buffer, len); | 692 memcpy(fUncompressedStream, buffer, len); |
693 } | 693 } |
694 | 694 |
695 SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const
SkPdfMapper* mapper, SkPdfAllocator* allocator) : fMapper(mapper), fAllocator(a
llocator), fEmpty(false), fHasPutBack(false) { | 695 SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const
SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(
doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false)
{ |
696 // TODO(edisonn): hack, find end of object | 696 // TODO(edisonn): hack, find end of object |
697 char* endobj = strstr((char*)buffer, "endobj"); | 697 char* endobj = strstr((char*)buffer, "endobj"); |
698 if (endobj) { | 698 if (endobj) { |
699 len = endobj - (char*)buffer + strlen("endobj"); | 699 len = endobj - (char*)buffer + strlen("endobj"); |
700 } | 700 } |
701 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator-
>alloc(len); | 701 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator-
>alloc(len); |
702 fUncompressedStreamEnd = fUncompressedStream + len; | 702 fUncompressedStreamEnd = fUncompressedStream + len; |
703 memcpy(fUncompressedStream, buffer, len); | 703 memcpy(fUncompressedStream, buffer, len); |
704 } | 704 } |
705 | 705 |
706 SkPdfNativeTokenizer::~SkPdfNativeTokenizer() { | 706 SkPdfNativeTokenizer::~SkPdfNativeTokenizer() { |
707 } | 707 } |
708 | 708 |
709 bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) { | 709 bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) { |
710 token->fKeyword = NULL; | 710 token->fKeyword = NULL; |
711 token->fObject = NULL; | 711 token->fObject = NULL; |
712 | 712 |
713 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedS
treamEnd); | 713 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedS
treamEnd); |
714 if (fUncompressedStream >= fUncompressedStreamEnd) { | 714 if (fUncompressedStream >= fUncompressedStreamEnd) { |
715 return false; | 715 return false; |
716 } | 716 } |
717 | 717 |
718 SkPdfObject obj; | 718 SkPdfObject obj; |
719 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd
, &obj, fAllocator); | 719 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd
, &obj, fAllocator, fDoc); |
720 | 720 |
721 // If it is a keyword, we will only get the pointer of the string | 721 // If it is a keyword, we will only get the pointer of the string |
722 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) { | 722 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) { |
723 token->fKeyword = obj.c_str(); | 723 token->fKeyword = obj.c_str(); |
724 token->fKeywordLength = obj.len(); | 724 token->fKeywordLength = obj.len(); |
725 token->fType = kKeyword_TokenType; | 725 token->fType = kKeyword_TokenType; |
726 } else { | 726 } else { |
727 SkPdfObject* pobj = fAllocator->allocObject(); | 727 SkPdfObject* pobj = fAllocator->allocObject(); |
728 *pobj = obj; | 728 *pobj = obj; |
729 token->fObject = pobj; | 729 token->fObject = pobj; |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
764 if (fEmpty) { | 764 if (fEmpty) { |
765 #ifdef PDF_TRACE | 765 #ifdef PDF_TRACE |
766 printf("EMPTY TOKENIZER\n"); | 766 printf("EMPTY TOKENIZER\n"); |
767 #endif | 767 #endif |
768 return false; | 768 return false; |
769 } | 769 } |
770 | 770 |
771 return readTokenCore(token); | 771 return readTokenCore(token); |
772 } | 772 } |
773 | 773 |
OLD | NEW |