| OLD | NEW |
| 1 | 1 |
| 2 #include "SkPdfNativeTokenizer.h" | 2 #include "SkPdfNativeTokenizer.h" |
| 3 #include "SkPdfObject.h" | 3 #include "SkPdfObject.h" |
| 4 #include "SkPdfConfig.h" | 4 #include "SkPdfConfig.h" |
| 5 | 5 |
| 6 #include "SkPdfStreamCommonDictionary_autogen.h" | 6 #include "SkPdfStreamCommonDictionary_autogen.h" |
| 7 | 7 |
| 8 static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* en
d) { | 8 static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* en
d) { |
| 9 while (start < end && isPdfWhiteSpace(*start)) { | 9 while (start < end && isPdfWhiteSpace(*start)) { |
| 10 if (*start == kComment_PdfDelimiter) { | 10 if (*start == kComment_PdfDelimiter) { |
| (...skipping 22 matching lines...) Expand all Loading... |
| 33 return start; | 33 return start; |
| 34 } | 34 } |
| 35 | 35 |
| 36 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) { | 36 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) { |
| 37 start++; | 37 start++; |
| 38 } | 38 } |
| 39 return start; | 39 return start; |
| 40 } | 40 } |
| 41 | 41 |
| 42 // last elem has to be ] | 42 // last elem has to be ] |
| 43 static unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfO
bject* array, SkPdfAllocator* allocator) { | 43 static unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfO
bject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) { |
| 44 while (start < end) { | 44 while (start < end) { |
| 45 // skip white spaces | 45 // skip white spaces |
| 46 start = skipPdfWhiteSpaces(start, end); | 46 start = skipPdfWhiteSpaces(start, end); |
| 47 | 47 |
| 48 unsigned char* endOfToken = endOfPdfToken(start, end); | 48 unsigned char* endOfToken = endOfPdfToken(start, end); |
| 49 | 49 |
| 50 if (endOfToken == start) { | 50 if (endOfToken == start) { |
| 51 // TODO(edisonn): report error in pdf file (end of stream with ] for
end of array | 51 // TODO(edisonn): report error in pdf file (end of stream with ] for
end of array |
| 52 return start; | 52 return start; |
| 53 } | 53 } |
| 54 | 54 |
| 55 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimit
er) { | 55 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimit
er) { |
| 56 return endOfToken; | 56 return endOfToken; |
| 57 } | 57 } |
| 58 | 58 |
| 59 SkPdfObject* newObj = allocator->allocObject(); | 59 SkPdfObject* newObj = allocator->allocObject(); |
| 60 start = nextObject(start, end, newObj, allocator); | 60 start = nextObject(start, end, newObj, allocator, doc); |
| 61 // TODO(edisonn): perf/memory: put the variables on the stack, and flush
them on the array only when | 61 // TODO(edisonn): perf/memory: put the variables on the stack, and flush
them on the array only when |
| 62 // we are sure they are not references! | 62 // we are sure they are not references! |
| 63 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAI
ndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->i
sInteger()) { | 63 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAI
ndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->i
sInteger()) { |
| 64 SkPdfObject* gen = array->removeLastInArray(); | 64 SkPdfObject* gen = array->removeLastInArray(); |
| 65 SkPdfObject* id = array->removeLastInArray(); | 65 SkPdfObject* id = array->removeLastInArray(); |
| 66 newObj->reset(); | 66 newObj->reset(); |
| 67 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned i
nt)gen->intValue(), newObj); | 67 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned i
nt)gen->intValue(), newObj); |
| 68 } | 68 } |
| 69 array->appendInArray(newObj); | 69 array->appendInArray(newObj); |
| 70 } | 70 } |
| (...skipping 365 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 436 endstream | 436 endstream |
| 437 8 0 obj #real obj | 437 8 0 obj #real obj |
| 438 << 100 >> #real obj | 438 << 100 >> #real obj |
| 439 endobj | 439 endobj |
| 440 and it could get worse, with multiple objects like this | 440 and it could get worse, with multiple objects like this |
| 441 */ | 441 */ |
| 442 | 442 |
| 443 // right now implement the silly algorithm that assumes endstream is finishing t
he stream | 443 // right now implement the silly algorithm that assumes endstream is finishing t
he stream |
| 444 | 444 |
| 445 | 445 |
| 446 static unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdf
Object* dict) { | 446 static unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdf
Object* dict, SkNativeParsedPDF* doc) { |
| 447 start = skipPdfWhiteSpaces(start, end); | 447 start = skipPdfWhiteSpaces(start, end); |
| 448 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == '
e' && start[4] == 'a' && start[5] == 'm')) { | 448 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == '
e' && start[4] == 'a' && start[5] == 'm')) { |
| 449 // no stream. return. | 449 // no stream. return. |
| 450 return start; | 450 return start; |
| 451 } | 451 } |
| 452 | 452 |
| 453 start += 6; // strlen("stream") | 453 start += 6; // strlen("stream") |
| 454 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) { | 454 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) { |
| 455 start += 2; | 455 start += 2; |
| 456 } else if (start[0] == kLF_PdfWhiteSpace) { | 456 } else if (start[0] == kLF_PdfWhiteSpace) { |
| 457 start += 1; | 457 start += 1; |
| 458 } | 458 } |
| 459 | 459 |
| 460 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict; | 460 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict; |
| 461 // TODO(edisonn): load Length | 461 // TODO(edisonn): load Length |
| 462 int64_t length = -1; | 462 int64_t length = -1; |
| 463 | 463 |
| 464 // TODO(edisonn): very basic implementation | 464 // TODO(edisonn): very basic implementation |
| 465 if (stream->has_Length() && stream->Length(NULL) > 0) { | 465 if (stream->has_Length() && stream->Length(doc) > 0) { |
| 466 length = stream->Length(NULL); | 466 length = stream->Length(doc); |
| 467 } | 467 } |
| 468 | 468 |
| 469 // TODO(edisonn): load external streams | 469 // TODO(edisonn): load external streams |
| 470 // TODO(edisonn): look at the last filter, to determine how to deal with po
ssible issue | 470 // TODO(edisonn): look at the last filter, to determine how to deal with po
ssible issue |
| 471 | 471 |
| 472 if (length < 0) { | 472 if (length < 0) { |
| 473 // scan the buffer, until we find first endstream | 473 // scan the buffer, until we find first endstream |
| 474 // TODO(edisonn): all buffers must have a 0 at the end now, | 474 // TODO(edisonn): all buffers must have a 0 at the end now, |
| 475 // TODO(edisonn): hack (mark end of content with 0) | 475 // TODO(edisonn): hack (mark end of content with 0) |
| 476 unsigned char lastCh = *end; | 476 unsigned char lastCh = *end; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 497 // TODO(edisonn): verify the next bytes are "endstream" | 497 // TODO(edisonn): verify the next bytes are "endstream" |
| 498 | 498 |
| 499 endstream += strlen("endstream"); | 499 endstream += strlen("endstream"); |
| 500 // TODO(edisonn): Assert? report error/warning? | 500 // TODO(edisonn): Assert? report error/warning? |
| 501 dict->addStream(start, (size_t)length); | 501 dict->addStream(start, (size_t)length); |
| 502 return endstream; | 502 return endstream; |
| 503 } | 503 } |
| 504 return start; | 504 return start; |
| 505 } | 505 } |
| 506 | 506 |
| 507 static unsigned char* readDictionary(unsigned char* start, unsigned char* end, S
kPdfObject* dict, SkPdfAllocator* allocator) { | 507 static unsigned char* readDictionary(unsigned char* start, unsigned char* end, S
kPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) { |
| 508 SkPdfObject::makeEmptyDictionary(dict); | 508 SkPdfObject::makeEmptyDictionary(dict); |
| 509 | 509 |
| 510 start = skipPdfWhiteSpaces(start, end); | 510 start = skipPdfWhiteSpaces(start, end); |
| 511 | 511 |
| 512 while (start < end && *start == kNamed_PdfDelimiter) { | 512 while (start < end && *start == kNamed_PdfDelimiter) { |
| 513 SkPdfObject key; | 513 SkPdfObject key; |
| 514 *start = '\0'; | 514 *start = '\0'; |
| 515 start++; | 515 start++; |
| 516 start = readName(start, end, &key); | 516 start = readName(start, end, &key); |
| 517 start = skipPdfWhiteSpaces(start, end); | 517 start = skipPdfWhiteSpaces(start, end); |
| 518 | 518 |
| 519 if (start < end) { | 519 if (start < end) { |
| 520 SkPdfObject* value = allocator->allocObject(); | 520 SkPdfObject* value = allocator->allocObject(); |
| 521 start = nextObject(start, end, value, allocator); | 521 start = nextObject(start, end, value, allocator, doc); |
| 522 | 522 |
| 523 start = skipPdfWhiteSpaces(start, end); | 523 start = skipPdfWhiteSpaces(start, end); |
| 524 | 524 |
| 525 if (start < end) { | 525 if (start < end) { |
| 526 // seems we have an indirect reference | 526 // seems we have an indirect reference |
| 527 if (isPdfDigit(*start)) { | 527 if (isPdfDigit(*start)) { |
| 528 SkPdfObject generation; | 528 SkPdfObject generation; |
| 529 start = nextObject(start, end, &generation, allocator); | 529 start = nextObject(start, end, &generation, allocator, doc); |
| 530 | 530 |
| 531 SkPdfObject keywordR; | 531 SkPdfObject keywordR; |
| 532 start = nextObject(start, end, &keywordR, allocator); | 532 start = nextObject(start, end, &keywordR, allocator, doc); |
| 533 | 533 |
| 534 if (value->isInteger() && generation.isInteger() && keywordR
.isKeywordReference()) { | 534 if (value->isInteger() && generation.isInteger() && keywordR
.isKeywordReference()) { |
| 535 int64_t id = value->intValue(); | 535 int64_t id = value->intValue(); |
| 536 value->reset(); | 536 value->reset(); |
| 537 SkPdfObject::makeReference((unsigned int)id, (unsigned i
nt)generation.intValue(), value); | 537 SkPdfObject::makeReference((unsigned int)id, (unsigned i
nt)generation.intValue(), value); |
| 538 dict->set(&key, value); | 538 dict->set(&key, value); |
| 539 } else { | 539 } else { |
| 540 // error, ignore | 540 // error, ignore |
| 541 dict->set(&key, value); | 541 dict->set(&key, value); |
| 542 } | 542 } |
| (...skipping 16 matching lines...) Expand all Loading... |
| 559 // TODO(edisonn): options to ignore these errors | 559 // TODO(edisonn): options to ignore these errors |
| 560 | 560 |
| 561 // now we should expect >> | 561 // now we should expect >> |
| 562 start = skipPdfWhiteSpaces(start, end); | 562 start = skipPdfWhiteSpaces(start, end); |
| 563 start = endOfPdfToken(start, end); // > | 563 start = endOfPdfToken(start, end); // > |
| 564 start = endOfPdfToken(start, end); // > | 564 start = endOfPdfToken(start, end); // > |
| 565 | 565 |
| 566 // TODO(edisonn): read stream ... put dict and stream in a struct, and have
a pointer to struct ... | 566 // TODO(edisonn): read stream ... put dict and stream in a struct, and have
a pointer to struct ... |
| 567 // or allocate 2 objects, and if there is no stream, free it to be used by so
meone else? or just leave it ? | 567 // or allocate 2 objects, and if there is no stream, free it to be used by so
meone else? or just leave it ? |
| 568 | 568 |
| 569 start = readStream(start, end, dict); | 569 start = readStream(start, end, dict, doc); |
| 570 | 570 |
| 571 return start; | 571 return start; |
| 572 } | 572 } |
| 573 | 573 |
| 574 unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject*
token, SkPdfAllocator* allocator) { | 574 unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject*
token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) { |
| 575 unsigned char* current; | 575 unsigned char* current; |
| 576 | 576 |
| 577 // skip white spaces | 577 // skip white spaces |
| 578 start = skipPdfWhiteSpaces(start, end); | 578 start = skipPdfWhiteSpaces(start, end); |
| 579 | 579 |
| 580 current = endOfPdfToken(start, end); | 580 current = endOfPdfToken(start, end); |
| 581 | 581 |
| 582 // no token, len would be 0 | 582 // no token, len would be 0 |
| 583 if (current == start) { | 583 if (current == start) { |
| 584 return NULL; | 584 return NULL; |
| 585 } | 585 } |
| 586 | 586 |
| 587 int tokenLen = current - start; | 587 int tokenLen = current - start; |
| 588 | 588 |
| 589 if (tokenLen == 1) { | 589 if (tokenLen == 1) { |
| 590 // start array | 590 // start array |
| 591 switch (*start) { | 591 switch (*start) { |
| 592 case kOpenedSquareBracket_PdfDelimiter: | 592 case kOpenedSquareBracket_PdfDelimiter: |
| 593 *start = '\0'; | 593 *start = '\0'; |
| 594 SkPdfObject::makeEmptyArray(token); | 594 SkPdfObject::makeEmptyArray(token); |
| 595 return readArray(current, end, token, allocator); | 595 return readArray(current, end, token, allocator, doc); |
| 596 | 596 |
| 597 case kOpenedRoundBracket_PdfDelimiter: | 597 case kOpenedRoundBracket_PdfDelimiter: |
| 598 *start = '\0'; | 598 *start = '\0'; |
| 599 return readString(start, end, token); | 599 return readString(start, end, token); |
| 600 | 600 |
| 601 case kOpenedInequityBracket_PdfDelimiter: | 601 case kOpenedInequityBracket_PdfDelimiter: |
| 602 *start = '\0'; | 602 *start = '\0'; |
| 603 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDel
imiter) { | 603 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDel
imiter) { |
| 604 // TODO(edisonn): pass here the length somehow? | 604 // TODO(edisonn): pass here the length somehow? |
| 605 return readDictionary(start + 2, end, token, allocator); //
skip << | 605 return readDictionary(start + 2, end, token, allocator, doc)
; // skip << |
| 606 } else { | 606 } else { |
| 607 return readHexString(start + 1, end, token); // skip < | 607 return readHexString(start + 1, end, token); // skip < |
| 608 } | 608 } |
| 609 | 609 |
| 610 case kNamed_PdfDelimiter: | 610 case kNamed_PdfDelimiter: |
| 611 *start = '\0'; | 611 *start = '\0'; |
| 612 return readName(start + 1, end, token); | 612 return readName(start + 1, end, token); |
| 613 | 613 |
| 614 // TODO(edisonn): what to do with curly brackets? read spec! | 614 // TODO(edisonn): what to do with curly brackets? read spec! |
| 615 case kOpenedCurlyBracket_PdfDelimiter: | 615 case kOpenedCurlyBracket_PdfDelimiter: |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 671 if (fCurrentUsed >= BUFFER_SIZE) { | 671 if (fCurrentUsed >= BUFFER_SIZE) { |
| 672 fHistory.push(fCurrent); | 672 fHistory.push(fCurrent); |
| 673 fCurrent = allocBlock(); | 673 fCurrent = allocBlock(); |
| 674 fCurrentUsed = 0; | 674 fCurrentUsed = 0; |
| 675 } | 675 } |
| 676 fCurrentUsed++; | 676 fCurrentUsed++; |
| 677 return &fCurrent[fCurrentUsed - 1]; | 677 return &fCurrent[fCurrentUsed - 1]; |
| 678 } | 678 } |
| 679 | 679 |
| 680 // TODO(edisonn): perf: do not copy the buffers, but use them, and mark cache the
result, so there is no need of a second pass | 680 // TODO(edisonn): perf: do not copy the buffers, but use them, and mark cache the
result, so there is no need of a second pass |
| 681 SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkP
dfMapper* mapper, SkPdfAllocator* allocator) : fMapper(mapper), fAllocator(alloc
ator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), f
HasPutBack(false) { | 681 SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkP
dfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc)
, fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompress
edStreamEnd(NULL), fEmpty(false), fHasPutBack(false) { |
| 682 unsigned char* buffer = NULL; | 682 unsigned char* buffer = NULL; |
| 683 size_t len = 0; | 683 size_t len = 0; |
| 684 objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator); | 684 objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator); |
| 685 // TODO(edisonn): hack, find end of object | 685 // TODO(edisonn): hack, find end of object |
| 686 char* endobj = strstr((char*)buffer, "endobj"); | 686 char* endobj = strstr((char*)buffer, "endobj"); |
| 687 if (endobj) { | 687 if (endobj) { |
| 688 len = endobj - (char*)buffer + strlen("endobj"); | 688 len = endobj - (char*)buffer + strlen("endobj"); |
| 689 } | 689 } |
| 690 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator-
>alloc(len); | 690 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator-
>alloc(len); |
| 691 fUncompressedStreamEnd = fUncompressedStream + len; | 691 fUncompressedStreamEnd = fUncompressedStream + len; |
| 692 memcpy(fUncompressedStream, buffer, len); | 692 memcpy(fUncompressedStream, buffer, len); |
| 693 } | 693 } |
| 694 | 694 |
| 695 SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const
SkPdfMapper* mapper, SkPdfAllocator* allocator) : fMapper(mapper), fAllocator(a
llocator), fEmpty(false), fHasPutBack(false) { | 695 SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const
SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(
doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false)
{ |
| 696 // TODO(edisonn): hack, find end of object | 696 // TODO(edisonn): hack, find end of object |
| 697 char* endobj = strstr((char*)buffer, "endobj"); | 697 char* endobj = strstr((char*)buffer, "endobj"); |
| 698 if (endobj) { | 698 if (endobj) { |
| 699 len = endobj - (char*)buffer + strlen("endobj"); | 699 len = endobj - (char*)buffer + strlen("endobj"); |
| 700 } | 700 } |
| 701 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator-
>alloc(len); | 701 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator-
>alloc(len); |
| 702 fUncompressedStreamEnd = fUncompressedStream + len; | 702 fUncompressedStreamEnd = fUncompressedStream + len; |
| 703 memcpy(fUncompressedStream, buffer, len); | 703 memcpy(fUncompressedStream, buffer, len); |
| 704 } | 704 } |
| 705 | 705 |
| 706 SkPdfNativeTokenizer::~SkPdfNativeTokenizer() { | 706 SkPdfNativeTokenizer::~SkPdfNativeTokenizer() { |
| 707 } | 707 } |
| 708 | 708 |
| 709 bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) { | 709 bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) { |
| 710 token->fKeyword = NULL; | 710 token->fKeyword = NULL; |
| 711 token->fObject = NULL; | 711 token->fObject = NULL; |
| 712 | 712 |
| 713 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedS
treamEnd); | 713 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedS
treamEnd); |
| 714 if (fUncompressedStream >= fUncompressedStreamEnd) { | 714 if (fUncompressedStream >= fUncompressedStreamEnd) { |
| 715 return false; | 715 return false; |
| 716 } | 716 } |
| 717 | 717 |
| 718 SkPdfObject obj; | 718 SkPdfObject obj; |
| 719 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd
, &obj, fAllocator); | 719 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd
, &obj, fAllocator, fDoc); |
| 720 | 720 |
| 721 // If it is a keyword, we will only get the pointer of the string | 721 // If it is a keyword, we will only get the pointer of the string |
| 722 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) { | 722 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) { |
| 723 token->fKeyword = obj.c_str(); | 723 token->fKeyword = obj.c_str(); |
| 724 token->fKeywordLength = obj.len(); | 724 token->fKeywordLength = obj.len(); |
| 725 token->fType = kKeyword_TokenType; | 725 token->fType = kKeyword_TokenType; |
| 726 } else { | 726 } else { |
| 727 SkPdfObject* pobj = fAllocator->allocObject(); | 727 SkPdfObject* pobj = fAllocator->allocObject(); |
| 728 *pobj = obj; | 728 *pobj = obj; |
| 729 token->fObject = pobj; | 729 token->fObject = pobj; |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 764 if (fEmpty) { | 764 if (fEmpty) { |
| 765 #ifdef PDF_TRACE | 765 #ifdef PDF_TRACE |
| 766 printf("EMPTY TOKENIZER\n"); | 766 printf("EMPTY TOKENIZER\n"); |
| 767 #endif | 767 #endif |
| 768 return false; | 768 return false; |
| 769 } | 769 } |
| 770 | 770 |
| 771 return readTokenCore(token); | 771 return readTokenCore(token); |
| 772 } | 772 } |
| 773 | 773 |
| OLD | NEW |