experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp - Issue 18536014: pdfviewer: more load references dinamically plumming

Side by Side Diff: experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp

Issue 18536014: pdfviewer: more load references dinamically plumming (Closed) Base URL: http://skia.googlecode.com/svn/trunk/

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1	1

2 #include "SkPdfNativeTokenizer.h"	2 #include "SkPdfNativeTokenizer.h"

3 #include "SkPdfObject.h"	3 #include "SkPdfObject.h"

4 #include "SkPdfConfig.h"	4 #include "SkPdfConfig.h"

5	5

6 #include "SkPdfStreamCommonDictionary_autogen.h"	6 #include "SkPdfStreamCommonDictionary_autogen.h"

7	7

8 static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* end) {	8 static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* end) {

9 while (start < end && isPdfWhiteSpace(*start)) {	9 while (start < end && isPdfWhiteSpace(*start)) {

10 if (*start == kComment_PdfDelimiter) {	10 if (*start == kComment_PdfDelimiter) {

(...skipping 22 matching lines...) Expand all Loading...
33 return start;	33 return start;

34 }	34 }

35	35

36 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {	36 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {

37 start++;	37 start++;

38 }	38 }

39 return start;	39 return start;

40 }	40 }

41	41

42 // last elem has to be ]	42 // last elem has to be ]

43 static unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator) {	43 static unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {

44 while (start < end) {	44 while (start < end) {

45 // skip white spaces	45 // skip white spaces

46 start = skipPdfWhiteSpaces(start, end);	46 start = skipPdfWhiteSpaces(start, end);

47	47

48 unsigned char* endOfToken = endOfPdfToken(start, end);	48 unsigned char* endOfToken = endOfPdfToken(start, end);

49	49

50 if (endOfToken == start) {	50 if (endOfToken == start) {

51 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray	51 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray

52 return start;	52 return start;

53 }	53 }

54	54

55 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {	55 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {

56 return endOfToken;	56 return endOfToken;

57 }	57 }

58	58

59 SkPdfObject* newObj = allocator->allocObject();	59 SkPdfObject* newObj = allocator->allocObject();

60 start = nextObject(start, end, newObj, allocator);	60 start = nextObject(start, end, newObj, allocator, doc);

61 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when	61 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when

62 // we are sure they are not references!	62 // we are sure they are not references!

63 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {	63 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {

64 SkPdfObject* gen = array->removeLastInArray();	64 SkPdfObject* gen = array->removeLastInArray();

65 SkPdfObject* id = array->removeLastInArray();	65 SkPdfObject* id = array->removeLastInArray();

66 newObj->reset();	66 newObj->reset();

67 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);	67 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);

68 }	68 }

69 array->appendInArray(newObj);	69 array->appendInArray(newObj);

70 }	70 }

(...skipping 365 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
436 endstream	436 endstream

437 8 0 obj #real obj	437 8 0 obj #real obj

438 << 100 >> #real obj	438 << 100 >> #real obj

439 endobj	439 endobj

440 and it could get worse, with multiple object like this	440 and it could get worse, with multiple object like this

441 */	441 */

442	442

443 // right now implement the silly algorithm that assumes endstream is finishing the stream	443 // right now implement the silly algorithm that assumes endstream is finishing the stream

444	444

445	445

446 static unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdfObject* dict) {	446 static unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {

447 start = skipPdfWhiteSpaces(start, end);	447 start = skipPdfWhiteSpaces(start, end);

448 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {	448 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {

449 // no stream. return.	449 // no stream. return.

450 return start;	450 return start;

451 }	451 }

452	452

453 start += 6; // strlen("stream")	453 start += 6; // strlen("stream")

454 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {	454 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {

455 start += 2;	455 start += 2;

456 } else if (start[0] == kLF_PdfWhiteSpace) {	456 } else if (start[0] == kLF_PdfWhiteSpace) {

457 start += 1;	457 start += 1;

458 }	458 }

459	459

460 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;	460 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;

461 // TODO(edisonn): load Length	461 // TODO(edisonn): load Length

462 int64_t length = -1;	462 int64_t length = -1;

463	463

464 // TODO(edisonn): very basic implementation	464 // TODO(edisonn): very basic implementation

465 if (stream->has_Length() && stream->Length(NULL) > 0) {	465 if (stream->has_Length() && stream->Length(doc) > 0) {

466 length = stream->Length(NULL);	466 length = stream->Length(doc);

467 }	467 }

468	468

469 // TODO(edisonn): laod external streams	469 // TODO(edisonn): laod external streams

470 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue	470 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue

471	471

472 if (length < 0) {	472 if (length < 0) {

473 // scan the buffer, until we find first endstream	473 // scan the buffer, until we find first endstream

474 // TODO(edisonn): all buffers must have a 0 at the end now,	474 // TODO(edisonn): all buffers must have a 0 at the end now,

475 // TODO(edisonn): hack (mark end of content with 0)	475 // TODO(edisonn): hack (mark end of content with 0)

476 unsigned char lastCh = *end;	476 unsigned char lastCh = *end;

(...skipping 20 matching lines...) Expand all Loading...
497 // TODO(edisonn): verify the next bytes are "endstream"	497 // TODO(edisonn): verify the next bytes are "endstream"

498	498

499 endstream += strlen("endstream");	499 endstream += strlen("endstream");

500 // TODO(edisonn): Assert? report error/warning?	500 // TODO(edisonn): Assert? report error/warning?

501 dict->addStream(start, (size_t)length);	501 dict->addStream(start, (size_t)length);

502 return endstream;	502 return endstream;

503 }	503 }

504 return start;	504 return start;

505 }	505 }

506	506

507 static unsigned char* readDictionary(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator) {	507 static unsigned char* readDictionary(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {

508 SkPdfObject::makeEmptyDictionary(dict);	508 SkPdfObject::makeEmptyDictionary(dict);

509	509

510 start = skipPdfWhiteSpaces(start, end);	510 start = skipPdfWhiteSpaces(start, end);

511	511

512 while (start < end && *start == kNamed_PdfDelimiter) {	512 while (start < end && *start == kNamed_PdfDelimiter) {

513 SkPdfObject key;	513 SkPdfObject key;

514 *start = '\0';	514 *start = '\0';

515 start++;	515 start++;

516 start = readName(start, end, &key);	516 start = readName(start, end, &key);

517 start = skipPdfWhiteSpaces(start, end);	517 start = skipPdfWhiteSpaces(start, end);

518	518

519 if (start < end) {	519 if (start < end) {

520 SkPdfObject* value = allocator->allocObject();	520 SkPdfObject* value = allocator->allocObject();

521 start = nextObject(start, end, value, allocator);	521 start = nextObject(start, end, value, allocator, doc);

522	522

523 start = skipPdfWhiteSpaces(start, end);	523 start = skipPdfWhiteSpaces(start, end);

524	524

525 if (start < end) {	525 if (start < end) {

526 // seems we have an indirect reference	526 // seems we have an indirect reference

527 if (isPdfDigit(*start)) {	527 if (isPdfDigit(*start)) {

528 SkPdfObject generation;	528 SkPdfObject generation;

529 start = nextObject(start, end, &generation, allocator);	529 start = nextObject(start, end, &generation, allocator, doc);

530	530

531 SkPdfObject keywordR;	531 SkPdfObject keywordR;

532 start = nextObject(start, end, &keywordR, allocator);	532 start = nextObject(start, end, &keywordR, allocator, doc);

533	533

534 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {	534 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {

535 int64_t id = value->intValue();	535 int64_t id = value->intValue();

536 value->reset();	536 value->reset();

537 SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);	537 SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);

538 dict->set(&key, value);	538 dict->set(&key, value);

539 } else {	539 } else {

540 // error, ignore	540 // error, ignore

541 dict->set(&key, value);	541 dict->set(&key, value);

542 }	542 }

(...skipping 16 matching lines...) Expand all Loading...
559 // TODO(edisonn): options to ignore these errors	559 // TODO(edisonn): options to ignore these errors

560	560

561 // now we should expect >>	561 // now we should expect >>

562 start = skipPdfWhiteSpaces(start, end);	562 start = skipPdfWhiteSpaces(start, end);

563 start = endOfPdfToken(start, end); // >	563 start = endOfPdfToken(start, end); // >

564 start = endOfPdfToken(start, end); // >	564 start = endOfPdfToken(start, end); // >

565	565

566 // TODO(edisonn): read stream ... put dict and stream in a struct, and have a pointer to struct ...	566 // TODO(edisonn): read stream ... put dict and stream in a struct, and have a pointer to struct ...

567 // or alocate 2 objects, and if there is no stream, free it to be used by someone else? or just leave it ?	567 // or alocate 2 objects, and if there is no stream, free it to be used by someone else? or just leave it ?

568	568

569 start = readStream(start, end, dict);	569 start = readStream(start, end, dict, doc);

570	570

571 return start;	571 return start;

572 }	572 }

573	573

574 unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator) {	574 unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {

575 unsigned char* current;	575 unsigned char* current;

576	576

577 // skip white spaces	577 // skip white spaces

578 start = skipPdfWhiteSpaces(start, end);	578 start = skipPdfWhiteSpaces(start, end);

579	579

580 current = endOfPdfToken(start, end);	580 current = endOfPdfToken(start, end);

581	581

582 // no token, len would be 0	582 // no token, len would be 0

583 if (current == start) {	583 if (current == start) {

584 return NULL;	584 return NULL;

585 }	585 }

586	586

587 int tokenLen = current - start;	587 int tokenLen = current - start;

588	588

589 if (tokenLen == 1) {	589 if (tokenLen == 1) {

590 // start array	590 // start array

591 switch (*start) {	591 switch (*start) {

592 case kOpenedSquareBracket_PdfDelimiter:	592 case kOpenedSquareBracket_PdfDelimiter:

593 *start = '\0';	593 *start = '\0';

594 SkPdfObject::makeEmptyArray(token);	594 SkPdfObject::makeEmptyArray(token);

595 return readArray(current, end, token, allocator);	595 return readArray(current, end, token, allocator, doc);

596	596

597 case kOpenedRoundBracket_PdfDelimiter:	597 case kOpenedRoundBracket_PdfDelimiter:

598 *start = '\0';	598 *start = '\0';

599 return readString(start, end, token);	599 return readString(start, end, token);

600	600

601 case kOpenedInequityBracket_PdfDelimiter:	601 case kOpenedInequityBracket_PdfDelimiter:

602 *start = '\0';	602 *start = '\0';

603 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {	603 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {

604 // TODO(edisonn): pass here the length somehow?	604 // TODO(edisonn): pass here the length somehow?

605 return readDictionary(start + 2, end, token, allocator); // skip <<	605 return readDictionary(start + 2, end, token, allocator, doc); // skip <<

606 } else {	606 } else {

607 return readHexString(start + 1, end, token); // skip <	607 return readHexString(start + 1, end, token); // skip <

608 }	608 }

609	609

610 case kNamed_PdfDelimiter:	610 case kNamed_PdfDelimiter:

611 *start = '\0';	611 *start = '\0';

612 return readName(start + 1, end, token);	612 return readName(start + 1, end, token);

613	613

614 // TODO(edisonn): what to do curly brackets? read spec!	614 // TODO(edisonn): what to do curly brackets? read spec!

615 case kOpenedCurlyBracket_PdfDelimiter:	615 case kOpenedCurlyBracket_PdfDelimiter:

(...skipping 55 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
671 if (fCurrentUsed >= BUFFER_SIZE) {	671 if (fCurrentUsed >= BUFFER_SIZE) {

672 fHistory.push(fCurrent);	672 fHistory.push(fCurrent);

673 fCurrent = allocBlock();	673 fCurrent = allocBlock();

674 fCurrentUsed = 0;	674 fCurrentUsed = 0;

675 }	675 }

676 fCurrentUsed++;	676 fCurrentUsed++;

677 return &fCurrent[fCurrentUsed - 1];	677 return &fCurrent[fCurrentUsed - 1];

678 }	678 }

679	679

680 // TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass	680 // TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass

681 SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator) : fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {	681 SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {

682 unsigned char* buffer = NULL;	682 unsigned char* buffer = NULL;

683 size_t len = 0;	683 size_t len = 0;

684 objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator);	684 objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator);

685 // TODO(edisonn): hack, find end of object	685 // TODO(edisonn): hack, find end of object

686 char* endobj = strstr((char*)buffer, "endobj");	686 char* endobj = strstr((char*)buffer, "endobj");

687 if (endobj) {	687 if (endobj) {

688 len = endobj - (char*)buffer + strlen("endobj");	688 len = endobj - (char*)buffer + strlen("endobj");

689 }	689 }

690 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator->alloc(len);	690 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator->alloc(len);

691 fUncompressedStreamEnd = fUncompressedStream + len;	691 fUncompressedStreamEnd = fUncompressedStream + len;

692 memcpy(fUncompressedStream, buffer, len);	692 memcpy(fUncompressedStream, buffer, len);

693 }	693 }

694	694

695 SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator) : fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {	695 SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {

696 // TODO(edisonn): hack, find end of object	696 // TODO(edisonn): hack, find end of object

697 char* endobj = strstr((char*)buffer, "endobj");	697 char* endobj = strstr((char*)buffer, "endobj");

698 if (endobj) {	698 if (endobj) {

699 len = endobj - (char*)buffer + strlen("endobj");	699 len = endobj - (char*)buffer + strlen("endobj");

700 }	700 }

701 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator->alloc(len);	701 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator->alloc(len);

702 fUncompressedStreamEnd = fUncompressedStream + len;	702 fUncompressedStreamEnd = fUncompressedStream + len;

703 memcpy(fUncompressedStream, buffer, len);	703 memcpy(fUncompressedStream, buffer, len);

704 }	704 }

705	705

706 SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {	706 SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {

707 }	707 }

708	708

709 bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {	709 bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {

710 token->fKeyword = NULL;	710 token->fKeyword = NULL;

711 token->fObject = NULL;	711 token->fObject = NULL;

712	712

713 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);	713 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);

714 if (fUncompressedStream >= fUncompressedStreamEnd) {	714 if (fUncompressedStream >= fUncompressedStreamEnd) {

715 return false;	715 return false;

716 }	716 }

717	717

718 SkPdfObject obj;	718 SkPdfObject obj;

719 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator);	719 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);

720	720

721 // If it is a keyword, we will only get the pointer of the string	721 // If it is a keyword, we will only get the pointer of the string

722 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {	722 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {

723 token->fKeyword = obj.c_str();	723 token->fKeyword = obj.c_str();

724 token->fKeywordLength = obj.len();	724 token->fKeywordLength = obj.len();

725 token->fType = kKeyword_TokenType;	725 token->fType = kKeyword_TokenType;

726 } else {	726 } else {

727 SkPdfObject* pobj = fAllocator->allocObject();	727 SkPdfObject* pobj = fAllocator->allocObject();

728 *pobj = obj;	728 *pobj = obj;

729 token->fObject = pobj;	729 token->fObject = pobj;

(...skipping 34 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
764 if (fEmpty) {	764 if (fEmpty) {

765 #ifdef PDF_TRACE	765 #ifdef PDF_TRACE

766 printf("EMPTY TOKENIZER\n");	766 printf("EMPTY TOKENIZER\n");

767 #endif	767 #endif

768 return false;	768 return false;

769 }	769 }

770	770

771 return readTokenCore(token);	771 return readTokenCore(token);

772 }	772 }

773	773

OLD	NEW

« no previous file with comments | « experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h ('k') | no next file » | no next file with comments »