Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(778)

Side by Side Diff: experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp

Issue 18536014: pdfviewer: more load references dinamically plumming (Closed) Base URL: http://skia.googlecode.com/svn/trunk/
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 1
2 #include "SkPdfNativeTokenizer.h" 2 #include "SkPdfNativeTokenizer.h"
3 #include "SkPdfObject.h" 3 #include "SkPdfObject.h"
4 #include "SkPdfConfig.h" 4 #include "SkPdfConfig.h"
5 5
6 #include "SkPdfStreamCommonDictionary_autogen.h" 6 #include "SkPdfStreamCommonDictionary_autogen.h"
7 7
8 static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* en d) { 8 static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* en d) {
9 while (start < end && isPdfWhiteSpace(*start)) { 9 while (start < end && isPdfWhiteSpace(*start)) {
10 if (*start == kComment_PdfDelimiter) { 10 if (*start == kComment_PdfDelimiter) {
(...skipping 22 matching lines...) Expand all
33 return start; 33 return start;
34 } 34 }
35 35
36 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) { 36 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
37 start++; 37 start++;
38 } 38 }
39 return start; 39 return start;
40 } 40 }
41 41
42 // last elem has to be ] 42 // last elem has to be ]
43 static unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfO bject* array, SkPdfAllocator* allocator) { 43 static unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfO bject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
44 while (start < end) { 44 while (start < end) {
45 // skip white spaces 45 // skip white spaces
46 start = skipPdfWhiteSpaces(start, end); 46 start = skipPdfWhiteSpaces(start, end);
47 47
48 unsigned char* endOfToken = endOfPdfToken(start, end); 48 unsigned char* endOfToken = endOfPdfToken(start, end);
49 49
50 if (endOfToken == start) { 50 if (endOfToken == start) {
51 // TODO(edisonn): report error in pdf file (end of stream with ] for end of array 51 // TODO(edisonn): report error in pdf file (end of stream with ] for end of array
52 return start; 52 return start;
53 } 53 }
54 54
55 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimit er) { 55 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimit er) {
56 return endOfToken; 56 return endOfToken;
57 } 57 }
58 58
59 SkPdfObject* newObj = allocator->allocObject(); 59 SkPdfObject* newObj = allocator->allocObject();
60 start = nextObject(start, end, newObj, allocator); 60 start = nextObject(start, end, newObj, allocator, doc);
61 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when 61 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
62 // we are sure they are not references! 62 // we are sure they are not references!
63 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAI ndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->i sInteger()) { 63 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAI ndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->i sInteger()) {
64 SkPdfObject* gen = array->removeLastInArray(); 64 SkPdfObject* gen = array->removeLastInArray();
65 SkPdfObject* id = array->removeLastInArray(); 65 SkPdfObject* id = array->removeLastInArray();
66 newObj->reset(); 66 newObj->reset();
67 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned i nt)gen->intValue(), newObj); 67 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned i nt)gen->intValue(), newObj);
68 } 68 }
69 array->appendInArray(newObj); 69 array->appendInArray(newObj);
70 } 70 }
(...skipping 365 matching lines...) Expand 10 before | Expand all | Expand 10 after
436 endstream 436 endstream
437 8 0 obj #real obj 437 8 0 obj #real obj
438 << 100 >> #real obj 438 << 100 >> #real obj
439 endobj 439 endobj
440 and it could get worse, with multiple objects like this 440 and it could get worse, with multiple objects like this
441 */ 441 */
442 442
443 // right now implement the silly algorithm that assumes endstream is finishing the stream 443 // right now implement the silly algorithm that assumes endstream is finishing the stream
444 444
445 445
446 static unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdf Object* dict) { 446 static unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdf Object* dict, SkNativeParsedPDF* doc) {
447 start = skipPdfWhiteSpaces(start, end); 447 start = skipPdfWhiteSpaces(start, end);
448 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == ' e' && start[4] == 'a' && start[5] == 'm')) { 448 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == ' e' && start[4] == 'a' && start[5] == 'm')) {
449 // no stream. return. 449 // no stream. return.
450 return start; 450 return start;
451 } 451 }
452 452
453 start += 6; // strlen("stream") 453 start += 6; // strlen("stream")
454 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) { 454 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
455 start += 2; 455 start += 2;
456 } else if (start[0] == kLF_PdfWhiteSpace) { 456 } else if (start[0] == kLF_PdfWhiteSpace) {
457 start += 1; 457 start += 1;
458 } 458 }
459 459
460 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict; 460 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
461 // TODO(edisonn): load Length 461 // TODO(edisonn): load Length
462 int64_t length = -1; 462 int64_t length = -1;
463 463
464 // TODO(edisonn): very basic implementation 464 // TODO(edisonn): very basic implementation
465 if (stream->has_Length() && stream->Length(NULL) > 0) { 465 if (stream->has_Length() && stream->Length(doc) > 0) {
466 length = stream->Length(NULL); 466 length = stream->Length(doc);
467 } 467 }
468 468
469 // TODO(edisonn): load external streams 469 // TODO(edisonn): load external streams
470 // TODO(edisonn): look at the last filter, to determine how to deal with possible issue 470 // TODO(edisonn): look at the last filter, to determine how to deal with possible issue
471 471
472 if (length < 0) { 472 if (length < 0) {
473 // scan the buffer, until we find first endstream 473 // scan the buffer, until we find first endstream
474 // TODO(edisonn): all buffers must have a 0 at the end now, 474 // TODO(edisonn): all buffers must have a 0 at the end now,
475 // TODO(edisonn): hack (mark end of content with 0) 475 // TODO(edisonn): hack (mark end of content with 0)
476 unsigned char lastCh = *end; 476 unsigned char lastCh = *end;
(...skipping 20 matching lines...) Expand all
497 // TODO(edisonn): verify the next bytes are "endstream" 497 // TODO(edisonn): verify the next bytes are "endstream"
498 498
499 endstream += strlen("endstream"); 499 endstream += strlen("endstream");
500 // TODO(edisonn): Assert? report error/warning? 500 // TODO(edisonn): Assert? report error/warning?
501 dict->addStream(start, (size_t)length); 501 dict->addStream(start, (size_t)length);
502 return endstream; 502 return endstream;
503 } 503 }
504 return start; 504 return start;
505 } 505 }
506 506
507 static unsigned char* readDictionary(unsigned char* start, unsigned char* end, S kPdfObject* dict, SkPdfAllocator* allocator) { 507 static unsigned char* readDictionary(unsigned char* start, unsigned char* end, S kPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
508 SkPdfObject::makeEmptyDictionary(dict); 508 SkPdfObject::makeEmptyDictionary(dict);
509 509
510 start = skipPdfWhiteSpaces(start, end); 510 start = skipPdfWhiteSpaces(start, end);
511 511
512 while (start < end && *start == kNamed_PdfDelimiter) { 512 while (start < end && *start == kNamed_PdfDelimiter) {
513 SkPdfObject key; 513 SkPdfObject key;
514 *start = '\0'; 514 *start = '\0';
515 start++; 515 start++;
516 start = readName(start, end, &key); 516 start = readName(start, end, &key);
517 start = skipPdfWhiteSpaces(start, end); 517 start = skipPdfWhiteSpaces(start, end);
518 518
519 if (start < end) { 519 if (start < end) {
520 SkPdfObject* value = allocator->allocObject(); 520 SkPdfObject* value = allocator->allocObject();
521 start = nextObject(start, end, value, allocator); 521 start = nextObject(start, end, value, allocator, doc);
522 522
523 start = skipPdfWhiteSpaces(start, end); 523 start = skipPdfWhiteSpaces(start, end);
524 524
525 if (start < end) { 525 if (start < end) {
526 // seems we have an indirect reference 526 // seems we have an indirect reference
527 if (isPdfDigit(*start)) { 527 if (isPdfDigit(*start)) {
528 SkPdfObject generation; 528 SkPdfObject generation;
529 start = nextObject(start, end, &generation, allocator); 529 start = nextObject(start, end, &generation, allocator, doc);
530 530
531 SkPdfObject keywordR; 531 SkPdfObject keywordR;
532 start = nextObject(start, end, &keywordR, allocator); 532 start = nextObject(start, end, &keywordR, allocator, doc);
533 533
534 if (value->isInteger() && generation.isInteger() && keywordR .isKeywordReference()) { 534 if (value->isInteger() && generation.isInteger() && keywordR .isKeywordReference()) {
535 int64_t id = value->intValue(); 535 int64_t id = value->intValue();
536 value->reset(); 536 value->reset();
537 SkPdfObject::makeReference((unsigned int)id, (unsigned i nt)generation.intValue(), value); 537 SkPdfObject::makeReference((unsigned int)id, (unsigned i nt)generation.intValue(), value);
538 dict->set(&key, value); 538 dict->set(&key, value);
539 } else { 539 } else {
540 // error, ignore 540 // error, ignore
541 dict->set(&key, value); 541 dict->set(&key, value);
542 } 542 }
(...skipping 16 matching lines...) Expand all
559 // TODO(edisonn): options to ignore these errors 559 // TODO(edisonn): options to ignore these errors
560 560
561 // now we should expect >> 561 // now we should expect >>
562 start = skipPdfWhiteSpaces(start, end); 562 start = skipPdfWhiteSpaces(start, end);
563 start = endOfPdfToken(start, end); // > 563 start = endOfPdfToken(start, end); // >
564 start = endOfPdfToken(start, end); // > 564 start = endOfPdfToken(start, end); // >
565 565
566 // TODO(edisonn): read stream ... put dict and stream in a struct, and have a pointer to struct ... 566 // TODO(edisonn): read stream ... put dict and stream in a struct, and have a pointer to struct ...
567 // or allocate 2 objects, and if there is no stream, free it to be used by someone else? or just leave it ? 567 // or allocate 2 objects, and if there is no stream, free it to be used by someone else? or just leave it ?
568 568
569 start = readStream(start, end, dict); 569 start = readStream(start, end, dict, doc);
570 570
571 return start; 571 return start;
572 } 572 }
573 573
574 unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator) { 574 unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
575 unsigned char* current; 575 unsigned char* current;
576 576
577 // skip white spaces 577 // skip white spaces
578 start = skipPdfWhiteSpaces(start, end); 578 start = skipPdfWhiteSpaces(start, end);
579 579
580 current = endOfPdfToken(start, end); 580 current = endOfPdfToken(start, end);
581 581
582 // no token, len would be 0 582 // no token, len would be 0
583 if (current == start) { 583 if (current == start) {
584 return NULL; 584 return NULL;
585 } 585 }
586 586
587 int tokenLen = current - start; 587 int tokenLen = current - start;
588 588
589 if (tokenLen == 1) { 589 if (tokenLen == 1) {
590 // start array 590 // start array
591 switch (*start) { 591 switch (*start) {
592 case kOpenedSquareBracket_PdfDelimiter: 592 case kOpenedSquareBracket_PdfDelimiter:
593 *start = '\0'; 593 *start = '\0';
594 SkPdfObject::makeEmptyArray(token); 594 SkPdfObject::makeEmptyArray(token);
595 return readArray(current, end, token, allocator); 595 return readArray(current, end, token, allocator, doc);
596 596
597 case kOpenedRoundBracket_PdfDelimiter: 597 case kOpenedRoundBracket_PdfDelimiter:
598 *start = '\0'; 598 *start = '\0';
599 return readString(start, end, token); 599 return readString(start, end, token);
600 600
601 case kOpenedInequityBracket_PdfDelimiter: 601 case kOpenedInequityBracket_PdfDelimiter:
602 *start = '\0'; 602 *start = '\0';
603 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDel imiter) { 603 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDel imiter) {
604 // TODO(edisonn): pass here the length somehow? 604 // TODO(edisonn): pass here the length somehow?
605 return readDictionary(start + 2, end, token, allocator); // skip << 605 return readDictionary(start + 2, end, token, allocator, doc) ; // skip <<
606 } else { 606 } else {
607 return readHexString(start + 1, end, token); // skip < 607 return readHexString(start + 1, end, token); // skip <
608 } 608 }
609 609
610 case kNamed_PdfDelimiter: 610 case kNamed_PdfDelimiter:
611 *start = '\0'; 611 *start = '\0';
612 return readName(start + 1, end, token); 612 return readName(start + 1, end, token);
613 613
614 // TODO(edisonn): what to do curly brackets? read spec! 614 // TODO(edisonn): what to do curly brackets? read spec!
615 case kOpenedCurlyBracket_PdfDelimiter: 615 case kOpenedCurlyBracket_PdfDelimiter:
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
671 if (fCurrentUsed >= BUFFER_SIZE) { 671 if (fCurrentUsed >= BUFFER_SIZE) {
672 fHistory.push(fCurrent); 672 fHistory.push(fCurrent);
673 fCurrent = allocBlock(); 673 fCurrent = allocBlock();
674 fCurrentUsed = 0; 674 fCurrentUsed = 0;
675 } 675 }
676 fCurrentUsed++; 676 fCurrentUsed++;
677 return &fCurrent[fCurrentUsed - 1]; 677 return &fCurrent[fCurrentUsed - 1];
678 } 678 }
679 679
680 // TODO(edisonn): perf: do not copy the buffers, but use them, and mark cache the result, so there is no need of a second pass 680 // TODO(edisonn): perf: do not copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
681 SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkP dfMapper* mapper, SkPdfAllocator* allocator) : fMapper(mapper), fAllocator(alloc ator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), f HasPutBack(false) { 681 SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkP dfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc) , fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompress edStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
682 unsigned char* buffer = NULL; 682 unsigned char* buffer = NULL;
683 size_t len = 0; 683 size_t len = 0;
684 objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator); 684 objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator);
685 // TODO(edisonn): hack, find end of object 685 // TODO(edisonn): hack, find end of object
686 char* endobj = strstr((char*)buffer, "endobj"); 686 char* endobj = strstr((char*)buffer, "endobj");
687 if (endobj) { 687 if (endobj) {
688 len = endobj - (char*)buffer + strlen("endobj"); 688 len = endobj - (char*)buffer + strlen("endobj");
689 } 689 }
690 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator- >alloc(len); 690 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator- >alloc(len);
691 fUncompressedStreamEnd = fUncompressedStream + len; 691 fUncompressedStreamEnd = fUncompressedStream + len;
692 memcpy(fUncompressedStream, buffer, len); 692 memcpy(fUncompressedStream, buffer, len);
693 } 693 }
694 694
695 SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator) : fMapper(mapper), fAllocator(a llocator), fEmpty(false), fHasPutBack(false) { 695 SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc( doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
696 // TODO(edisonn): hack, find end of object 696 // TODO(edisonn): hack, find end of object
697 char* endobj = strstr((char*)buffer, "endobj"); 697 char* endobj = strstr((char*)buffer, "endobj");
698 if (endobj) { 698 if (endobj) {
699 len = endobj - (char*)buffer + strlen("endobj"); 699 len = endobj - (char*)buffer + strlen("endobj");
700 } 700 }
701 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator- >alloc(len); 701 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator- >alloc(len);
702 fUncompressedStreamEnd = fUncompressedStream + len; 702 fUncompressedStreamEnd = fUncompressedStream + len;
703 memcpy(fUncompressedStream, buffer, len); 703 memcpy(fUncompressedStream, buffer, len);
704 } 704 }
705 705
706 SkPdfNativeTokenizer::~SkPdfNativeTokenizer() { 706 SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
707 } 707 }
708 708
709 bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) { 709 bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
710 token->fKeyword = NULL; 710 token->fKeyword = NULL;
711 token->fObject = NULL; 711 token->fObject = NULL;
712 712
713 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedS treamEnd); 713 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedS treamEnd);
714 if (fUncompressedStream >= fUncompressedStreamEnd) { 714 if (fUncompressedStream >= fUncompressedStreamEnd) {
715 return false; 715 return false;
716 } 716 }
717 717
718 SkPdfObject obj; 718 SkPdfObject obj;
719 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd , &obj, fAllocator); 719 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd , &obj, fAllocator, fDoc);
720 720
721 // If it is a keyword, we will only get the pointer of the string 721 // If it is a keyword, we will only get the pointer of the string
722 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) { 722 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
723 token->fKeyword = obj.c_str(); 723 token->fKeyword = obj.c_str();
724 token->fKeywordLength = obj.len(); 724 token->fKeywordLength = obj.len();
725 token->fType = kKeyword_TokenType; 725 token->fType = kKeyword_TokenType;
726 } else { 726 } else {
727 SkPdfObject* pobj = fAllocator->allocObject(); 727 SkPdfObject* pobj = fAllocator->allocObject();
728 *pobj = obj; 728 *pobj = obj;
729 token->fObject = pobj; 729 token->fObject = pobj;
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
764 if (fEmpty) { 764 if (fEmpty) {
765 #ifdef PDF_TRACE 765 #ifdef PDF_TRACE
766 printf("EMPTY TOKENIZER\n"); 766 printf("EMPTY TOKENIZER\n");
767 #endif 767 #endif
768 return false; 768 return false;
769 } 769 }
770 770
771 return readTokenCore(token); 771 return readTokenCore(token);
772 } 772 }
773 773
OLDNEW
« no previous file with comments | « experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698