experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp - Issue 19243003: pdfviewer: native inline images support

Unified Diff: experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp

Issue 19243003: pdfviewer: native inline images support (Closed) Base URL: http://skia.googlecode.com/svn/trunk/

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« no previous file with comments | « experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h ('k') | experimental/PdfViewer/pdfparser/native/SkPdfObject.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp

===================================================================

--- experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp (revision 10010)

+++ experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp (working copy)

@@ -4,7 +4,31 @@

#include "SkPdfConfig.h"

#include "SkPdfStreamCommonDictionary_autogen.h"

+#include "SkPdfImageDictionary_autogen.h"

+// TODO(edisonn): perf!!!

+// There may be 0 (NUL) bytes between hayStart and hayEnd, but not in the needle.

+static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {

+ int needleLen = strlen(needle);

+ if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&

+ strncmp(hayStart, needle, needleLen) == 0) {

+ return hayStart;

+ }

+ hayStart++;

+ while (hayStart < hayEnd) {

+ if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&

+ (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&

+ strncmp(hayStart, needle, needleLen) == 0) {

+ return hayStart;

+ }

+ hayStart++;

+ }

+ return NULL;

static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* end) {

while (start < end && isPdfWhiteSpace(*start)) {

if (*start == kComment_PdfDelimiter) {

@@ -68,6 +92,7 @@

}

array->appendInArray(newObj);

}

+ printf("break;\n"); // DO NOT SUBMIT!

// TODO(edisonn): report not reached, we should never get here

// TODO(edisonn): there might be a bug here, enable an assert and run it on files

// or it might be that the files were actually corrupted

@@ -458,6 +483,11 @@

start += 2;

} else if (start[0] == kLF_PdfWhiteSpace) {

start += 1;

+ } else if (isPdfWhiteSpace(start[0])) {

+ start += 1;

+ } else {

+ // TODO(edisonn): warn? it should be isPdfDelimiter(start[0])

+ // TODO(edisonn): warning?

}

SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;

@@ -475,17 +505,12 @@

if (length < 0) {

// scan the buffer, until we find first endstream

// TODO(edisonn): all buffers must have a 0 at the end now,

- // TODO(edisonn): hack (mark end of content with 0)

- unsigned char lastCh = *end;

- *end = '\0';

- //SkASSERT(*end == '\0');

- unsigned char* endstream = (unsigned char*)strstr((const char*)start, "endstream");

- *end = lastCh;

+ unsigned char* endstream = (unsigned char*)strrstrk((char*)start, (char*)end, "endstream");

if (endstream) {

length = endstream - start;

if (*(endstream-1) == kLF_PdfWhiteSpace) length--;

- if (*(endstream-1) == kCR_PdfWhiteSpace) length--;

+ if (*(endstream-2) == kCR_PdfWhiteSpace) length--;

}

if (length >= 0) {

@@ -507,6 +532,37 @@

return start;

}

+static unsigned char* readInlineImageStream(unsigned char* start, unsigned char* end, SkPdfImageDictionary* inlineImage, SkNativeParsedPDF* doc) {

+ // We already processed ID keyword, and we should be positioned immediately after it

+ // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes

+ if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {

+ start += 2;

+ } else if (start[0] == kLF_PdfWhiteSpace) {

+ start += 1;

+ } else if (isPdfWhiteSpace(start[0])) {

+ start += 1;

+ } else {

+ SkASSERT(isPdfDelimiter(start[0]));

+ // TODO(edisonn): warning?

+ }

+ unsigned char* endstream = (unsigned char*)strrstrk((char*)start, (char*)end, "EI");

+ unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")

+ if (endstream) {

+ int length = endstream - start;

+ if (*(endstream-1) == kLF_PdfWhiteSpace) length--;

+ if (*(endstream-2) == kCR_PdfWhiteSpace) length--;

+ inlineImage->addStream(start, (size_t)length);

+ } else {

+ // TODO(edisonn): report error in inline image stream (ID-EI) section

+ // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly

+ return end;

+ }

+ return endEI;

static unsigned char* readDictionary(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {

SkPdfObject::makeEmptyDictionary(dict);

@@ -563,12 +619,17 @@

// now we should expect >>

start = skipPdfWhiteSpaces(start, end);

- start = endOfPdfToken(start, end); // >

+ if (*start != kClosedInequityBracket_PdfDelimiter) {

+ // TODO(edisonn): report/warning

+ }

+ *start = '\0';

+ start++; // skip >

+ if (*start != kClosedInequityBracket_PdfDelimiter) {

+ // TODO(edisonn): report/warning

+ }

+ *start = '\0';

+ start++; // skip >

- // TODO(edisonn): read stream ... put dict and stream in a struct, and have a pointer to struct ...

- // or alocate 2 objects, and if there is no stream, free it to be used by someone else? or just leave it ?

start = readStream(start, end, dict, doc);

return start;

@@ -604,6 +665,7 @@

case kOpenedInequityBracket_PdfDelimiter:

*start = '\0';

if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {

+ start[1] = '\0'; // optional

// TODO(edisonn): pass here the length somehow?

return readDictionary(start + 2, end, token, allocator, doc); // skip <<

} else {

@@ -688,7 +750,7 @@

size_t len = 0;

objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator);

// TODO(edisonn): hack, find end of object

- char* endobj = strstr((char*)buffer, "endobj");

+ char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");

if (endobj) {

len = endobj - (char*)buffer + strlen("endobj");

}

@@ -699,7 +761,7 @@

SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {

// TODO(edisonn): hack, find end of object

- char* endobj = strstr((char*)buffer, "endobj");

+ char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");

if (endobj) {

len = endobj - (char*)buffer + strlen("endobj");

}

@@ -775,3 +837,103 @@

return readTokenCore(token);

}

+#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)

+// keys

+DECLARE_PDF_NAME(BitsPerComponent);

+DECLARE_PDF_NAME(ColorSpace);

+DECLARE_PDF_NAME(Decode);

+DECLARE_PDF_NAME(DecodeParms);

+DECLARE_PDF_NAME(Filter);

+DECLARE_PDF_NAME(Height);

+DECLARE_PDF_NAME(ImageMask);

+DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abbreviations?

+DECLARE_PDF_NAME(Interpolate);

+DECLARE_PDF_NAME(Width);

+// values

+DECLARE_PDF_NAME(DeviceGray);

+DECLARE_PDF_NAME(DeviceRGB);

+DECLARE_PDF_NAME(DeviceCMYK);

+DECLARE_PDF_NAME(Indexed);

+DECLARE_PDF_NAME(ASCIIHexDecode);

+DECLARE_PDF_NAME(ASCII85Decode);

+DECLARE_PDF_NAME(LZWDecode);

+DECLARE_PDF_NAME(FlateDecode); // PDF 1.2

+DECLARE_PDF_NAME(RunLengthDecode);

+DECLARE_PDF_NAME(CCITTFaxDecode);

+DECLARE_PDF_NAME(DCTDecode);

+#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;

+static SkPdfObject* inlineImageKeyAbbreviationExpand(SkPdfObject* key) {

+ if (!key || !key->isName()) {

+ return key;

+ }

+ // TODO(edisonn): use autogenerated code!

+ HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);

+ HANDLE_NAME_ABBR(key, ColorSpace, CS);

+ HANDLE_NAME_ABBR(key, Decode, D);

+ HANDLE_NAME_ABBR(key, DecodeParms, DP);

+ HANDLE_NAME_ABBR(key, Filter, F);

+ HANDLE_NAME_ABBR(key, Height, H);

+ HANDLE_NAME_ABBR(key, ImageMask, IM);

+// HANDLE_NAME_ABBR(key, Intent, );

+ HANDLE_NAME_ABBR(key, Interpolate, I);

+ HANDLE_NAME_ABBR(key, Width, W);

+ return key;

+static SkPdfObject* inlineImageValueAbbreviationExpand(SkPdfObject* value) {

+ if (!value || !value->isName()) {

+ return value;

+ }

+ // TODO(edisonn): use autogenerated code!

+ HANDLE_NAME_ABBR(value, DeviceGray, G);

+ HANDLE_NAME_ABBR(value, DeviceRGB, RGB);

+ HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);

+ HANDLE_NAME_ABBR(value, Indexed, I);

+ HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);

+ HANDLE_NAME_ABBR(value, ASCII85Decode, A85);

+ HANDLE_NAME_ABBR(value, LZWDecode, LZW);

+ HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)

+ HANDLE_NAME_ABBR(value, RunLengthDecode, RL);

+ HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);

+ HANDLE_NAME_ABBR(value, DCTDecode, DCT);

+ return value;

+SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {

+ // BI already processed

+ fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);

+ if (fUncompressedStream >= fUncompressedStreamEnd) {

+ return NULL;

+ }

+ SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();

+ SkPdfObject::makeEmptyDictionary(inlineImage);

+ while (fUncompressedStream < fUncompressedStreamEnd) {

+ SkPdfObject* key = fAllocator->allocObject();

+ fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);

+ if (key->isKeyword() && key->len() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID

+ fUncompressedStream = readInlineImageStream(fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);

+ return inlineImage;

+ } else {

+ SkPdfObject* obj = fAllocator->allocObject();

+ fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);

+ // TODO(edisonn): perf: maybe we should not expand abbreviations like this

+ inlineImage->set(inlineImageKeyAbbreviationExpand(key),

+ inlineImageValueAbbreviationExpand(obj));

+ }

+ // TODO(edisonn): report end of data with inline image without an EI

+ return inlineImage;