Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(690)

Unified Diff: experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp

Issue 19243003: pdfviewer: native inline images support (Closed) Base URL: http://skia.googlecode.com/svn/trunk/
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp
===================================================================
--- experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp (revision 10010)
+++ experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp (working copy)
@@ -4,7 +4,31 @@
#include "SkPdfConfig.h"
#include "SkPdfStreamCommonDictionary_autogen.h"
+#include "SkPdfImageDictionary_autogen.h"
+// TODO(edisonn): perf!!!
+// there could be 0s between start and end! but not in the needle.
+static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
+ int needleLen = strlen(needle);
+ if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
+ strncmp(hayStart, needle, needleLen) == 0) {
+ return hayStart;
+ }
+
+ hayStart++;
+
+ while (hayStart < hayEnd) {
+ if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
+ (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
+ strncmp(hayStart, needle, needleLen) == 0) {
+ return hayStart;
+ }
+ hayStart++;
+ }
+ return NULL;
+}
+
+
static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* end) {
while (start < end && isPdfWhiteSpace(*start)) {
if (*start == kComment_PdfDelimiter) {
@@ -68,6 +92,7 @@
}
array->appendInArray(newObj);
}
+ printf("break;\n"); // DO NOT SUBMIT!
// TODO(edisonn): report not reached, we should never get here
// TODO(edisonn): there might be a bug here, enable an assert and run it on files
// or it might be that the files were actually corrupted
@@ -458,6 +483,11 @@
start += 2;
} else if (start[0] == kLF_PdfWhiteSpace) {
start += 1;
+ } else if (isPdfWhiteSpace(start[0])) {
+ start += 1;
+ } else {
+ // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
+ // TODO(edisonn): warning?
}
SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
@@ -475,17 +505,12 @@
if (length < 0) {
// scan the buffer, until we find first endstream
// TODO(edisonn): all buffers must have a 0 at the end now,
- // TODO(edisonn): hack (mark end of content with 0)
- unsigned char lastCh = *end;
- *end = '\0';
- //SkASSERT(*end == '\0');
- unsigned char* endstream = (unsigned char*)strstr((const char*)start, "endstream");
- *end = lastCh;
+ unsigned char* endstream = (unsigned char*)strrstrk((char*)start, (char*)end, "endstream");
if (endstream) {
length = endstream - start;
if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
- if (*(endstream-1) == kCR_PdfWhiteSpace) length--;
+ if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
}
}
if (length >= 0) {
@@ -507,6 +532,37 @@
return start;
}
+static unsigned char* readInlineImageStream(unsigned char* start, unsigned char* end, SkPdfImageDictionary* inlineImage, SkNativeParsedPDF* doc) {
+ // We already processed ID keyword, and we should be positioned immediately after it
+
+ // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
+ if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
+ start += 2;
+ } else if (start[0] == kLF_PdfWhiteSpace) {
+ start += 1;
+ } else if (isPdfWhiteSpace(start[0])) {
+ start += 1;
+ } else {
+ SkASSERT(isPdfDelimiter(start[0]));
+ // TODO(edisonn): warning?
+ }
+
+ unsigned char* endstream = (unsigned char*)strrstrk((char*)start, (char*)end, "EI");
+ unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
+
+ if (endstream) {
+ int length = endstream - start;
+ if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
+ if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
+ inlineImage->addStream(start, (size_t)length);
+ } else {
+ // TODO(edisonn): report error in inline image stream (ID-EI) section
+ // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
+ return end;
+ }
+ return endEI;
+}
+
static unsigned char* readDictionary(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
SkPdfObject::makeEmptyDictionary(dict);
@@ -563,12 +619,17 @@
// now we should expect >>
start = skipPdfWhiteSpaces(start, end);
- start = endOfPdfToken(start, end); // >
- start = endOfPdfToken(start, end); // >
+ if (*start != kClosedInequityBracket_PdfDelimiter) {
+ // TODO(edisonn): report/warning
+ }
+ *start = '\0';
+ start++; // skip >
+ if (*start != kClosedInequityBracket_PdfDelimiter) {
+ // TODO(edisonn): report/warning
+ }
+ *start = '\0';
+ start++; // skip >
- // TODO(edisonn): read stream ... put dict and stream in a struct, and have a pointer to struct ...
- // or alocate 2 objects, and if there is no stream, free it to be used by someone else? or just leave it ?
-
start = readStream(start, end, dict, doc);
return start;
@@ -604,6 +665,7 @@
case kOpenedInequityBracket_PdfDelimiter:
*start = '\0';
if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
+ start[1] = '\0'; // optional
// TODO(edisonn): pass here the length somehow?
return readDictionary(start + 2, end, token, allocator, doc); // skip <<
} else {
@@ -688,7 +750,7 @@
size_t len = 0;
objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator);
// TODO(edisonn): hack, find end of object
- char* endobj = strstr((char*)buffer, "endobj");
+ char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
if (endobj) {
len = endobj - (char*)buffer + strlen("endobj");
}
@@ -699,7 +761,7 @@
SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
// TODO(edisonn): hack, find end of object
- char* endobj = strstr((char*)buffer, "endobj");
+ char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
if (endobj) {
len = endobj - (char*)buffer + strlen("endobj");
}
@@ -775,3 +837,103 @@
return readTokenCore(token);
}
+
+#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
+
+// keys
+DECLARE_PDF_NAME(BitsPerComponent);
+DECLARE_PDF_NAME(ColorSpace);
+DECLARE_PDF_NAME(Decode);
+DECLARE_PDF_NAME(DecodeParms);
+DECLARE_PDF_NAME(Filter);
+DECLARE_PDF_NAME(Height);
+DECLARE_PDF_NAME(ImageMask);
+DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
+DECLARE_PDF_NAME(Interpolate);
+DECLARE_PDF_NAME(Width);
+
+// values
+DECLARE_PDF_NAME(DeviceGray);
+DECLARE_PDF_NAME(DeviceRGB);
+DECLARE_PDF_NAME(DeviceCMYK);
+DECLARE_PDF_NAME(Indexed);
+DECLARE_PDF_NAME(ASCIIHexDecode);
+DECLARE_PDF_NAME(ASCII85Decode);
+DECLARE_PDF_NAME(LZWDecode);
+DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
+DECLARE_PDF_NAME(RunLengthDecode);
+DECLARE_PDF_NAME(CCITTFaxDecode);
+DECLARE_PDF_NAME(DCTDecode);
+
+#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
+
+
+static SkPdfObject* inlineImageKeyAbbreviationExpand(SkPdfObject* key) {
+ if (!key || !key->isName()) {
+ return key;
+ }
+
+ // TODO(edisonn): use autogenerated code!
+ HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
+ HANDLE_NAME_ABBR(key, ColorSpace, CS);
+ HANDLE_NAME_ABBR(key, Decode, D);
+ HANDLE_NAME_ABBR(key, DecodeParms, DP);
+ HANDLE_NAME_ABBR(key, Filter, F);
+ HANDLE_NAME_ABBR(key, Height, H);
+ HANDLE_NAME_ABBR(key, ImageMask, IM);
+// HANDLE_NAME_ABBR(key, Intent, );
+ HANDLE_NAME_ABBR(key, Interpolate, I);
+ HANDLE_NAME_ABBR(key, Width, W);
+
+ return key;
+}
+
+static SkPdfObject* inlineImageValueAbbreviationExpand(SkPdfObject* value) {
+ if (!value || !value->isName()) {
+ return value;
+ }
+
+ // TODO(edisonn): use autogenerated code!
+ HANDLE_NAME_ABBR(value, DeviceGray, G);
+ HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
+ HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
+ HANDLE_NAME_ABBR(value, Indexed, I);
+ HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
+ HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
+ HANDLE_NAME_ABBR(value, LZWDecode, LZW);
+ HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
+ HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
+ HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
+ HANDLE_NAME_ABBR(value, DCTDecode, DCT);
+
+ return value;
+}
+
+SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
+ // BI already processed
+ fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
+ if (fUncompressedStream >= fUncompressedStreamEnd) {
+ return NULL;
+ }
+
+ SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
+ SkPdfObject::makeEmptyDictionary(inlineImage);
+
+ while (fUncompressedStream < fUncompressedStreamEnd) {
+ SkPdfObject* key = fAllocator->allocObject();
+ fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
+
+ if (key->isKeyword() && key->len() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
+ fUncompressedStream = readInlineImageStream(fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
+ return inlineImage;
+ } else {
+ SkPdfObject* obj = fAllocator->allocObject();
+ fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
+ // TODO(edisonn): perf maybe we should not expand abreviation like this
+ inlineImage->set(inlineImageKeyAbbreviationExpand(key),
+ inlineImageValueAbbreviationExpand(obj));
+ }
+ }
+ // TODO(edisonn): report end of data with inline image without an EI
+ return inlineImage;
+}
« no previous file with comments | « experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h ('k') | experimental/PdfViewer/pdfparser/native/SkPdfObject.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698