Index: experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp |
=================================================================== |
--- experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp (revision 10010) |
+++ experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp (working copy) |
@@ -4,7 +4,31 @@ |
#include "SkPdfConfig.h" |
#include "SkPdfStreamCommonDictionary_autogen.h" |
+#include "SkPdfImageDictionary_autogen.h" |
+// TODO(edisonn): perf!!! |
+// there could be 0s between start and end! but not in the needle. |
+static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) { |
+ int needleLen = strlen(needle); |
+ if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) && |
+ strncmp(hayStart, needle, needleLen) == 0) { |
+ return hayStart; |
+ } |
+ |
+ hayStart++; |
+ |
+ while (hayStart < hayEnd) { |
+ if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) && |
+ (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) && |
+ strncmp(hayStart, needle, needleLen) == 0) { |
+ return hayStart; |
+ } |
+ hayStart++; |
+ } |
+ return NULL; |
+} |
+ |
+ |
static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* end) { |
while (start < end && isPdfWhiteSpace(*start)) { |
if (*start == kComment_PdfDelimiter) { |
@@ -68,6 +92,7 @@ |
} |
array->appendInArray(newObj); |
} |
+ printf("break;\n"); // DO NOT SUBMIT! |
// TODO(edisonn): report not reached, we should never get here |
// TODO(edisonn): there might be a bug here, enable an assert and run it on files |
// or it might be that the files were actually corrupted |
@@ -458,6 +483,11 @@ |
start += 2; |
} else if (start[0] == kLF_PdfWhiteSpace) { |
start += 1; |
+ } else if (isPdfWhiteSpace(start[0])) { |
+ start += 1; |
+ } else { |
+ // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ? |
+ // TODO(edisonn): warning? |
} |
SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict; |
@@ -475,17 +505,12 @@ |
if (length < 0) { |
// scan the buffer, until we find first endstream |
// TODO(edisonn): all buffers must have a 0 at the end now, |
- // TODO(edisonn): hack (mark end of content with 0) |
- unsigned char lastCh = *end; |
- *end = '\0'; |
- //SkASSERT(*end == '\0'); |
- unsigned char* endstream = (unsigned char*)strstr((const char*)start, "endstream"); |
- *end = lastCh; |
+ unsigned char* endstream = (unsigned char*)strrstrk((char*)start, (char*)end, "endstream"); |
if (endstream) { |
length = endstream - start; |
if (*(endstream-1) == kLF_PdfWhiteSpace) length--; |
- if (*(endstream-1) == kCR_PdfWhiteSpace) length--; |
+ if (*(endstream-2) == kCR_PdfWhiteSpace) length--; |
} |
} |
if (length >= 0) { |
@@ -507,6 +532,37 @@ |
return start; |
} |
+static unsigned char* readInlineImageStream(unsigned char* start, unsigned char* end, SkPdfImageDictionary* inlineImage, SkNativeParsedPDF* doc) { |
+ // We already processed ID keyword, and we should be positioned immediately after it |
+ |
+ // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes |
+ if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) { |
+ start += 2; |
+ } else if (start[0] == kLF_PdfWhiteSpace) { |
+ start += 1; |
+ } else if (isPdfWhiteSpace(start[0])) { |
+ start += 1; |
+ } else { |
+ SkASSERT(isPdfDelimiter(start[0])); |
+ // TODO(edisonn): warning? |
+ } |
+ |
+ unsigned char* endstream = (unsigned char*)strrstrk((char*)start, (char*)end, "EI"); |
+ unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI") |
+ |
+ if (endstream) { |
+ int length = endstream - start; |
+ if (*(endstream-1) == kLF_PdfWhiteSpace) length--; |
+ if (*(endstream-2) == kCR_PdfWhiteSpace) length--; |
+ inlineImage->addStream(start, (size_t)length); |
+ } else { |
+ // TODO(edisonn): report error in inline image stream (ID-EI) section |
+ // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly |
+ return end; |
+ } |
+ return endEI; |
+} |
+ |
static unsigned char* readDictionary(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) { |
SkPdfObject::makeEmptyDictionary(dict); |
@@ -563,12 +619,17 @@ |
// now we should expect >> |
start = skipPdfWhiteSpaces(start, end); |
- start = endOfPdfToken(start, end); // > |
- start = endOfPdfToken(start, end); // > |
+ if (*start != kClosedInequityBracket_PdfDelimiter) { |
+ // TODO(edisonn): report/warning |
+ } |
+ *start = '\0'; |
+ start++; // skip > |
+ if (*start != kClosedInequityBracket_PdfDelimiter) { |
+ // TODO(edisonn): report/warning |
+ } |
+ *start = '\0'; |
+ start++; // skip > |
- // TODO(edisonn): read stream ... put dict and stream in a struct, and have a pointer to struct ... |
- // or alocate 2 objects, and if there is no stream, free it to be used by someone else? or just leave it ? |
- |
start = readStream(start, end, dict, doc); |
return start; |
@@ -604,6 +665,7 @@ |
case kOpenedInequityBracket_PdfDelimiter: |
*start = '\0'; |
if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) { |
+ start[1] = '\0'; // optional |
// TODO(edisonn): pass here the length somehow? |
return readDictionary(start + 2, end, token, allocator, doc); // skip << |
} else { |
@@ -688,7 +750,7 @@ |
size_t len = 0; |
objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator); |
// TODO(edisonn): hack, find end of object |
- char* endobj = strstr((char*)buffer, "endobj"); |
+ char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj"); |
if (endobj) { |
len = endobj - (char*)buffer + strlen("endobj"); |
} |
@@ -699,7 +761,7 @@ |
SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) { |
// TODO(edisonn): hack, find end of object |
- char* endobj = strstr((char*)buffer, "endobj"); |
+ char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj"); |
if (endobj) { |
len = endobj - (char*)buffer + strlen("endobj"); |
} |
@@ -775,3 +837,103 @@ |
return readTokenCore(token); |
} |
+ |
+#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName) |
+ |
+// keys |
+DECLARE_PDF_NAME(BitsPerComponent); |
+DECLARE_PDF_NAME(ColorSpace); |
+DECLARE_PDF_NAME(Decode); |
+DECLARE_PDF_NAME(DecodeParms); |
+DECLARE_PDF_NAME(Filter); |
+DECLARE_PDF_NAME(Height); |
+DECLARE_PDF_NAME(ImageMask); |
+DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations? |
+DECLARE_PDF_NAME(Interpolate); |
+DECLARE_PDF_NAME(Width); |
+ |
+// values |
+DECLARE_PDF_NAME(DeviceGray); |
+DECLARE_PDF_NAME(DeviceRGB); |
+DECLARE_PDF_NAME(DeviceCMYK); |
+DECLARE_PDF_NAME(Indexed); |
+DECLARE_PDF_NAME(ASCIIHexDecode); |
+DECLARE_PDF_NAME(ASCII85Decode); |
+DECLARE_PDF_NAME(LZWDecode); |
+DECLARE_PDF_NAME(FlateDecode); // PDF 1.2 |
+DECLARE_PDF_NAME(RunLengthDecode); |
+DECLARE_PDF_NAME(CCITTFaxDecode); |
+DECLARE_PDF_NAME(DCTDecode); |
+ |
+#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName; |
+ |
+ |
+static SkPdfObject* inlineImageKeyAbbreviationExpand(SkPdfObject* key) { |
+ if (!key || !key->isName()) { |
+ return key; |
+ } |
+ |
+ // TODO(edisonn): use autogenerated code! |
+ HANDLE_NAME_ABBR(key, BitsPerComponent, BPC); |
+ HANDLE_NAME_ABBR(key, ColorSpace, CS); |
+ HANDLE_NAME_ABBR(key, Decode, D); |
+ HANDLE_NAME_ABBR(key, DecodeParms, DP); |
+ HANDLE_NAME_ABBR(key, Filter, F); |
+ HANDLE_NAME_ABBR(key, Height, H); |
+ HANDLE_NAME_ABBR(key, ImageMask, IM); |
+// HANDLE_NAME_ABBR(key, Intent, ); |
+ HANDLE_NAME_ABBR(key, Interpolate, I); |
+ HANDLE_NAME_ABBR(key, Width, W); |
+ |
+ return key; |
+} |
+ |
+static SkPdfObject* inlineImageValueAbbreviationExpand(SkPdfObject* value) { |
+ if (!value || !value->isName()) { |
+ return value; |
+ } |
+ |
+ // TODO(edisonn): use autogenerated code! |
+ HANDLE_NAME_ABBR(value, DeviceGray, G); |
+ HANDLE_NAME_ABBR(value, DeviceRGB, RGB); |
+ HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK); |
+ HANDLE_NAME_ABBR(value, Indexed, I); |
+ HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx); |
+ HANDLE_NAME_ABBR(value, ASCII85Decode, A85); |
+ HANDLE_NAME_ABBR(value, LZWDecode, LZW); |
+ HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2) |
+ HANDLE_NAME_ABBR(value, RunLengthDecode, RL); |
+ HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF); |
+ HANDLE_NAME_ABBR(value, DCTDecode, DCT); |
+ |
+ return value; |
+} |
+ |
+SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() { |
+ // BI already processed |
+ fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd); |
+ if (fUncompressedStream >= fUncompressedStreamEnd) { |
+ return NULL; |
+ } |
+ |
+ SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject(); |
+ SkPdfObject::makeEmptyDictionary(inlineImage); |
+ |
+ while (fUncompressedStream < fUncompressedStreamEnd) { |
+ SkPdfObject* key = fAllocator->allocObject(); |
+ fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc); |
+ |
+ if (key->isKeyword() && key->len() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID |
+ fUncompressedStream = readInlineImageStream(fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc); |
+ return inlineImage; |
+ } else { |
+ SkPdfObject* obj = fAllocator->allocObject(); |
+ fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc); |
+ // TODO(edisonn): perf maybe we should not expand abreviation like this |
+ inlineImage->set(inlineImageKeyAbbreviationExpand(key), |
+ inlineImageValueAbbreviationExpand(obj)); |
+ } |
+ } |
+ // TODO(edisonn): report end of data with inline image without an EI |
+ return inlineImage; |
+} |