| Index: experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp
|
| ===================================================================
|
| --- experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp (revision 9879)
|
| +++ experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp (working copy)
|
| @@ -1,12 +1,469 @@
|
| -
|
| #include "SkNativeParsedPDF.h"
|
| +#include "SkPdfNativeTokenizer.h"
|
| +#include "SkPdfBasics.h"
|
| +#include "SkPdfParser.h"
|
| +#include "SkPdfObject.h"
|
|
|
| -SkNativeParsedPDF::SkNativeParsedPDF() {
|
| - // TODO(edisonn): Auto-generated constructor stub
|
| +#include <stdio.h>
|
| +#include <string.h>
|
| +#include <sys/types.h>
|
| +#include <sys/stat.h>
|
|
|
| +#include "SkPdfFileTrailerDictionary_autogen.h"
|
| +#include "SkPdfCatalogDictionary_autogen.h"
|
| +#include "SkPdfPageObjectDictionary_autogen.h"
|
| +#include "SkPdfPageTreeNodeDictionary_autogen.h"
|
| +#include "SkPdfMapper_autogen.h"
|
| +
|
| +
|
| +
|
| +long getFileSize(const char* filename)
|
| +{
|
| + struct stat stat_buf;
|
| + int rc = stat(filename, &stat_buf);
|
| + return rc == 0 ? stat_buf.st_size : -1;
|
| }
|
|
|
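| +// lineHome: walk backwards from 'current' to the first byte of the line it is on;
|
| +// isPdfEOL() decides which bytes count as end-of-line markers.
|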
| +unsigned char* lineHome(unsigned char* start, unsigned char* current) {
|
| + while (current > start && !isPdfEOL(*(current - 1))) {
|
| + current--;
|
| + }
|
| + return current;
|
| +}
|
| +
|
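| +// previousLineHome: step over the end-of-line marker(s) of the previous line
|
| +// (handling two-byte CR+LF / LF+CR sequences), then rewind to its first byte.
|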
| +unsigned char* previousLineHome(unsigned char* start, unsigned char* current) {
|
| + if (current > start && isPdfEOL(*(current - 1))) {
|
| + current--;
|
| + }
|
| +
|
| + // allows CR+LF and LF+CR, but not CR+CR or LF+LF
|
| + if (current > start && isPdfEOL(*(current - 1)) && *current != *(current - 1)) {
|
| + current--;
|
| + }
|
| +
|
| + while (current > start && !isPdfEOL(*(current - 1))) {
|
| + current--;
|
| + }
|
| +
|
| + return current;
|
| +}
|
| +
|
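| +// ignoreLine: advance past the rest of the current line, including a possible
|
| +// two-byte end-of-line sequence, and return the start of the next line.
|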
| +unsigned char* ignoreLine(unsigned char* current, unsigned char* end) {
|
| + while (current < end && !isPdfEOL(*current)) {
|
| + current++;
|
| + }
|
| + current++;
|
| + if (current < end && isPdfEOL(*current) && *current != *(current - 1)) {
|
| + current++;
|
| + }
|
| + return current;
|
| +}
|
| +
|
| +
|
| +// TODO(edisonn): NYI
|
| +// TODO(edisonn): 3 constructors: from URL, from stream, from file ...
|
| +// TODO(edisonn): write one that accepts errors in the file and ignores/fixes them
|
| +// TODO(edisonn): testing:
|
| +// 1) run on a lot of files
|
| +// 2) recoverable corrupt file: remove endobj, endstream, remove other keywords, use other white spaces, insert comments randomly, ...
|
| +// 3) irrecoverable corrupt file
|
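| +// Load the whole file in memory, locate the "startxref" entry at the end of the
|
| +// file, then follow the chain of cross reference sections and trailers via their
|
| +// /Prev entries. The first trailer seen provides the /Root catalog reference,
|
| +// which is resolved and used to flatten the page tree into fPages.
|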
| +SkNativeParsedPDF::SkNativeParsedPDF(const char* path) : fAllocator(new SkPdfAllocator()) {
|
| + FILE* file = fopen(path, "rb");
|
| + fContentLength = getFileSize(path);
|
| + fFileContent = new unsigned char[fContentLength];
|
| + fread(fFileContent, fContentLength, 1, file);
|
| + fclose(file);
|
| + file = NULL;
|
| +
|
| + unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLength - 1);
|
| + unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine);
|
| + unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine);
|
| +
|
| + if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) {
|
| + // TODO(edisonn): report/issue
|
| + }
|
| +
|
| + long xrefByteOffset = atol((const char*)xrefByteOffsetLine);
|
| +
|
| + bool storeCatalog = true;
|
| + while (xrefByteOffset >= 0) {
|
| + unsigned char* trailerStart = readCrossReferenceSection(fFileContent + xrefByteOffset, xrefstartKeywordLine);
|
| + xrefByteOffset = readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog);
|
| + storeCatalog = false;
|
| + }
|
| +
|
| + // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration
|
| + // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper
|
| + // load catalog
|
| + fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
|
| + SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
|
| +
|
| + fillPages(tree);
|
| +
|
| + // now actually read all objects if we want, or do it lazily
|
| + // and resolve references?... or not ...
|
| +}
|
| +
|
| +// TODO(edisonn): NYI
|
| SkNativeParsedPDF::~SkNativeParsedPDF() {
|
| - // TODO(edisonn): Auto-generated destructor stub
|
| + delete[] fFileContent;
|
| + delete fAllocator;
|
| }
|
|
|
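| +// Reads one cross reference section: a sequence of subsections, each introduced
|
| +// by "<first object id> <entry count>" and followed by that many entries of the
|
| +// form "<offset> <generation> f|n". Returns a pointer to the start of the
|
| +// trailer that follows the section.
|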
| +unsigned char* SkNativeParsedPDF::readCrossReferenceSection(unsigned char* xrefStart, unsigned char* trailerEnd) {
|
| + unsigned char* current = ignoreLine(xrefStart, trailerEnd); // TODO(edisonn): verify next keyword is "xref", use nextObject here
|
| +
|
| + SkPdfObject token;
|
| + while (current < trailerEnd) {
|
| + token.reset();
|
| + unsigned char* previous = current;
|
| + current = nextObject(current, trailerEnd, &token, NULL);
|
| + if (!token.isInteger()) {
|
| + return previous;
|
| + }
|
| +
|
| + int startId = token.intValue();
|
| + token.reset();
|
| + current = nextObject(current, trailerEnd, &token, NULL);
|
| +
|
| + if (!token.isInteger()) {
|
| + // TODO(edisonn): report/warning
|
| + return current;
|
| + }
|
| +
|
| + int entries = token.intValue();
|
| +
|
| + for (int i = 0; i < entries; i++) {
|
| + token.reset();
|
| + current = nextObject(current, trailerEnd, &token, NULL);
|
| + if (!token.isInteger()) {
|
| + // TODO(edisonn): report/warning
|
| + return current;
|
| + }
|
| + int offset = token.intValue();
|
| +
|
| + token.reset();
|
| + current = nextObject(current, trailerEnd, &token, NULL);
|
| + if (!token.isInteger()) {
|
| + // TODO(edisonn): report/warning
|
| + return current;
|
| + }
|
| + int generation = token.intValue();
|
| +
|
| + token.reset();
|
| + current = nextObject(current, trailerEnd, &token, NULL);
|
| + if (!token.isKeyword() || token.len() != 1 || (*token.c_str() != 'f' && *token.c_str() != 'n')) {
|
| + // TODO(edisonn): report/warning
|
| + return current;
|
| + }
|
| +
|
| + addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f');
|
| + }
|
| + }
|
| + // TODO(edisonn): we should never get here; it would mean there is no trailer
|
| + return current;
|
| +}
|
| +
|
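| +// Reads the trailer dictionary that follows a cross reference section. For the
|
| +// first (most recent) trailer, the /Root reference is remembered as the catalog;
|
| +// the return value is the /Prev byte offset of the previous xref section, or -1
|
| +// when there is none.
|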
| +long SkNativeParsedPDF::readTrailer(unsigned char* trailerStart, unsigned char* trailerEnd, bool storeCatalog) {
|
| + unsigned char* current = ignoreLine(trailerStart, trailerEnd); // TODO(edisonn): verify next keyword is "trailer", use nextObject here
|
| +
|
| + SkPdfObject token;
|
| + current = nextObject(current, trailerEnd, &token, fAllocator);
|
| + SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token;
|
| +
|
| + if (storeCatalog) {
|
| + const SkPdfObject* ref = trailer->Root(NULL);
|
| + if (ref == NULL || !ref->isReference()) {
|
| + // TODO(edisonn): oops, we have to fix the corrupt pdf file
|
| + return -1;
|
| + }
|
| + fRootCatalogRef = ref;
|
| + }
|
| +
|
| + if (trailer->has_Prev()) {
|
| + return trailer->Prev(NULL);
|
| + }
|
| +
|
| + return -1;
|
| +}
|
| +
|
| +void SkNativeParsedPDF::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) {
|
| + // TODO(edisonn): security here
|
| + while (fObjects.count() < id + 1) {
|
| + reset(fObjects.append());
|
| + }
|
| +
|
| + fObjects[id].fOffset = offset;
|
| + fObjects[id].fObj = NULL;
|
| +}
|
| +
|
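| +// Reads the indirect object stored at the offset recorded for 'id': the
|
| +// "<id> <generation> obj" header followed by the object body itself.
|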
| +SkPdfObject* SkNativeParsedPDF::readObject(int id/*, int expectedGeneration*/) const {
|
| + long startOffset = fObjects[id].fOffset;
|
| + //long endOffset = fObjects[id].fOffsetEnd;
|
| + // TODO(edisonn): use hinted endOffset
|
| + // TODO(edisonn): current implementation will result in a lot of memory usage
|
| + // to decrease memory usage, we either need to be smart and know where objects end, so we
|
| + // allocate only the chunks needed, or the tokenizer must not make copies, but then it needs to
|
| + // cache the results so it does not go over the same buffer twice
|
| + unsigned char* current = fFileContent + startOffset;
|
| + unsigned char* end = fFileContent + fContentLength;
|
| +
|
| + SkPdfNativeTokenizer tokenizer(current, end - current, fMapper, fAllocator);
|
| +
|
| + SkPdfObject idObj;
|
| + SkPdfObject generationObj;
|
| + SkPdfObject objKeyword;
|
| + SkPdfObject* dict = fAllocator->allocObject();
|
| +
|
| + current = nextObject(current, end, &idObj, NULL);
|
| + if (current >= end) {
|
| + // TODO(edisonn): report warning/error
|
| + return NULL;
|
| + }
|
| +
|
| + current = nextObject(current, end, &generationObj, NULL);
|
| + if (current >= end) {
|
| + // TODO(edisonn): report warning/error
|
| + return NULL;
|
| + }
|
| +
|
| + current = nextObject(current, end, &objKeyword, NULL);
|
| + if (current >= end) {
|
| + // TODO(edisonn): report warning/error
|
| + return NULL;
|
| + }
|
| +
|
| + if (!idObj.isInteger() || !generationObj.isInteger() || id != idObj.intValue()/* || generation != generationObj.intValue()*/) {
|
| + // TODO(edisonn): report warning/error
|
| + }
|
| +
|
| + if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) {
|
| + // TODO(edisonn): report warning/error
|
| + }
|
| +
|
| + current = nextObject(current, end, dict, fAllocator);
|
| +
|
| + // TODO(edisonn): report warning/error - verify last token is endobj
|
| +
|
| + return dict;
|
| +}
|
| +
|
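| +// Recursively flattens the page tree: a node without /Kids is treated as a leaf
|
| +// page and appended to fPages, otherwise each kid is resolved and visited.
|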
| +void SkNativeParsedPDF::fillPages(SkPdfPageTreeNodeDictionary* tree) {
|
| + const SkPdfArray* kids = tree->Kids(this);
|
| + if (kids == NULL) {
|
| + *fPages.append() = (SkPdfPageObjectDictionary*)tree;
|
| + return;
|
| + }
|
| +
|
| + int cnt = kids->size();
|
| + for (int i = 0; i < cnt; i++) {
|
| + const SkPdfObject* obj = resolveReference(kids->objAtAIndex(i));
|
| + if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdfObjectType) {
|
| + *fPages.append() = (SkPdfPageObjectDictionary*)obj;
|
| + } else {
|
| + // TODO(edisonn): verify that it is a page tree indeed
|
| + fillPages((SkPdfPageTreeNodeDictionary*)obj);
|
| + }
|
| + }
|
| +}
|
| +
|
| +int SkNativeParsedPDF::pages() const {
|
| + return fPages.count();
|
| +}
|
| +
|
| +SkPdfResourceDictionary* SkNativeParsedPDF::pageResources(int page) {
|
| + return fPages[page]->Resources(this);
|
| +}
|
| +
|
| +// TODO(edisonn): Partially implemented. Move the logic directly into the code generator for inheritable and default values?
|
| +SkRect SkNativeParsedPDF::MediaBox(int page) const {
|
| + SkPdfPageObjectDictionary* current = fPages[page];
|
| + while (!current->has_MediaBox() && current->has_Parent()) {
|
| + current = (SkPdfPageObjectDictionary*)current->Parent(this);
|
| + }
|
| + if (current) {
|
| + return current->MediaBox(this);
|
| + }
|
| + return SkRect::MakeEmpty();
|
| +}
|
| +
|
| +// TODO(edisonn): stream or array ... ? for now only array
|
| +SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfPage(int page) const {
|
| + if (fPages[page]->isContentsAStream(this)) {
|
| + return tokenizerOfStream(fPages[page]->getContentsAsStream(this));
|
| + } else {
|
| + // TODO(edisonn): NYI, we need to concatenate all streams in the array or make the tokenizer smart
|
| + // so we don't allocate new memory
|
| + return NULL;
|
| + }
|
| +}
|
| +
|
| +SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfStream(SkPdfObject* stream) const {
|
| + if (stream == NULL) {
|
| + return NULL;
|
| + }
|
| +
|
| + return new SkPdfNativeTokenizer(stream, fMapper, fAllocator);
|
| +}
|
| +
|
| +// TODO(edisonn): NYI
|
| +SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfBuffer(unsigned char* buffer, size_t len) const {
|
| + // warning: this does not track two calls on the same buffer! the buffer is updated!
|
| + // make a clean copy if needed!
|
| + return new SkPdfNativeTokenizer(buffer, len, fMapper, fAllocator);
|
| +}
|
| +
|
| +size_t SkNativeParsedPDF::objects() const {
|
| + return fObjects.count();
|
| +}
|
| +
|
| +SkPdfObject* SkNativeParsedPDF::object(int i) {
|
| + SkASSERT(!(i < 0 || i >= fObjects.count()));
|
| +
|
| + if (i < 0 || i >= fObjects.count()) {
|
| + return NULL;
|
| + }
|
| +
|
| + if (fObjects[i].fObj == NULL) {
|
| + // TODO(edisonn): when we read the cross reference sections, store the start of the next object
|
| + // and fill fOffsetEnd
|
| + fObjects[i].fObj = readObject(i);
|
| + }
|
| +
|
| + return fObjects[i].fObj;
|
| +}
|
| +
|
| +const SkPdfMapper* SkNativeParsedPDF::mapper() const {
|
| + return fMapper;
|
| +}
|
| +
|
| +SkPdfReal* SkNativeParsedPDF::createReal(double value) const {
|
| + SkPdfObject* obj = fAllocator->allocObject();
|
| + SkPdfObject::makeReal(value, obj);
|
| + return (SkPdfReal*)obj;
|
| +}
|
| +
|
| +SkPdfInteger* SkNativeParsedPDF::createInteger(int value) const {
|
| + SkPdfObject* obj = fAllocator->allocObject();
|
| + SkPdfObject::makeInteger(value, obj);
|
| + return (SkPdfInteger*)obj;
|
| +}
|
| +
|
| +SkPdfString* SkNativeParsedPDF::createString(unsigned char* sz, size_t len) const {
|
| + SkPdfObject* obj = fAllocator->allocObject();
|
| + SkPdfObject::makeString(sz, len, obj);
|
| + return (SkPdfString*)obj;
|
| +}
|
| +
|
| +PdfContext* gPdfContext = NULL;
|
| +
|
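| +// Draws one page: builds a tokenizer over the page content stream, maps PDF user
|
| +// space onto the canvas with setPolyToPoly (flipping the y axis), clips to the
|
| +// media box (unless PDF_DEBUG_NO_PAGE_CLIPING is defined), and lets PdfMainLooper
|
| +// interpret the content stream operators.
|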
| +void SkNativeParsedPDF::drawPage(int page, SkCanvas* canvas) {
|
| + SkPdfNativeTokenizer* tokenizer = tokenizerOfPage(page);
|
| +
|
| + PdfContext pdfContext(this);
|
| + pdfContext.fOriginalMatrix = SkMatrix::I();
|
| + pdfContext.fGraphicsState.fResources = pageResources(page);
|
| +
|
| + gPdfContext = &pdfContext;
|
| +
|
| + // TODO(edisonn): get matrix stuff right.
|
| + // TODO(edisonn): add DPI/scale/zoom.
|
| + SkScalar z = SkIntToScalar(0);
|
| + SkRect rect = MediaBox(page);
|
| + SkScalar w = rect.width();
|
| + SkScalar h = rect.height();
|
| +
|
| + SkPoint pdfSpace[4] = {SkPoint::Make(z, z), SkPoint::Make(w, z), SkPoint::Make(w, h), SkPoint::Make(z, h)};
|
| +// SkPoint skiaSpace[4] = {SkPoint::Make(z, h), SkPoint::Make(w, h), SkPoint::Make(w, z), SkPoint::Make(z, z)};
|
| +
|
| + // TODO(edisonn): add flag for this app to create a surrounding buffer zone
|
| + // TODO(edisonn): add flag for no clipping.
|
| + // Use a larger image to make sure we do not draw anything outside of the page;
|
| + // could be used in tests.
|
| +
|
| +#ifdef PDF_DEBUG_3X
|
| + SkPoint skiaSpace[4] = {SkPoint::Make(w+z, h+h), SkPoint::Make(w+w, h+h), SkPoint::Make(w+w, h+z), SkPoint::Make(w+z, h+z)};
|
| +#else
|
| + SkPoint skiaSpace[4] = {SkPoint::Make(z, h), SkPoint::Make(w, h), SkPoint::Make(w, z), SkPoint::Make(z, z)};
|
| +#endif
|
| + //SkPoint pdfSpace[2] = {SkPoint::Make(z, z), SkPoint::Make(w, h)};
|
| + //SkPoint skiaSpace[2] = {SkPoint::Make(w, z), SkPoint::Make(z, h)};
|
| +
|
| + //SkPoint pdfSpace[2] = {SkPoint::Make(z, z), SkPoint::Make(z, h)};
|
| + //SkPoint skiaSpace[2] = {SkPoint::Make(z, h), SkPoint::Make(z, z)};
|
| +
|
| + //SkPoint pdfSpace[3] = {SkPoint::Make(z, z), SkPoint::Make(z, h), SkPoint::Make(w, h)};
|
| + //SkPoint skiaSpace[3] = {SkPoint::Make(z, h), SkPoint::Make(z, z), SkPoint::Make(w, 0)};
|
| +
|
| + SkAssertResult(pdfContext.fOriginalMatrix.setPolyToPoly(pdfSpace, skiaSpace, 4));
|
| + SkTraceMatrix(pdfContext.fOriginalMatrix, "Original matrix");
|
| +
|
| +
|
| + pdfContext.fGraphicsState.fMatrix = pdfContext.fOriginalMatrix;
|
| + pdfContext.fGraphicsState.fMatrixTm = pdfContext.fGraphicsState.fMatrix;
|
| + pdfContext.fGraphicsState.fMatrixTlm = pdfContext.fGraphicsState.fMatrix;
|
| +
|
| + canvas->setMatrix(pdfContext.fOriginalMatrix);
|
| +
|
| +#ifndef PDF_DEBUG_NO_PAGE_CLIPING
|
| + canvas->clipRect(SkRect::MakeXYWH(z, z, w, h), SkRegion::kIntersect_Op, true);
|
| +#endif
|
| +
|
| +// erase with red before?
|
| +// SkPaint paint;
|
| +// paint.setColor(SK_ColorRED);
|
| +// canvas->drawRect(rect, paint);
|
| +
|
| + PdfMainLooper looper(NULL, tokenizer, &pdfContext, canvas);
|
| + looper.loop();
|
| +
|
| + delete tokenizer;
|
| +
|
| + canvas->flush();
|
| +}
|
| +
|
| +SkPdfAllocator* SkNativeParsedPDF::allocator() const {
|
| + return fAllocator;
|
| +}
|
| +
|
| +SkPdfObject* SkNativeParsedPDF::resolveReference(SkPdfObject* ref) const {
|
| + return (SkPdfObject*)resolveReference((const SkPdfObject*)ref);
|
| +}
|
| +
|
| +// TODO(edisonn): fix infinite loop if a ref points to itself!
|
| +// TODO(edisonn): perf, fix refs at load, and resolve will simply return fResolvedReference?
|
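| +// Follows a reference object to the concrete object it points to, reading the
|
| +// target lazily via readObject() and memoizing the result in fResolvedReference.
|
| +// Non-reference objects are returned unchanged.
|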
| +SkPdfObject* SkNativeParsedPDF::resolveReference(const SkPdfObject* ref) const {
|
| + if (ref && ref->isReference()) {
|
| + int id = ref->referenceId();
|
| + // TODO(edisonn): generation/updates not supported now
|
| + //int gen = ref->referenceGeneration();
|
| +
|
| + SkASSERT(!(id < 0 || id >= fObjects.count()));
|
| +
|
| + if (id < 0 || id >= fObjects.count()) {
|
| + return NULL;
|
| + }
|
| +
|
| + // TODO(edisonn): verify id and gen expected
|
| +
|
| + if (fObjects[id].fResolvedReference != NULL) {
|
| + return fObjects[id].fResolvedReference;
|
| + }
|
| +
|
| + if (fObjects[id].fObj == NULL) {
|
| + fObjects[id].fObj = readObject(id);
|
| + }
|
| +
|
| + if (fObjects[id].fResolvedReference == NULL) {
|
| + if (!fObjects[id].fObj->isReference()) {
|
| + fObjects[id].fResolvedReference = fObjects[id].fObj;
|
| + } else {
|
| + fObjects[id].fResolvedReference = resolveReference(fObjects[id].fObj);
|
| + }
|
| + }
|
| +
|
| + return fObjects[id].fResolvedReference;
|
| + }
|
| + // TODO(edisonn): fix the mess with const, probably we need to remove it pretty much everywhere
|
| + return (SkPdfObject*)ref;
|
| +}
|
|
|