Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(75)

Side by Side Diff: experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp

Issue 18323019: work on the native parser, in progress, uploaded to have a backup (Closed) Base URL: http://skia.googlecode.com/svn/trunk/
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1
2 #include "SkNativeParsedPDF.h" 1 #include "SkNativeParsedPDF.h"
3 2 #include "SkPdfNativeTokenizer.h"
4 SkNativeParsedPDF::SkNativeParsedPDF() { 3 #include "SkPdfBasics.h"
5 // TODO(edisonn): Auto-generated constructor stub 4 #include "SkPdfParser.h"
6 5 #include "SkPdfObject.h"
7 } 6
8 7 #include <stdio.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11
12 #include "SkPdfFileTrailerDictionary_autogen.h"
13 #include "SkPdfCatalogDictionary_autogen.h"
14 #include "SkPdfPageObjectDictionary_autogen.h"
15 #include "SkPdfPageTreeNodeDictionary_autogen.h"
16 #include "SkPdfMapper_autogen.h"
17
18
19
// Returns the size in bytes of the file at |filename|, or -1 when |filename| is NULL or
// stat() fails (for example because the file does not exist).
long getFileSize(const char* filename)
{
    if (filename == NULL) {
        // stat() must not be handed a null path; report it like any other failure.
        return -1;
    }

    struct stat stat_buf;
    if (stat(filename, &stat_buf) != 0) {
        return -1;
    }
    return stat_buf.st_size;
}
26
27 unsigned char* lineHome(unsigned char* start, unsigned char* current) {
28 while (current > start && !isPdfEOL(*(current - 1))) {
29 current--;
30 }
31 return current;
32 }
33
34 unsigned char* previousLineHome(unsigned char* start, unsigned char* current) {
35 if (current > start && isPdfEOL(*(current - 1))) {
36 current--;
37 }
38
39 // allows CR+LF, LF+CR but not two CR+CR or LF+LF
40 if (current > start && isPdfEOL(*(current - 1)) && *current != *(current - 1 )) {
41 current--;
42 }
43
44 while (current > start && !isPdfEOL(*(current - 1))) {
45 current--;
46 }
47
48 return current;
49 }
50
51 unsigned char* ignoreLine(unsigned char* current, unsigned char* end) {
52 while (current < end && !isPdfEOL(*current)) {
53 current++;
54 }
55 current++;
56 if (current < end && isPdfEOL(*current) && *current != *(current - 1)) {
57 current++;
58 }
59 return current;
60 }
61
62
63 // TODO(edisonn): NYI
64 // TODO(edisonn): 3 constructuctors from URL, from stream, from file ...
65 // TODO(edisonn): write one that accepts errors in the file and ignores/fixis th em
66 // TODO(edisonn): testing:
67 // 1) run on a lot of file
68 // 2) recoverable corupt file: remove endobj, endsteam, remove other keywords, u se other white spaces, insert comments randomly, ...
69 // 3) irrecoverable corrupt file
70 SkNativeParsedPDF::SkNativeParsedPDF(const char* path) : fAllocator(new SkPdfAll ocator()) {
71 FILE* file = fopen(path, "r");
72 fContentLength = getFileSize(path);
73 fFileContent = new unsigned char[fContentLength];
74 fread(fFileContent, fContentLength, 1, file);
75 fclose(file);
76 file = NULL;
77
78 unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLengt h - 1);
79 unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine);
80 unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByt eOffsetLine);
81
82 if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) {
83 // TODO(edisonn): report/issue
84 }
85
86 long xrefByteOffset = atol((const char*)xrefByteOffsetLine);
87
88 bool storeCatalog = true;
89 while (xrefByteOffset >= 0) {
90 unsigned char* trailerStart = readCrossReferenceSection(fFileContent + x refByteOffset, xrefstartKeywordLine);
91 xrefByteOffset = readTrailer(trailerStart, xrefstartKeywordLine, storeCa talog);
92 storeCatalog = false;
93 }
94
95 // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == f RefCatalogGeneration
96 // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed usi ng mapper
97 // load catalog
98 fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
99 SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
100
101 fillPages(tree);
102
103 // now actually read all objects if we want, or do it lazyly
104 // and resolve references?... or not ...
105 }
106
107 // TODO(edisonn): NYI
9 SkNativeParsedPDF::~SkNativeParsedPDF() { 108 SkNativeParsedPDF::~SkNativeParsedPDF() {
10 // TODO(edisonn): Auto-generated destructor stub 109 delete[] fFileContent;
11 } 110 delete fAllocator;
12 111 }
112
113 unsigned char* SkNativeParsedPDF::readCrossReferenceSection(unsigned char* xrefS tart, unsigned char* trailerEnd) {
114 unsigned char* current = ignoreLine(xrefStart, trailerEnd); // TODO(edisonn ): verify next keyord is "xref", use nextObject here
115
116 SkPdfObject token;
117 while (current < trailerEnd) {
118 token.reset();
119 unsigned char* previous = current;
120 current = nextObject(current, trailerEnd, &token, NULL);
121 if (!token.isInteger()) {
122 return previous;
123 }
124
125 int startId = token.intValue();
126 token.reset();
127 current = nextObject(current, trailerEnd, &token, NULL);
128
129 if (!token.isInteger()) {
130 // TODO(edisonn): report/warning
131 return current;
132 }
133
134 int entries = token.intValue();
135
136 for (int i = 0; i < entries; i++) {
137 token.reset();
138 current = nextObject(current, trailerEnd, &token, NULL);
139 if (!token.isInteger()) {
140 // TODO(edisonn): report/warning
141 return current;
142 }
143 int offset = token.intValue();
144
145 token.reset();
146 current = nextObject(current, trailerEnd, &token, NULL);
147 if (!token.isInteger()) {
148 // TODO(edisonn): report/warning
149 return current;
150 }
151 int generation = token.intValue();
152
153 token.reset();
154 current = nextObject(current, trailerEnd, &token, NULL);
155 if (!token.isKeyword() || token.len() != 1 || (*token.c_str() != 'f' && *token.c_str() != 'n')) {
156 // TODO(edisonn): report/warning
157 return current;
158 }
159
160 addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f');
161 }
162 }
163 // TODO(edisonn): it should never get here? there is no trailer?
164 return current;
165 }
166
167 long SkNativeParsedPDF::readTrailer(unsigned char* trailerStart, unsigned char* trailerEnd, bool storeCatalog) {
168 unsigned char* current = ignoreLine(trailerStart, trailerEnd); // TODO(edis onn): verify next keyord is "trailer" use nextObject here
169
170 SkPdfObject token;
171 current = nextObject(current, trailerEnd, &token, fAllocator);
172 SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token;
173
174 if (storeCatalog) {
175 const SkPdfObject* ref = trailer->Root(NULL);
176 if (ref == NULL || !ref->isReference()) {
177 // TODO(edisonn): oops, we have to fix the corrup pdf file
178 return -1;
179 }
180 fRootCatalogRef = ref;
181 }
182
183 if (trailer->has_Prev()) {
184 return trailer->Prev(NULL);
185 }
186
187 return -1;
188 }
189
190 void SkNativeParsedPDF::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) {
191 // TODO(edisonn): security here
192 while (fObjects.count() < id + 1) {
193 reset(fObjects.append());
194 }
195
196 fObjects[id].fOffset = offset;
197 fObjects[id].fObj = NULL;
198 }
199
200 SkPdfObject* SkNativeParsedPDF::readObject(int id/*, int expectedGeneration*/) c onst {
201 long startOffset = fObjects[id].fOffset;
202 //long endOffset = fObjects[id].fOffsetEnd;
203 // TODO(edisonn): use hinted endOffset
204 // TODO(edisonn): current implementation will result in a lot of memory usag e
205 // to decrease memory usage, we wither need to be smart and know where objec ts end, and we will
206 // alocate only the chancks needed, or the tokenizer will not make copies, b ut then it needs to
207 // cache the results so it does not go twice on the same buffer
208 unsigned char* current = fFileContent + startOffset;
209 unsigned char* end = fFileContent + fContentLength;
210
211 SkPdfNativeTokenizer tokenizer(current, end - current, fMapper, fAllocator);
212
213 SkPdfObject idObj;
214 SkPdfObject generationObj;
215 SkPdfObject objKeyword;
216 SkPdfObject* dict = fAllocator->allocObject();
217
218 current = nextObject(current, end, &idObj, NULL);
219 if (current >= end) {
220 // TODO(edisonn): report warning/error
221 return NULL;
222 }
223
224 current = nextObject(current, end, &generationObj, NULL);
225 if (current >= end) {
226 // TODO(edisonn): report warning/error
227 return NULL;
228 }
229
230 current = nextObject(current, end, &objKeyword, NULL);
231 if (current >= end) {
232 // TODO(edisonn): report warning/error
233 return NULL;
234 }
235
236 if (!idObj.isInteger() || !generationObj.isInteger() || id != idObj.intValue ()/* || generation != generationObj.intValue()*/) {
237 // TODO(edisonn): report warning/error
238 }
239
240 if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) {
241 // TODO(edisonn): report warning/error
242 }
243
244 current = nextObject(current, end, dict, fAllocator);
245
246 // TODO(edisonn): report warning/error - verify last token is endobj
247
248 return dict;
249 }
250
251 void SkNativeParsedPDF::fillPages(SkPdfPageTreeNodeDictionary* tree) {
252 const SkPdfArray* kids = tree->Kids(this);
253 if (kids == NULL) {
254 *fPages.append() = (SkPdfPageObjectDictionary*)tree;
255 return;
256 }
257
258 int cnt = kids->size();
259 for (int i = 0; i < cnt; i++) {
260 const SkPdfObject* obj = resolveReference(kids->objAtAIndex(i));
261 if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdf ObjectType) {
262 *fPages.append() = (SkPdfPageObjectDictionary*)obj;
263 } else {
264 // TODO(edisonn): verify that it is a page tree indeed
265 fillPages((SkPdfPageTreeNodeDictionary*)obj);
266 }
267 }
268 }
269
270 int SkNativeParsedPDF::pages() const {
271 return fPages.count();
272 }
273
274 SkPdfResourceDictionary* SkNativeParsedPDF::pageResources(int page) {
275 return fPages[page]->Resources(this);
276 }
277
278 // TODO(edisonn): Partial implemented. Move the logics directly in the code gene rator for inheritable and default value?
279 SkRect SkNativeParsedPDF::MediaBox(int page) const {
280 SkPdfPageObjectDictionary* current = fPages[page];
281 while (!current->has_MediaBox() && current->has_Parent()) {
282 current = (SkPdfPageObjectDictionary*)current->Parent(this);
283 }
284 if (current) {
285 return current->MediaBox(this);
286 }
287 return SkRect::MakeEmpty();
288 }
289
290 // TODO(edisonn): stream or array ... ? for now only array
291 SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfPage(int page) const {
292 if (fPages[page]->isContentsAStream(this)) {
293 return tokenizerOfStream(fPages[page]->getContentsAsStream(this));
294 } else {
295 // TODO(edisonn): NYI, we need to concatenate all streams in the array o r make the tokenizer smart
296 // so we don't allocate new memory
297 return NULL;
298 }
299 }
300
301 SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfStream(SkPdfObject* stream) const {
302 if (stream == NULL) {
303 return NULL;
304 }
305
306 return new SkPdfNativeTokenizer(stream, fMapper, fAllocator);
307 }
308
309 // TODO(edisonn): NYI
310 SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfBuffer(unsigned char* buffer , size_t len) const {
311 // warning does not track two calls in the same buffer! the buffer is update d!
312 // make a clean copy if needed!
313 return new SkPdfNativeTokenizer(buffer, len, fMapper, fAllocator);
314 }
315
316 size_t SkNativeParsedPDF::objects() const {
317 return fObjects.count();
318 }
319
320 SkPdfObject* SkNativeParsedPDF::object(int i) {
321 SkASSERT(!(i < 0 || i > fObjects.count()));
322
323 if (i < 0 || i > fObjects.count()) {
324 return NULL;
325 }
326
327 if (fObjects[i].fObj == NULL) {
328 // TODO(edisonn): when we read the cross reference sections, store the s tart of the next object
329 // and fill fOffsetEnd
330 fObjects[i].fObj = readObject(i);
331 }
332
333 return fObjects[i].fObj;
334 }
335
336 const SkPdfMapper* SkNativeParsedPDF::mapper() const {
337 return fMapper;
338 }
339
340 SkPdfReal* SkNativeParsedPDF::createReal(double value) const {
341 SkPdfObject* obj = fAllocator->allocObject();
342 SkPdfObject::makeReal(value, obj);
343 return (SkPdfReal*)obj;
344 }
345
346 SkPdfInteger* SkNativeParsedPDF::createInteger(int value) const {
347 SkPdfObject* obj = fAllocator->allocObject();
348 SkPdfObject::makeInteger(value, obj);
349 return (SkPdfInteger*)obj;
350 }
351
352 SkPdfString* SkNativeParsedPDF::createString(unsigned char* sz, size_t len) cons t {
353 SkPdfObject* obj = fAllocator->allocObject();
354 SkPdfObject::makeString(sz, len, obj);
355 return (SkPdfString*)obj;
356 }
357
358 PdfContext* gPdfContext = NULL;
359
360 void SkNativeParsedPDF::drawPage(int page, SkCanvas* canvas) {
361 SkPdfNativeTokenizer* tokenizer = tokenizerOfPage(page);
362
363 PdfContext pdfContext(this);
364 pdfContext.fOriginalMatrix = SkMatrix::I();
365 pdfContext.fGraphicsState.fResources = pageResources(page);
366
367 gPdfContext = &pdfContext;
368
369 // TODO(edisonn): get matrix stuff right.
370 // TODO(edisonn): add DPI/scale/zoom.
371 SkScalar z = SkIntToScalar(0);
372 SkRect rect = MediaBox(page);
373 SkScalar w = rect.width();
374 SkScalar h = rect.height();
375
376 SkPoint pdfSpace[4] = {SkPoint::Make(z, z), SkPoint::Make(w, z), SkPoint::Ma ke(w, h), SkPoint::Make(z, h)};
377 // SkPoint skiaSpace[4] = {SkPoint::Make(z, h), SkPoint::Make(w, h), SkPoint::Make(w, z), SkPoint::Make(z, z)};
378
379 // TODO(edisonn): add flag for this app to create sourunding buffer zone
380 // TODO(edisonn): add flagg for no clipping.
381 // Use larger image to make sure we do not draw anything outside of page
382 // could be used in tests.
383
384 #ifdef PDF_DEBUG_3X
385 SkPoint skiaSpace[4] = {SkPoint::Make(w+z, h+h), SkPoint::Make(w+w, h+h), Sk Point::Make(w+w, h+z), SkPoint::Make(w+z, h+z)};
386 #else
387 SkPoint skiaSpace[4] = {SkPoint::Make(z, h), SkPoint::Make(w, h), SkPoint::M ake(w, z), SkPoint::Make(z, z)};
388 #endif
389 //SkPoint pdfSpace[2] = {SkPoint::Make(z, z), SkPoint::Make(w, h)};
390 //SkPoint skiaSpace[2] = {SkPoint::Make(w, z), SkPoint::Make(z, h)};
391
392 //SkPoint pdfSpace[2] = {SkPoint::Make(z, z), SkPoint::Make(z, h)};
393 //SkPoint skiaSpace[2] = {SkPoint::Make(z, h), SkPoint::Make(z, z)};
394
395 //SkPoint pdfSpace[3] = {SkPoint::Make(z, z), SkPoint::Make(z, h), SkPoint:: Make(w, h)};
396 //SkPoint skiaSpace[3] = {SkPoint::Make(z, h), SkPoint::Make(z, z), SkPoint: :Make(w, 0)};
397
398 SkAssertResult(pdfContext.fOriginalMatrix.setPolyToPoly(pdfSpace, skiaSpace, 4));
399 SkTraceMatrix(pdfContext.fOriginalMatrix, "Original matrix");
400
401
402 pdfContext.fGraphicsState.fMatrix = pdfContext.fOriginalMatrix;
403 pdfContext.fGraphicsState.fMatrixTm = pdfContext.fGraphicsState.fMatrix;
404 pdfContext.fGraphicsState.fMatrixTlm = pdfContext.fGraphicsState.fMatrix;
405
406 canvas->setMatrix(pdfContext.fOriginalMatrix);
407
408 #ifndef PDF_DEBUG_NO_PAGE_CLIPING
409 canvas->clipRect(SkRect::MakeXYWH(z, z, w, h), SkRegion::kIntersect_Op, true );
410 #endif
411
412 // erase with red before?
413 // SkPaint paint;
414 // paint.setColor(SK_ColorRED);
415 // canvas->drawRect(rect, paint);
416
417 PdfMainLooper looper(NULL, tokenizer, &pdfContext, canvas);
418 looper.loop();
419
420 delete tokenizer;
421
422 canvas->flush();
423 }
424
425 SkPdfAllocator* SkNativeParsedPDF::allocator() const {
426 return fAllocator;
427 }
428
429 SkPdfObject* SkNativeParsedPDF::resolveReference(SkPdfObject* ref) const {
430 return (SkPdfObject*)resolveReference((const SkPdfObject*)ref);
431 }
432
433 // TODO(edisonn): fix infinite loop if ref to itself!
434 // TODO(edisonn): perf, fix refs at load, and resolve will simply return fResolv edReference?
435 SkPdfObject* SkNativeParsedPDF::resolveReference(const SkPdfObject* ref) const {
436 if (ref && ref->isReference()) {
437 int id = ref->referenceId();
438 // TODO(edisonn): generation/updates not supported now
439 //int gen = ref->referenceGeneration();
440
441 SkASSERT(!(id < 0 || id > fObjects.count()));
442
443 if (id < 0 || id > fObjects.count()) {
444 return NULL;
445 }
446
447 // TODO(edisonn): verify id and gen expected
448
449 if (fObjects[id].fResolvedReference != NULL) {
450 return fObjects[id].fResolvedReference;
451 }
452
453 if (fObjects[id].fObj == NULL) {
454 fObjects[id].fObj = readObject(id);
455 }
456
457 if (fObjects[id].fResolvedReference == NULL) {
458 if (!fObjects[id].fObj->isReference()) {
459 fObjects[id].fResolvedReference = fObjects[id].fObj;
460 } else {
461 fObjects[id].fResolvedReference = resolveReference(fObjects[id]. fObj);
462 }
463 }
464
465 return fObjects[id].fResolvedReference;
466 }
467 // TODO(edisonn): fix the mess with const, probably we need to remove it pre tty much everywhere
468 return (SkPdfObject*)ref;
469 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698