Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(339)

Side by Side Diff: experimental/PdfViewer/pdfparser/native/SkNativeParsedPDF.cpp

Issue 21096006: pdfviewer: load files with missing xref (we need this in order to help with testing, as most good PDFs i… (Closed) Base URL: http://skia.googlecode.com/svn/trunk/
Patch Set: Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 #include "SkNativeParsedPDF.h" 1 #include "SkNativeParsedPDF.h"
2 #include "SkPdfNativeTokenizer.h" 2 #include "SkPdfNativeTokenizer.h"
3 #include "SkPdfBasics.h" 3 #include "SkPdfBasics.h"
4 #include "SkPdfObject.h" 4 #include "SkPdfObject.h"
5 5
6 #include <stdio.h> 6 #include <stdio.h>
7 #include <string.h> 7 #include <string.h>
8 #include <sys/types.h> 8 #include <sys/types.h>
9 #include <sys/stat.h> 9 #include <sys/stat.h>
10 10
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
116 116
117 if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) { 117 if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) {
118 // TODO(edisonn): report/issue 118 // TODO(edisonn): report/issue
119 } 119 }
120 120
121 long xrefByteOffset = atol((const char*)xrefByteOffsetLine); 121 long xrefByteOffset = atol((const char*)xrefByteOffsetLine);
122 122
123 bool storeCatalog = true; 123 bool storeCatalog = true;
124 while (xrefByteOffset >= 0) { 124 while (xrefByteOffset >= 0) {
125 const unsigned char* trailerStart = readCrossReferenceSection(fFileContent + xrefByteOffset, xrefstartKeywordLine); 125 const unsigned char* trailerStart = readCrossReferenceSection(fFileContent + xrefByteOffset, xrefstartKeywordLine);
126 xrefByteOffset = readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog); 126 readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset, false);
127 storeCatalog = false; 127 storeCatalog = false;
128 } 128 }
129 129
130 // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration 130 // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration
131 // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper 131 // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper
132 // load catalog 132 // load catalog
133 133
134 if (fRootCatalogRef) { 134 if (fRootCatalogRef) {
135 fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef); 135 fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
136 if (fRootCatalog->isDictionary() && fRootCatalog->valid()) { 136 if (fRootCatalog->isDictionary() && fRootCatalog->valid()) {
137 SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this); 137 SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
138 if (tree && tree->isDictionary() && tree->valid()) { 138 if (tree && tree->isDictionary() && tree->valid()) {
139 fillPages(tree); 139 fillPages(tree);
140 } 140 }
141 } 141 }
142 } 142 }
143 143
144 // TODO(edisonn): clean up this doc, or better, let the caller call again and build a new doc
145 // caller should be a static function.
146 if (pages() == 0) {
147 loadWithoutXRef();
148 }
149
144 // TODO(edisonn): corrupted pdf, read it from beginning and rebuild (xref, trailer, or just read all objects) 150 // TODO(edisonn): corrupted pdf, read it from beginning and rebuild (xref, trailer, or just read all objects)
145 // 0 pages 151 // 0 pages
146 152
147 // now actually read all objects if we want, or do it lazily 153 // now actually read all objects if we want, or do it lazily
148 // and resolve references?... or not ... 154 // and resolve references?... or not ...
149 } 155 }
150 156
157 void SkNativeParsedPDF::loadWithoutXRef() {
158 const unsigned char* current = fFileContent;
159 const unsigned char* end = fFileContent + fContentLength;
160
161 // TODO(edisonn): read pdf version
162 current = ignoreLine(current, end);
163
164 current = skipPdfWhiteSpaces(0, current, end);
165 while (current < end) {
166 SkPdfObject token;
167 current = nextObject(0, current, end, &token, NULL, NULL);
168 if (token.isInteger()) {
169 int id = (int)token.intValue();
170
171 token.reset();
172 current = nextObject(0, current, end, &token, NULL, NULL);
173 // int generation = (int)token.intValue(); // TODO(edisonn): ignored for now
174
175 token.reset();
176 current = nextObject(0, current, end, &token, NULL, NULL);
177 // TODO(edisonn): must be obj, return error if not? ignore ?
178 if (!token.isKeyword("obj")) {
179 continue;
180 }
181
182 while (fObjects.count() < id + 1) {
183 reset(fObjects.append());
184 }
185
186 fObjects[id].fOffset = current - fFileContent;
187
188 SkPdfObject* obj = fAllocator->allocObject();
189 current = nextObject(0, current, end, obj, fAllocator, this);
190
191 fObjects[id].fResolvedReference = obj;
192 fObjects[id].fObj = obj;
193
194 // set objects
195 } else if (token.isKeyword("trailer")) {
196 long dummy;
197 current = readTrailer(current, end, true, &dummy, true);
198 } else if (token.isKeyword("startxref")) {
199 token.reset();
200 current = nextObject(0, current, end, &token, NULL, NULL); // ignore
201 }
202
203 current = skipPdfWhiteSpaces(0, current, end);
204 }
205
206 if (fRootCatalogRef) {
207 fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
208 if (fRootCatalog->isDictionary() && fRootCatalog->valid()) {
209 SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
210 if (tree && tree->isDictionary() && tree->valid()) {
211 fillPages(tree);
212 }
213 }
214 }
215
216 }
217
151 // TODO(edisonn): NYI 218 // TODO(edisonn): NYI
152 SkNativeParsedPDF::~SkNativeParsedPDF() { 219 SkNativeParsedPDF::~SkNativeParsedPDF() {
153 sk_free((void*)fFileContent); 220 sk_free((void*)fFileContent);
154 delete fAllocator; 221 delete fAllocator;
155 } 222 }
156 223
157 const unsigned char* SkNativeParsedPDF::readCrossReferenceSection(const unsigned char* xrefStart, const unsigned char* trailerEnd) { 224 const unsigned char* SkNativeParsedPDF::readCrossReferenceSection(const unsigned char* xrefStart, const unsigned char* trailerEnd) {
158 const unsigned char* current = ignoreLine(xrefStart, trailerEnd); // TODO(edisonn): verify next keyword is "xref", use nextObject here 225 const unsigned char* current = ignoreLine(xrefStart, trailerEnd); // TODO(edisonn): verify next keyword is "xref", use nextObject here
159 226
160 SkPdfObject token; 227 SkPdfObject token;
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
201 return current; 268 return current;
202 } 269 }
203 270
204 addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f'); 271 addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f');
205 } 272 }
206 } 273 }
207 // TODO(edisonn): it should never get here? there is no trailer? 274 // TODO(edisonn): it should never get here? there is no trailer?
208 return current; 275 return current;
209 } 276 }
210 277
211 long SkNativeParsedPDF::readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog) { 278 const unsigned char* SkNativeParsedPDF::readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog, long* prev, bool skipKeyword) {
212 SkPdfObject trailerKeyword; 279 *prev = -1;
213 // TODO(edisonn): use null allocator, and let it just fail if memory
214 // needs allocated (but no crash)!
215 const unsigned char* current =
216 nextObject(0, trailerStart, trailerEnd, &trailerKeyword, NULL, NULL);
217 280
218 if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() || 281 const unsigned char* current = trailerStart;
219 strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) { 282 if (!skipKeyword) {
220 // TODO(edisonn): report warning, rebuild trailer from objects. 283 SkPdfObject trailerKeyword;
221 return -1; 284 // TODO(edisonn): use null allocator, and let it just fail if memory
285 // needs allocated (but no crash)!
286 current = nextObject(0, current, trailerEnd, &trailerKeyword, NULL, NULL);
287
288 if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() ||
289 strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) {
290 // TODO(edisonn): report warning, rebuild trailer from objects.
291 return current;
292 }
222 } 293 }
223 294
224 SkPdfObject token; 295 SkPdfObject token;
225 current = nextObject(0, current, trailerEnd, &token, fAllocator, NULL); 296 current = nextObject(0, current, trailerEnd, &token, fAllocator, NULL);
226 if (!token.isDictionary()) { 297 if (!token.isDictionary()) {
227 return -1; 298 return current;
228 } 299 }
229 SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token; 300 SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token;
230 if (!trailer->valid()) { 301 if (!trailer->valid()) {
231 return -1; 302 return current;
232 } 303 }
233 304
234 if (storeCatalog) { 305 if (storeCatalog) {
235 const SkPdfObject* ref = trailer->Root(NULL); 306 const SkPdfObject* ref = trailer->Root(NULL);
236 if (ref == NULL || !ref->isReference()) { 307 if (ref == NULL || !ref->isReference()) {
237 // TODO(edisonn): oops, we have to fix the corrupt pdf file 308 // TODO(edisonn): oops, we have to fix the corrupt pdf file
238 return -1; 309 return current;
239 } 310 }
240 fRootCatalogRef = ref; 311 fRootCatalogRef = ref;
241 } 312 }
242 313
243 if (trailer->has_Prev()) { 314 if (trailer->has_Prev()) {
244 return (long)trailer->Prev(NULL); 315 *prev = (long)trailer->Prev(NULL);
245 } 316 }
246 317
247 return -1; 318 return current;
248 } 319 }
249 320
250 void SkNativeParsedPDF::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) { 321 void SkNativeParsedPDF::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) {
251 // TODO(edisonn): security here 322 // TODO(edisonn): security here
252 while (fObjects.count() < id + 1) { 323 while (fObjects.count() < id + 1) {
253 reset(fObjects.append()); 324 reset(fObjects.append());
254 } 325 }
255 326
256 fObjects[id].fOffset = offset; 327 fObjects[id].fOffset = offset;
257 fObjects[id].fObj = NULL; 328 fObjects[id].fObj = NULL;
329 fObjects[id].fResolvedReference = NULL;
258 } 330 }
259 331
260 SkPdfObject* SkNativeParsedPDF::readObject(int id/*, int expectedGeneration*/) { 332 SkPdfObject* SkNativeParsedPDF::readObject(int id/*, int expectedGeneration*/) {
261 long startOffset = fObjects[id].fOffset; 333 long startOffset = fObjects[id].fOffset;
262 //long endOffset = fObjects[id].fOffsetEnd; 334 //long endOffset = fObjects[id].fOffsetEnd;
263 // TODO(edisonn): use hinted endOffset 335 // TODO(edisonn): use hinted endOffset
264 // TODO(edisonn): current implementation will result in a lot of memory usage 336 // TODO(edisonn): current implementation will result in a lot of memory usage
265 // to decrease memory usage, we either need to be smart and know where objects end, and we will 337 // to decrease memory usage, we either need to be smart and know where objects end, and we will
266 // allocate only the chunks needed, or the tokenizer will not make copies, but then it needs to 338 // allocate only the chunks needed, or the tokenizer will not make copies, but then it needs to
267 // cache the results so it does not go twice on the same buffer 339 // cache the results so it does not go twice on the same buffer
(...skipping 192 matching lines...) Expand 10 before | Expand all | Expand 10 after
460 return (SkPdfObject*)ref; 532 return (SkPdfObject*)ref;
461 } 533 }
462 534
463 size_t SkNativeParsedPDF::bytesUsed() const { 535 size_t SkNativeParsedPDF::bytesUsed() const {
464 return fAllocator->bytesUsed() + 536 return fAllocator->bytesUsed() +
465 fContentLength + 537 fContentLength +
466 fObjects.count() * sizeof(PublicObjectEntry) + 538 fObjects.count() * sizeof(PublicObjectEntry) +
467 fPages.count() * sizeof(SkPdfPageObjectDictionary*) + 539 fPages.count() * sizeof(SkPdfPageObjectDictionary*) +
468 sizeof(*this); 540 sizeof(*this);
469 } 541 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698