OLD | NEW |
1 | |
2 #include "SkNativeParsedPDF.h" | 1 #include "SkNativeParsedPDF.h" |
3 | 2 #include "SkPdfNativeTokenizer.h" |
4 SkNativeParsedPDF::SkNativeParsedPDF() { | 3 #include "SkPdfBasics.h" |
5 // TODO(edisonn): Auto-generated constructor stub | 4 #include "SkPdfParser.h" |
6 | 5 #include "SkPdfObject.h" |
7 } | 6 |
8 | 7 #include <stdio.h> |
| 8 #include <string.h> |
| 9 #include <sys/types.h> |
| 10 #include <sys/stat.h> |
| 11 |
| 12 #include "SkPdfFileTrailerDictionary_autogen.h" |
| 13 #include "SkPdfCatalogDictionary_autogen.h" |
| 14 #include "SkPdfPageObjectDictionary_autogen.h" |
| 15 #include "SkPdfPageTreeNodeDictionary_autogen.h" |
| 16 #include "SkPdfMapper_autogen.h" |
| 17 |
| 18 |
| 19 |
// Returns the size in bytes that stat() reports for |filename|, or -1 when
// stat() fails.
long getFileSize(const char* filename)
{
    struct stat info;
    if (stat(filename, &info) != 0) {
        return -1;
    }
    return info.st_size;
}
| 26 |
| 27 unsigned char* lineHome(unsigned char* start, unsigned char* current) { |
| 28 while (current > start && !isPdfEOL(*(current - 1))) { |
| 29 current--; |
| 30 } |
| 31 return current; |
| 32 } |
| 33 |
| 34 unsigned char* previousLineHome(unsigned char* start, unsigned char* current) { |
| 35 if (current > start && isPdfEOL(*(current - 1))) { |
| 36 current--; |
| 37 } |
| 38 |
| 39 // allows CR+LF, LF+CR but not two CR+CR or LF+LF |
| 40 if (current > start && isPdfEOL(*(current - 1)) && *current != *(current - 1
)) { |
| 41 current--; |
| 42 } |
| 43 |
| 44 while (current > start && !isPdfEOL(*(current - 1))) { |
| 45 current--; |
| 46 } |
| 47 |
| 48 return current; |
| 49 } |
| 50 |
| 51 unsigned char* ignoreLine(unsigned char* current, unsigned char* end) { |
| 52 while (current < end && !isPdfEOL(*current)) { |
| 53 current++; |
| 54 } |
| 55 current++; |
| 56 if (current < end && isPdfEOL(*current) && *current != *(current - 1)) { |
| 57 current++; |
| 58 } |
| 59 return current; |
| 60 } |
| 61 |
| 62 |
| 63 // TODO(edisonn): NYI |
| 64 // TODO(edisonn): 3 constructuctors from URL, from stream, from file ... |
| 65 // TODO(edisonn): write one that accepts errors in the file and ignores/fixis th
em |
| 66 // TODO(edisonn): testing: |
| 67 // 1) run on a lot of file |
| 68 // 2) recoverable corupt file: remove endobj, endsteam, remove other keywords, u
se other white spaces, insert comments randomly, ... |
| 69 // 3) irrecoverable corrupt file |
| 70 SkNativeParsedPDF::SkNativeParsedPDF(const char* path) : fAllocator(new SkPdfAll
ocator()) { |
| 71 FILE* file = fopen(path, "r"); |
| 72 fContentLength = getFileSize(path); |
| 73 fFileContent = new unsigned char[fContentLength]; |
| 74 fread(fFileContent, fContentLength, 1, file); |
| 75 fclose(file); |
| 76 file = NULL; |
| 77 |
| 78 unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLengt
h - 1); |
| 79 unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine); |
| 80 unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByt
eOffsetLine); |
| 81 |
| 82 if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) { |
| 83 // TODO(edisonn): report/issue |
| 84 } |
| 85 |
| 86 long xrefByteOffset = atol((const char*)xrefByteOffsetLine); |
| 87 |
| 88 bool storeCatalog = true; |
| 89 while (xrefByteOffset >= 0) { |
| 90 unsigned char* trailerStart = readCrossReferenceSection(fFileContent + x
refByteOffset, xrefstartKeywordLine); |
| 91 xrefByteOffset = readTrailer(trailerStart, xrefstartKeywordLine, storeCa
talog); |
| 92 storeCatalog = false; |
| 93 } |
| 94 |
| 95 // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == f
RefCatalogGeneration |
| 96 // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed usi
ng mapper |
| 97 // load catalog |
| 98 fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef); |
| 99 SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this); |
| 100 |
| 101 fillPages(tree); |
| 102 |
| 103 // now actually read all objects if we want, or do it lazyly |
| 104 // and resolve references?... or not ... |
| 105 } |
| 106 |
| 107 // TODO(edisonn): NYI |
9 SkNativeParsedPDF::~SkNativeParsedPDF() { | 108 SkNativeParsedPDF::~SkNativeParsedPDF() { |
10 // TODO(edisonn): Auto-generated destructor stub | 109 delete[] fFileContent; |
11 } | 110 delete fAllocator; |
12 | 111 } |
| 112 |
| 113 unsigned char* SkNativeParsedPDF::readCrossReferenceSection(unsigned char* xrefS
tart, unsigned char* trailerEnd) { |
| 114 unsigned char* current = ignoreLine(xrefStart, trailerEnd); // TODO(edisonn
): verify next keyord is "xref", use nextObject here |
| 115 |
| 116 SkPdfObject token; |
| 117 while (current < trailerEnd) { |
| 118 token.reset(); |
| 119 unsigned char* previous = current; |
| 120 current = nextObject(current, trailerEnd, &token, NULL); |
| 121 if (!token.isInteger()) { |
| 122 return previous; |
| 123 } |
| 124 |
| 125 int startId = token.intValue(); |
| 126 token.reset(); |
| 127 current = nextObject(current, trailerEnd, &token, NULL); |
| 128 |
| 129 if (!token.isInteger()) { |
| 130 // TODO(edisonn): report/warning |
| 131 return current; |
| 132 } |
| 133 |
| 134 int entries = token.intValue(); |
| 135 |
| 136 for (int i = 0; i < entries; i++) { |
| 137 token.reset(); |
| 138 current = nextObject(current, trailerEnd, &token, NULL); |
| 139 if (!token.isInteger()) { |
| 140 // TODO(edisonn): report/warning |
| 141 return current; |
| 142 } |
| 143 int offset = token.intValue(); |
| 144 |
| 145 token.reset(); |
| 146 current = nextObject(current, trailerEnd, &token, NULL); |
| 147 if (!token.isInteger()) { |
| 148 // TODO(edisonn): report/warning |
| 149 return current; |
| 150 } |
| 151 int generation = token.intValue(); |
| 152 |
| 153 token.reset(); |
| 154 current = nextObject(current, trailerEnd, &token, NULL); |
| 155 if (!token.isKeyword() || token.len() != 1 || (*token.c_str() != 'f'
&& *token.c_str() != 'n')) { |
| 156 // TODO(edisonn): report/warning |
| 157 return current; |
| 158 } |
| 159 |
| 160 addCrossSectionInfo(startId + i, generation, offset, *token.c_str()
== 'f'); |
| 161 } |
| 162 } |
| 163 // TODO(edisonn): it should never get here? there is no trailer? |
| 164 return current; |
| 165 } |
| 166 |
| 167 long SkNativeParsedPDF::readTrailer(unsigned char* trailerStart, unsigned char*
trailerEnd, bool storeCatalog) { |
| 168 unsigned char* current = ignoreLine(trailerStart, trailerEnd); // TODO(edis
onn): verify next keyord is "trailer" use nextObject here |
| 169 |
| 170 SkPdfObject token; |
| 171 current = nextObject(current, trailerEnd, &token, fAllocator); |
| 172 SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token; |
| 173 |
| 174 if (storeCatalog) { |
| 175 const SkPdfObject* ref = trailer->Root(NULL); |
| 176 if (ref == NULL || !ref->isReference()) { |
| 177 // TODO(edisonn): oops, we have to fix the corrup pdf file |
| 178 return -1; |
| 179 } |
| 180 fRootCatalogRef = ref; |
| 181 } |
| 182 |
| 183 if (trailer->has_Prev()) { |
| 184 return trailer->Prev(NULL); |
| 185 } |
| 186 |
| 187 return -1; |
| 188 } |
| 189 |
| 190 void SkNativeParsedPDF::addCrossSectionInfo(int id, int generation, int offset,
bool isFreed) { |
| 191 // TODO(edisonn): security here |
| 192 while (fObjects.count() < id + 1) { |
| 193 reset(fObjects.append()); |
| 194 } |
| 195 |
| 196 fObjects[id].fOffset = offset; |
| 197 fObjects[id].fObj = NULL; |
| 198 } |
| 199 |
| 200 SkPdfObject* SkNativeParsedPDF::readObject(int id/*, int expectedGeneration*/) c
onst { |
| 201 long startOffset = fObjects[id].fOffset; |
| 202 //long endOffset = fObjects[id].fOffsetEnd; |
| 203 // TODO(edisonn): use hinted endOffset |
| 204 // TODO(edisonn): current implementation will result in a lot of memory usag
e |
| 205 // to decrease memory usage, we wither need to be smart and know where objec
ts end, and we will |
| 206 // alocate only the chancks needed, or the tokenizer will not make copies, b
ut then it needs to |
| 207 // cache the results so it does not go twice on the same buffer |
| 208 unsigned char* current = fFileContent + startOffset; |
| 209 unsigned char* end = fFileContent + fContentLength; |
| 210 |
| 211 SkPdfNativeTokenizer tokenizer(current, end - current, fMapper, fAllocator); |
| 212 |
| 213 SkPdfObject idObj; |
| 214 SkPdfObject generationObj; |
| 215 SkPdfObject objKeyword; |
| 216 SkPdfObject* dict = fAllocator->allocObject(); |
| 217 |
| 218 current = nextObject(current, end, &idObj, NULL); |
| 219 if (current >= end) { |
| 220 // TODO(edisonn): report warning/error |
| 221 return NULL; |
| 222 } |
| 223 |
| 224 current = nextObject(current, end, &generationObj, NULL); |
| 225 if (current >= end) { |
| 226 // TODO(edisonn): report warning/error |
| 227 return NULL; |
| 228 } |
| 229 |
| 230 current = nextObject(current, end, &objKeyword, NULL); |
| 231 if (current >= end) { |
| 232 // TODO(edisonn): report warning/error |
| 233 return NULL; |
| 234 } |
| 235 |
| 236 if (!idObj.isInteger() || !generationObj.isInteger() || id != idObj.intValue
()/* || generation != generationObj.intValue()*/) { |
| 237 // TODO(edisonn): report warning/error |
| 238 } |
| 239 |
| 240 if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) { |
| 241 // TODO(edisonn): report warning/error |
| 242 } |
| 243 |
| 244 current = nextObject(current, end, dict, fAllocator); |
| 245 |
| 246 // TODO(edisonn): report warning/error - verify last token is endobj |
| 247 |
| 248 return dict; |
| 249 } |
| 250 |
| 251 void SkNativeParsedPDF::fillPages(SkPdfPageTreeNodeDictionary* tree) { |
| 252 const SkPdfArray* kids = tree->Kids(this); |
| 253 if (kids == NULL) { |
| 254 *fPages.append() = (SkPdfPageObjectDictionary*)tree; |
| 255 return; |
| 256 } |
| 257 |
| 258 int cnt = kids->size(); |
| 259 for (int i = 0; i < cnt; i++) { |
| 260 const SkPdfObject* obj = resolveReference(kids->objAtAIndex(i)); |
| 261 if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdf
ObjectType) { |
| 262 *fPages.append() = (SkPdfPageObjectDictionary*)obj; |
| 263 } else { |
| 264 // TODO(edisonn): verify that it is a page tree indeed |
| 265 fillPages((SkPdfPageTreeNodeDictionary*)obj); |
| 266 } |
| 267 } |
| 268 } |
| 269 |
| 270 int SkNativeParsedPDF::pages() const { |
| 271 return fPages.count(); |
| 272 } |
| 273 |
| 274 SkPdfResourceDictionary* SkNativeParsedPDF::pageResources(int page) { |
| 275 return fPages[page]->Resources(this); |
| 276 } |
| 277 |
| 278 // TODO(edisonn): Partial implemented. Move the logics directly in the code gene
rator for inheritable and default value? |
| 279 SkRect SkNativeParsedPDF::MediaBox(int page) const { |
| 280 SkPdfPageObjectDictionary* current = fPages[page]; |
| 281 while (!current->has_MediaBox() && current->has_Parent()) { |
| 282 current = (SkPdfPageObjectDictionary*)current->Parent(this); |
| 283 } |
| 284 if (current) { |
| 285 return current->MediaBox(this); |
| 286 } |
| 287 return SkRect::MakeEmpty(); |
| 288 } |
| 289 |
| 290 // TODO(edisonn): stream or array ... ? for now only array |
| 291 SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfPage(int page) const { |
| 292 if (fPages[page]->isContentsAStream(this)) { |
| 293 return tokenizerOfStream(fPages[page]->getContentsAsStream(this)); |
| 294 } else { |
| 295 // TODO(edisonn): NYI, we need to concatenate all streams in the array o
r make the tokenizer smart |
| 296 // so we don't allocate new memory |
| 297 return NULL; |
| 298 } |
| 299 } |
| 300 |
| 301 SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfStream(SkPdfObject* stream)
const { |
| 302 if (stream == NULL) { |
| 303 return NULL; |
| 304 } |
| 305 |
| 306 return new SkPdfNativeTokenizer(stream, fMapper, fAllocator); |
| 307 } |
| 308 |
| 309 // TODO(edisonn): NYI |
| 310 SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfBuffer(unsigned char* buffer
, size_t len) const { |
| 311 // warning does not track two calls in the same buffer! the buffer is update
d! |
| 312 // make a clean copy if needed! |
| 313 return new SkPdfNativeTokenizer(buffer, len, fMapper, fAllocator); |
| 314 } |
| 315 |
| 316 size_t SkNativeParsedPDF::objects() const { |
| 317 return fObjects.count(); |
| 318 } |
| 319 |
| 320 SkPdfObject* SkNativeParsedPDF::object(int i) { |
| 321 SkASSERT(!(i < 0 || i > fObjects.count())); |
| 322 |
| 323 if (i < 0 || i > fObjects.count()) { |
| 324 return NULL; |
| 325 } |
| 326 |
| 327 if (fObjects[i].fObj == NULL) { |
| 328 // TODO(edisonn): when we read the cross reference sections, store the s
tart of the next object |
| 329 // and fill fOffsetEnd |
| 330 fObjects[i].fObj = readObject(i); |
| 331 } |
| 332 |
| 333 return fObjects[i].fObj; |
| 334 } |
| 335 |
| 336 const SkPdfMapper* SkNativeParsedPDF::mapper() const { |
| 337 return fMapper; |
| 338 } |
| 339 |
| 340 SkPdfReal* SkNativeParsedPDF::createReal(double value) const { |
| 341 SkPdfObject* obj = fAllocator->allocObject(); |
| 342 SkPdfObject::makeReal(value, obj); |
| 343 return (SkPdfReal*)obj; |
| 344 } |
| 345 |
| 346 SkPdfInteger* SkNativeParsedPDF::createInteger(int value) const { |
| 347 SkPdfObject* obj = fAllocator->allocObject(); |
| 348 SkPdfObject::makeInteger(value, obj); |
| 349 return (SkPdfInteger*)obj; |
| 350 } |
| 351 |
| 352 SkPdfString* SkNativeParsedPDF::createString(unsigned char* sz, size_t len) cons
t { |
| 353 SkPdfObject* obj = fAllocator->allocObject(); |
| 354 SkPdfObject::makeString(sz, len, obj); |
| 355 return (SkPdfString*)obj; |
| 356 } |
| 357 |
| 358 PdfContext* gPdfContext = NULL; |
| 359 |
| 360 void SkNativeParsedPDF::drawPage(int page, SkCanvas* canvas) { |
| 361 SkPdfNativeTokenizer* tokenizer = tokenizerOfPage(page); |
| 362 |
| 363 PdfContext pdfContext(this); |
| 364 pdfContext.fOriginalMatrix = SkMatrix::I(); |
| 365 pdfContext.fGraphicsState.fResources = pageResources(page); |
| 366 |
| 367 gPdfContext = &pdfContext; |
| 368 |
| 369 // TODO(edisonn): get matrix stuff right. |
| 370 // TODO(edisonn): add DPI/scale/zoom. |
| 371 SkScalar z = SkIntToScalar(0); |
| 372 SkRect rect = MediaBox(page); |
| 373 SkScalar w = rect.width(); |
| 374 SkScalar h = rect.height(); |
| 375 |
| 376 SkPoint pdfSpace[4] = {SkPoint::Make(z, z), SkPoint::Make(w, z), SkPoint::Ma
ke(w, h), SkPoint::Make(z, h)}; |
| 377 // SkPoint skiaSpace[4] = {SkPoint::Make(z, h), SkPoint::Make(w,
h), SkPoint::Make(w, z), SkPoint::Make(z, z)}; |
| 378 |
| 379 // TODO(edisonn): add flag for this app to create sourunding buffer zone |
| 380 // TODO(edisonn): add flagg for no clipping. |
| 381 // Use larger image to make sure we do not draw anything outside of page |
| 382 // could be used in tests. |
| 383 |
| 384 #ifdef PDF_DEBUG_3X |
| 385 SkPoint skiaSpace[4] = {SkPoint::Make(w+z, h+h), SkPoint::Make(w+w, h+h), Sk
Point::Make(w+w, h+z), SkPoint::Make(w+z, h+z)}; |
| 386 #else |
| 387 SkPoint skiaSpace[4] = {SkPoint::Make(z, h), SkPoint::Make(w, h), SkPoint::M
ake(w, z), SkPoint::Make(z, z)}; |
| 388 #endif |
| 389 //SkPoint pdfSpace[2] = {SkPoint::Make(z, z), SkPoint::Make(w, h)}; |
| 390 //SkPoint skiaSpace[2] = {SkPoint::Make(w, z), SkPoint::Make(z, h)}; |
| 391 |
| 392 //SkPoint pdfSpace[2] = {SkPoint::Make(z, z), SkPoint::Make(z, h)}; |
| 393 //SkPoint skiaSpace[2] = {SkPoint::Make(z, h), SkPoint::Make(z, z)}; |
| 394 |
| 395 //SkPoint pdfSpace[3] = {SkPoint::Make(z, z), SkPoint::Make(z, h), SkPoint::
Make(w, h)}; |
| 396 //SkPoint skiaSpace[3] = {SkPoint::Make(z, h), SkPoint::Make(z, z), SkPoint:
:Make(w, 0)}; |
| 397 |
| 398 SkAssertResult(pdfContext.fOriginalMatrix.setPolyToPoly(pdfSpace, skiaSpace,
4)); |
| 399 SkTraceMatrix(pdfContext.fOriginalMatrix, "Original matrix"); |
| 400 |
| 401 |
| 402 pdfContext.fGraphicsState.fMatrix = pdfContext.fOriginalMatrix; |
| 403 pdfContext.fGraphicsState.fMatrixTm = pdfContext.fGraphicsState.fMatrix; |
| 404 pdfContext.fGraphicsState.fMatrixTlm = pdfContext.fGraphicsState.fMatrix; |
| 405 |
| 406 canvas->setMatrix(pdfContext.fOriginalMatrix); |
| 407 |
| 408 #ifndef PDF_DEBUG_NO_PAGE_CLIPING |
| 409 canvas->clipRect(SkRect::MakeXYWH(z, z, w, h), SkRegion::kIntersect_Op, true
); |
| 410 #endif |
| 411 |
| 412 // erase with red before? |
| 413 // SkPaint paint; |
| 414 // paint.setColor(SK_ColorRED); |
| 415 // canvas->drawRect(rect, paint); |
| 416 |
| 417 PdfMainLooper looper(NULL, tokenizer, &pdfContext, canvas); |
| 418 looper.loop(); |
| 419 |
| 420 delete tokenizer; |
| 421 |
| 422 canvas->flush(); |
| 423 } |
| 424 |
| 425 SkPdfAllocator* SkNativeParsedPDF::allocator() const { |
| 426 return fAllocator; |
| 427 } |
| 428 |
| 429 SkPdfObject* SkNativeParsedPDF::resolveReference(SkPdfObject* ref) const { |
| 430 return (SkPdfObject*)resolveReference((const SkPdfObject*)ref); |
| 431 } |
| 432 |
| 433 // TODO(edisonn): fix infinite loop if ref to itself! |
| 434 // TODO(edisonn): perf, fix refs at load, and resolve will simply return fResolv
edReference? |
| 435 SkPdfObject* SkNativeParsedPDF::resolveReference(const SkPdfObject* ref) const { |
| 436 if (ref && ref->isReference()) { |
| 437 int id = ref->referenceId(); |
| 438 // TODO(edisonn): generation/updates not supported now |
| 439 //int gen = ref->referenceGeneration(); |
| 440 |
| 441 SkASSERT(!(id < 0 || id > fObjects.count())); |
| 442 |
| 443 if (id < 0 || id > fObjects.count()) { |
| 444 return NULL; |
| 445 } |
| 446 |
| 447 // TODO(edisonn): verify id and gen expected |
| 448 |
| 449 if (fObjects[id].fResolvedReference != NULL) { |
| 450 return fObjects[id].fResolvedReference; |
| 451 } |
| 452 |
| 453 if (fObjects[id].fObj == NULL) { |
| 454 fObjects[id].fObj = readObject(id); |
| 455 } |
| 456 |
| 457 if (fObjects[id].fResolvedReference == NULL) { |
| 458 if (!fObjects[id].fObj->isReference()) { |
| 459 fObjects[id].fResolvedReference = fObjects[id].fObj; |
| 460 } else { |
| 461 fObjects[id].fResolvedReference = resolveReference(fObjects[id].
fObj); |
| 462 } |
| 463 } |
| 464 |
| 465 return fObjects[id].fResolvedReference; |
| 466 } |
| 467 // TODO(edisonn): fix the mess with const, probably we need to remove it pre
tty much everywhere |
| 468 return (SkPdfObject*)ref; |
| 469 } |
OLD | NEW |