experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp - Issue 1266093003: Remove experimental/PdfViewer

Side by Side Diff: experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp

Issue 1266093003: Remove experimental/PdfViewer (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: 2015-08-03 (Monday) 10:43:56 EDT Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h ('k') | experimental/PdfViewer/pdfparser/native/pdfapi/SkPdfALinkAnnotationDictionary_autogen.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1 /*

2 * Copyright 2013 Google Inc.

3 *

4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.

6 */

7

8 #include "SkPdfConfig.h"

9 #include "SkPdfDiffEncoder.h"

10 #include "SkPdfNativeObject.h"

11 #include "SkPdfNativeTokenizer.h"

12 #include "SkPdfUtils.h"

13

14 // TODO(edisonn): mac builder does not find the header ... but from headers is ok

15 //#include "SkPdfStreamCommonDictionary_autogen.h"

16 //#include "SkPdfImageDictionary_autogen.h"

17 #include "SkPdfHeaders_autogen.h"

18

19

20 // TODO(edisonn): Perf, Make this function run faster.

21 // There could be 0s between start and end.

22 // needle will not contain 0s.

23 static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {

24 size_t needleLen = strlen(needle);

25 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) \|\| (hayStart+needl eLen == hayEnd)) &&

26 strncmp(hayStart, needle, needleLen) == 0) {

27 return hayStart;

28 }

29

30 hayStart++;

31

32 while (hayStart < hayEnd) {

33 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&

34 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) \|\|

35 (hayStart+needleLen == hayEnd)) &&

36 strncmp(hayStart, needle, needleLen) == 0) {

37 return hayStart;

38 }

39 hayStart++;

40 }

41 return NULL;

42 }

43

44 const unsigned char* skipPdfWhiteSpaces(const unsigned char* start, const unsign ed char* end) {

45 while (start < end && (isPdfWhiteSpace(start) \|\| start == kComment_PdfDeli miter)) {

46 TRACE_COMMENT(*start);

47 if (*start == kComment_PdfDelimiter) {

48 // skip the comment until end of line

49 while (start < end && !isPdfEOL(*start)) {

50 start++;

51 TRACE_COMMENT(*start);

52 }

53 } else {

54 start++;

55 }

56 }

57 return start;

58 }

59

60 const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned ch ar* end) {

61 SkASSERT(!isPdfWhiteSpace(*start));

62

63 if (start < end && isPdfDelimiter(*start)) {

64 TRACE_TK(*start);

65 start++;

66 return start;

67 }

68

69 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {

70 TRACE_TK(*start);

71 start++;

72 }

73 return start;

74 }

75

76 // The parsing should end with a ].

77 static const unsigned char* readArray(const unsigned char* start, const unsigned char* end,

78 SkPdfNativeObject* array,

79 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {

80 SkPdfNativeObject::makeEmptyArray(array);

81 // PUT_TRACK_STREAM(array, start, start)

82

83 if (allocator == NULL) {

84 // TODO(edisonn): report/warning error/assert

85 return end;

86 }

87

88 while (start < end) {

89 // skip white spaces

90 start = skipPdfWhiteSpaces(start, end);

91

92 const unsigned char* endOfToken = endOfPdfToken(start, end);

93

94 if (endOfToken == start) {

95 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray

96 return start;

97 }

98

99 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimit er) {

100 return endOfToken;

101 }

102

103 SkPdfNativeObject* newObj = allocator->allocObject();

104 start = nextObject(start, end, newObj, allocator, doc);

105 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array

106 // only when we are sure they are not references!

107 if (newObj->isKeywordReference() && array->size() >= 2 &&

108 array->objAtAIndex(SkToInt(array->size() - 1))->isInteger() &&

109 array->objAtAIndex(SkToInt(array->size() - 2))->isInteger()) {

110 SkPdfNativeObject* gen = array->removeLastInArray();

111 SkPdfNativeObject* id = array->removeLastInArray();

112

113 SkPdfNativeObject::resetAndMakeReference((unsigned int)id->intValue( ),

114 (unsigned int)gen->intValue (), newObj);

115 // newObj PUT_TRACK_PARAMETERS_OBJ2(id, newObj) - store end, as now

116 }

117 array->appendInArray(newObj);

118 }

119 // TODO(edisonn): report not reached, we should never get here

120 // TODO(edisonn): there might be a bug here, enable an assert and run it on files

121 // or it might be that the files were actually corrupted

122 return start;

123 }

124

125 static const unsigned char* readString(const unsigned char* start, const unsigne d char* end,

126 unsigned char* out) {

127 const unsigned char* in = start;

128 bool hasOut = (out != NULL);

129

130 int openRoundBrackets = 1;

131 while (in < end) {

132 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);

133 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);

134 if (openRoundBrackets == 0) {

135 in++; // consumed )

136 break;

137 }

138

139 if (*in == kEscape_PdfSpecial) {

140 if (in + 1 < end) {

141 switch (in[1]) {

142 case 'n':

143 if (hasOut) { *out = kLF_PdfWhiteSpace; }

144 out++;

145 in += 2;

146 break;

147

148 case 'r':

149 if (hasOut) { *out = kCR_PdfWhiteSpace; }

150 out++;

151 in += 2;

152 break;

153

154 case 't':

155 if (hasOut) { *out = kHT_PdfWhiteSpace; }

156 out++;

157 in += 2;

158 break;

159

160 case 'b':

161 // TODO(edisonn): any special meaning to backspace?

162 if (hasOut) { *out = kBackspace_PdfSpecial; }

163 out++;

164 in += 2;

165 break;

166

167 case 'f':

168 if (hasOut) { *out = kFF_PdfWhiteSpace; }

169 out++;

170 in += 2;

171 break;

172

173 case kOpenedRoundBracket_PdfDelimiter:

174 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }

175 out++;

176 in += 2;

177 break;

178

179 case kClosedRoundBracket_PdfDelimiter:

180 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }

181 out++;

182 in += 2;

183 break;

184

185 case kEscape_PdfSpecial:

186 if (hasOut) { *out = kEscape_PdfSpecial; }

187 out++;

188 in += 2;

189 break;

190

191 case '0':

192 case '1':

193 case '2':

194 case '3':

195 case '4':

196 case '5':

197 case '6':

198 case '7': {

199 //read octals

200 in++; // consume backslash

201

202 int code = 0;

203 int i = 0;

204 while (in < end && in >= '0' && in < '8') {

205 code = (code << 3) + ((in) - '0'); // code 8 + d

206 i++;

207 in++;

208 if (i == 3) {

209 if (hasOut) { *out = code & 0xff; }

210 out++;

211 i = 0;

212 }

213 }

214 if (i > 0) {

215 if (hasOut) { *out = code & 0xff; }

216 out++;

217 }

218 }

219 break;

220

221 default:

222 // Per spec, backslash is ignored if escaped ch is unkno wn

223 in++;

224 break;

225 }

226 } else {

227 in++;

228 }

229 } else {

230 if (hasOut) { out = in; }

231 in++;

232 out++;

233 }

234 }

235

236 if (hasOut) {

237 return in; // consumed already ) at the end of the string

238 } else {

239 // return where the string would end if we reuse the string

240 return start + (out - (const unsigned char*)NULL);

241 }

242 }

243

244 static size_t readStringLength(const unsigned char* start, const unsigned char* end) {

245 return readString(start, end, NULL) - start;

246 }

247

248 static const unsigned char* readString(const unsigned char* start, const unsigne d char* end,

249 SkPdfNativeObject* str, SkPdfAllocator* a llocator) {

250 if (!allocator) {

251 // TODO(edisonn): report error/warn/assert

252 return end;

253 }

254

255 size_t outLength = readStringLength(start, end);

256 unsigned char* out = (unsigned char*)allocator->alloc(outLength);

257 const unsigned char* now = readString(start, end, out);

258 SkPdfNativeObject::makeString(out, out + outLength, str);

259 // PUT_TRACK_STREAM(str, start, now)

260 TRACE_STRING(out, out + outLength);

261 return now; // consumed already ) at the end of the string

262 }

263

264 static const unsigned char* readHexString(const unsigned char* start, const unsi gned char* end,

265 unsigned char* out) {

266 bool hasOut = (out != NULL);

267 const unsigned char* in = start;

268

269 unsigned char code = 0;

270

271 while (in < end) {

272 while (in < end && isPdfWhiteSpace(*in)) {

273 in++;

274 }

275

276 if (*in == kClosedInequityBracket_PdfDelimiter) {

277 in++; // consume >

278 // normal exit

279 break;

280 }

281

282 if (in >= end) {

283 // end too soon

284 break;

285 }

286

287 switch (*in) {

288 case '0':

289 case '1':

290 case '2':

291 case '3':

292 case '4':

293 case '5':

294 case '6':

295 case '7':

296 case '8':

297 case '9':

298 code = (*in - '0') << 4;

299 break;

300

301 case 'a':

302 case 'b':

303 case 'c':

304 case 'd':

305 case 'e':

306 case 'f':

307 code = (*in - 'a' + 10) << 4;

308 break;

309

310 case 'A':

311 case 'B':

312 case 'C':

313 case 'D':

314 case 'E':

315 case 'F':

316 code = (*in - 'A' + 10) << 4;

317 break;

318

319 // TODO(edisonn): spec does not say how to handle this error

320 default:

321 break;

322 }

323

324 in++; // advance

325

326 while (in < end && isPdfWhiteSpace(*in)) {

327 in++;

328 }

329

330 // TODO(edisonn): report error

331 if (in >= end) {

332 if (hasOut) { *out = code; }

333 out++;

334 break;

335 }

336

337 if (*in == kClosedInequityBracket_PdfDelimiter) {

338 if (hasOut) { *out = code; }

339 out++;

340 in++;

341 break;

342 }

343

344 switch (*in) {

345 case '0':

346 case '1':

347 case '2':

348 case '3':

349 case '4':

350 case '5':

351 case '6':

352 case '7':

353 case '8':

354 case '9':

355 code += (*in - '0');

356 break;

357

358 case 'a':

359 case 'b':

360 case 'c':

361 case 'd':

362 case 'e':

363 case 'f':

364 code += (*in - 'a' + 10);

365 break;

366

367 case 'A':

368 case 'B':

369 case 'C':

370 case 'D':

371 case 'E':

372 case 'F':

373 code += (*in - 'A' + 10);

374 break;

375

376 // TODO(edisonn): spec does not say how to handle this error

377 default:

378 break;

379 }

380

381 if (hasOut) { *out = code; }

382 out++;

383 in++;

384 }

385

386 if (hasOut) {

387 return in; // consumed already ) at the end of the string

388 } else {

389 // return where the string would end if we reuse the string

390 return start + (out - (const unsigned char*)NULL);

391 }

392 }

393

394 static size_t readHexStringLength(const unsigned char* start, const unsigned cha r* end) {

395 return readHexString(start, end, NULL) - start;

396 }

397

398 static const unsigned char* readHexString(const unsigned char* start, const unsi gned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) {

399 if (!allocator) {

400 // TODO(edisonn): report error/warn/assert

401 return end;

402 }

403 size_t outLength = readHexStringLength(start, end);

404 unsigned char* out = (unsigned char*)allocator->alloc(outLength);

405 const unsigned char* now = readHexString(start, end, out);

406 SkPdfNativeObject::makeHexString(out, out + outLength, str);

407 // str PUT_TRACK_STREAM(start, now)

408 TRACE_HEXSTRING(out, out + outLength);

409 return now; // consumed already > at the end of the string

410 }

411

412 // TODO(edisonn): add version parameter, before PDF 1.2 name could not have spec ial characters.

413 static const unsigned char* readName(const unsigned char* start, const unsigned char* end,

414 unsigned char* out) {

415 bool hasOut = (out != NULL);

416 const unsigned char* in = start;

417

418 unsigned char code = 0;

419

420 while (in < end) {

421 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {

422 break;

423 }

424

425 if (*in == '#' && in + 2 < end) {

426 in++;

427 switch (*in) {

428 case '0':

429 case '1':

430 case '2':

431 case '3':

432 case '4':

433 case '5':

434 case '6':

435 case '7':

436 case '8':

437 case '9':

438 code = (*in - '0') << 4;

439 break;

440

441 case 'a':

442 case 'b':

443 case 'c':

444 case 'd':

445 case 'e':

446 case 'f':

447 code = (*in - 'a' + 10) << 4;

448 break;

449

450 case 'A':

451 case 'B':

452 case 'C':

453 case 'D':

454 case 'E':

455 case 'F':

456 code = (*in - 'A' + 10) << 4;

457 break;

458

459 // TODO(edisonn): spec does not say how to handle this error

460 default:

461 break;

462 }

463

464 in++; // advance

465

466 switch (*in) {

467 case '0':

468 case '1':

469 case '2':

470 case '3':

471 case '4':

472 case '5':

473 case '6':

474 case '7':

475 case '8':

476 case '9':

477 code += (*in - '0');

478 break;

479

480 case 'a':

481 case 'b':

482 case 'c':

483 case 'd':

484 case 'e':

485 case 'f':

486 code += (*in - 'a' + 10);

487 break;

488

489 case 'A':

490 case 'B':

491 case 'C':

492 case 'D':

493 case 'E':

494 case 'F':

495 code += (*in - 'A' + 10);

496 break;

497

498 // TODO(edisonn): spec does not say how to handle this error

499 default:

500 break;

501 }

502

503 if (hasOut) { *out = code; }

504 out++;

505 in++;

506 } else {

507 if (hasOut) { out = in; }

508 out++;

509 in++;

510 }

511 }

512

513 if (hasOut) {

514 return in; // consumed already ) at the end of the string

515 } else {

516 // return where the string would end if we reuse the string

517 return start + (out - (const unsigned char*)NULL);

518 }

519 }

520

521 static size_t readNameLength(const unsigned char* start, const unsigned char* en d) {

522 return readName(start, end, NULL) - start;

523 }

524

525 static const unsigned char* readName(const unsigned char* start, const unsigned char* end,

526 SkPdfNativeObject* name, SkPdfAllocator* al locator) {

527 if (!allocator) {

528 // TODO(edisonn): report error/warn/assert

529 return end;

530 }

531 size_t outLength = readNameLength(start, end);

532 unsigned char* out = (unsigned char*)allocator->alloc(outLength);

533 const unsigned char* now = readName(start, end, out);

534 SkPdfNativeObject::makeName(out, out + outLength, name);

535 //PUT_TRACK_STREAM(start, now)

536 TRACE_NAME(out, out + outLength);

537 return now;

538 }

539

540 // TODO(edisonn): pdf spec let Length to be an indirect object define after the stream

541 // that makes for an interesting scenario, where the stream itself contains ends tream, together

542 // with a reference object with the length, but the real length object would be somewhere else

543 // it could confuse the parser

544 /*example:

545

546 7 0 obj

547 << /length 8 0 R>>

548 stream

549 ...............

550 endstream

551 8 0 obj #we are in stream actually, not a real object

552 << 10 >> #we are in stream actually, not a real object

553 endobj

554 endstream

555 8 0 obj #real obj

556 << 100 >> #real obj

557 endobj

558 and it could get worse, with multiple object like this

559 */

560

561 // right now implement the silly algorithm that assumes endstream is finishing t he stream

562

563 static const unsigned char* readStream(const unsigned char* start, const unsigne d char* end,

564 SkPdfNativeObject* dict, SkPdfNativeDoc* doc) {

565 start = skipPdfWhiteSpaces(start, end);

566 if (!( start[0] == 's' &&

567 start[1] == 't' &&

568 start[2] == 'r' &&

569 start[3] == 'e' &&

570 start[4] == 'a' &&

571 start[5] == 'm')) {

572 // no stream. return.

573 return start;

574 }

575

576 start += 6; // strlen("stream")

577 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {

578 start += 2;

579 } else if (start[0] == kLF_PdfWhiteSpace) {

580 start += 1;

581 } else if (isPdfWhiteSpace(start[0])) {

582 start += 1;

583 } else {

584 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?

585 }

586

587 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;

588 // TODO(edisonn): load Length

589 int64_t length = -1;

590

591 // TODO(edisonn): very basic implementation

592 if (stream->has_Length() && stream->Length(doc) > 0) {

593 length = stream->Length(doc);

594 }

595

596 // TODO(edisonn): load external streams

597 // TODO(edisonn): look at the last filter, to determine how to deal with pos sible parsing

598 // issues. The last filter can have special rules to terminate a stream, whi ch we could

599 // use to determine end of stream.

600

601 if (length >= 0) {

602 const unsigned char* endstream = start + length;

603

604 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpa ce) {

605 endstream += 2;

606 } else if (endstream[0] == kLF_PdfWhiteSpace) {

607 endstream += 1;

608 }

609

610 if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {

611 length = -1;

612 }

613 }

614

615 if (length < 0) {

616 // scan the buffer, until we find first endstream

617 // TODO(edisonn): all buffers must have a 0 at the end now,

618 const unsigned char* endstream = (const unsigned char)strrstrk((char)s tart, (char*)end,

619 "endstre am");

620

621 if (endstream) {

622 length = endstream - start;

623 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;

624 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;

625 }

626 }

627 if (length >= 0) {

628 const unsigned char* endstream = start + length;

629

630 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpa ce) {

631 endstream += 2;

632 } else if (endstream[0] == kLF_PdfWhiteSpace) {

633 endstream += 1;

634 }

635

636 // TODO(edisonn): verify the next bytes are "endstream"

637

638 endstream += strlen("endstream");

639 // TODO(edisonn): Assert? report error/warning?

640 dict->addStream(start, (size_t)length);

641 return endstream;

642 }

643 return start;

644 }

645

646 static const unsigned char* readInlineImageStream(const unsigned char* start,

647 const unsigned char* end,

648 SkPdfImageDictionary* inlineIm age,

649 SkPdfNativeDoc* doc) {

650 // We already processed ID keyword, and we should be positioned immediately after it

651

652 // TODO(edisonn): security: either make all streams to have extra 2 bytes at the end,

653 // instead of this if.

654 //if (end - start <= 2) {

655 // // TODO(edisonn): warning?

656 // return end; // but can we have a pixel image encoded in 1-2 bytes?

657 //}

658

659 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {

660 start += 2;

661 } else if (start[0] == kLF_PdfWhiteSpace) {

662 start += 1;

663 } else if (isPdfWhiteSpace(start[0])) {

664 start += 1;

665 } else {

666 SkASSERT(isPdfDelimiter(start[0]));

667 // TODO(edisonn): warning?

668 }

669

670 const unsigned char* endstream = (const unsigned char)strrstrk((char)start , (char*)end, "EI");

671 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strl en("EI")

672

673 if (endstream) {

674 size_t length = endstream - start;

675 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;

676 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;

677 inlineImage->addStream(start, (size_t)length);

678 } else {

679 // TODO(edisonn): report error in inline image stream (ID-EI) section

680 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly

681 return end;

682 }

683 return endEI;

684 }

685

686 static const unsigned char* readDictionary(const unsigned char* start, const uns igned char* end,

687 SkPdfNativeObject* dict,

688 SkPdfAllocator* allocator, SkPdfNativ eDoc* doc) {

689 if (allocator == NULL) {

690 // TODO(edisonn): report/warning error

691 return end;

692 }

693 SkPdfNativeObject::makeEmptyDictionary(dict);

694 // PUT_TRACK_STREAM(dict, start, start)

695

696 start = skipPdfWhiteSpaces(start, end);

697 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them after set.

698

699 while (start < end && *start == kNamed_PdfDelimiter) {

700 SkPdfNativeObject key;

701 //*start = '\0';

702 start++;

703 start = readName(start, end, &key, &tmpStorage);

704 start = skipPdfWhiteSpaces(start, end);

705

706 if (start < end) {

707 SkPdfNativeObject* value = allocator->allocObject();

708 start = nextObject(start, end, value, allocator, doc);

709

710 start = skipPdfWhiteSpaces(start, end);

711

712 if (start < end) {

713 // We should have an indirect reference

714 if (isPdfDigit(*start)) {

715 SkPdfNativeObject generation;

716 start = nextObject(start, end, &generation, allocator, doc);

717

718 SkPdfNativeObject keywordR;

719 start = nextObject(start, end, &keywordR, allocator, doc);

720

721 if (value->isInteger() && generation.isInteger() &&

722 keywordR.isKeywordReference()) {

723 int64_t id = value->intValue();

724 SkPdfNativeObject::resetAndMakeReference(

725 (unsigned int)id,

726 (unsigned int)generation.intValue(),

727 value);

728 // PUT_TRACK_PARAMETERS_OBJ2(value, &generation)

729 dict->set(&key, value);

730 } else {

731 // TODO(edisonn) error?, ignore it for now.

732 dict->set(&key, value);

733 }

734 } else {

735 // next elem is not a digit, but it might not be / either!

736 dict->set(&key, value);

737 }

738 } else {

739 // /key >>

740 dict->set(&key, value);

741 return end;

742 }

743 start = skipPdfWhiteSpaces(start, end);

744 } else {

745 dict->set(&key, &SkPdfNativeObject::kNull);

746 return end;

747 }

748 }

749

750 // now we should expect >>

751 start = skipPdfWhiteSpaces(start, end);

752 if (*start != kClosedInequityBracket_PdfDelimiter) {

753 // TODO(edisonn): report/warning

754 }

755

756 start++; // skip >

757 if (*start != kClosedInequityBracket_PdfDelimiter) {

758 // TODO(edisonn): report/warning

759 }

760

761 start++; // skip >

762

763 //STORE_TRACK_PARAMETER_OFFSET_END(dict,start);

764

765 start = readStream(start, end, dict, doc);

766

767 return start;

768 }

769

770 const unsigned char* nextObject(const unsigned char* start, const unsigned char* end,

771 SkPdfNativeObject* token,

772 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {

773 const unsigned char* current;

774

775 // skip white spaces

776 start = skipPdfWhiteSpaces(start, end);

777

778 if (start >= end) {

779 return end;

780 }

781

782 current = endOfPdfToken(start, end);

783

784 // no token, len would be 0

785 if (current == start \|\| current == end) {

786 return end;

787 }

788

789 size_t tokenLen = current - start;

790

791 if (tokenLen == 1) {

792 // start array

793 switch (*start) {

794 case kOpenedSquareBracket_PdfDelimiter:

795 return readArray(current, end, token, allocator, doc);

796

797 case kOpenedRoundBracket_PdfDelimiter:

798 return readString(start + 1, end, token, allocator);

799

800 case kOpenedInequityBracket_PdfDelimiter:

801 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDel imiter) {

802 // TODO(edisonn): pass here the length somehow?

803 return readDictionary(start + 2, end, token, allocator, doc) ; // skip <<

804 } else {

805 return readHexString(start + 1, end, token, allocator); // skip <

806 }

807

808 case kNamed_PdfDelimiter:

809 return readName(start + 1, end, token, allocator);

810

811 // TODO(edisonn): what to do curly brackets?

812 case kOpenedCurlyBracket_PdfDelimiter:

813 default:

814 break;

815 }

816

817 SkASSERT(!isPdfWhiteSpace(*start));

818 if (isPdfDelimiter(*start)) {

819 // TODO(edisonn): how unexpected stream ] } > ) will be handled?

820 // for now ignore, and it will become a keyword to be ignored

821 }

822 }

823

824 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' & & start[3] == 'l') {

825 SkPdfNativeObject::makeNull(token);

826 // PUT_TRACK_STREAM(start, start + 4)

827 return current;

828 }

829

830 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' & & start[3] == 'e') {

831 SkPdfNativeObject::makeBoolean(true, token);

832 // PUT_TRACK_STREAM(start, start + 4)

833 return current;

834 }

835

836 // TODO(edisonn): again, make all buffers have 5 extra bytes

837 if (tokenLen == 5 && start[0] == 'f' &&

838 start[1] == 'a' &&

839 start[2] == 'l' &&

840 start[3] == 's' &&

841 start[4] == 'e') {

842 SkPdfNativeObject::makeBoolean(false, token);

843 // PUT_TRACK_STREAM(start, start + 5)

844 return current;

845 }

846

847 if (isPdfNumeric(*start)) {

848 SkPdfNativeObject::makeNumeric(start, current, token);

849 // PUT_TRACK_STREAM(start, current)

850 } else {

851 SkPdfNativeObject::makeKeyword(start, current, token);

852 // PUT_TRACK_STREAM(start, current)

853 }

854 return current;

855 }

856

857 SkPdfNativeObject* SkPdfAllocator::allocBlock() {

858 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfNativeObject);

859 return new SkPdfNativeObject[BUFFER_SIZE];

860 }

861

862 SkPdfAllocator::~SkPdfAllocator() {

863 for (int i = 0 ; i < fHandles.count(); i++) {

864 free(fHandles[i]);

865 }

866 for (int i = 0 ; i < fHistory.count(); i++) {

867 for (int j = 0 ; j < BUFFER_SIZE; j++) {

868 fHistory[i][j].reset();

869 }

870 delete[] fHistory[i];

871 }

872 for (int j = 0 ; j < BUFFER_SIZE; j++) {

873 fCurrent[j].reset();

874 }

875 delete[] fCurrent;

876 }

877

878 SkPdfNativeObject* SkPdfAllocator::allocObject() {

879 if (fCurrentUsed >= BUFFER_SIZE) {

880 fHistory.push(fCurrent);

881 fCurrent = allocBlock();

882 fCurrentUsed = 0;

883 fSizeInBytes += sizeof(SkPdfNativeObject*);

884 }

885 fCurrentUsed++;

886 return &fCurrent[fCurrentUsed - 1];

887 }

888

889 // TODO(edisonn): perf: do no copy the buffers, but reuse them, and mark cache t he result,

890 // so there is no need of a second pass

891 SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream,

892 SkPdfAllocator* allocator,

893 SkPdfNativeDoc* doc)

894 : fDoc(doc)

895 , fAllocator(allocator)

896 , fUncompressedStream(NULL)

897 , fUncompressedStreamEnd(NULL)

898 , fEmpty(false)

899 , fHasPutBack(false) {

900 const unsigned char* buffer = NULL;

901 size_t len = 0;

902 objWithStream->GetFilteredStreamRef(&buffer, &len);

903 // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!)

904 // we need to do now for perf, and our generated pdfs do not have comments,

905 // but we need to remove this hack for pdfs in the wild

906 char* endobj = strrstrk((char)buffer, (char)buffer + len, "endobj");

907 if (endobj) {

908 len = endobj - (char*)buffer + strlen("endobj");

909 }

910 fUncompressedStreamStart = fUncompressedStream = buffer;

911 fUncompressedStreamEnd = fUncompressedStream + len;

912 }

913

914 SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len,

915 SkPdfAllocator* allocator,

916 SkPdfNativeDoc* doc) : fDoc(doc)

917 , fAllocator(all ocator)

918 , fEmpty(false)

919 , fHasPutBack(fa lse) {

920 // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!)

921 // we need to do now for perf, and our generated pdfs do not have comments,

922 // but we need to remove this hack for pdfs in the wild

923 char* endobj = strrstrk((char)buffer, (char)buffer + len, "endobj");

924 if (endobj) {

925 len = SkToInt(endobj - (char*)buffer + strlen("endobj"));

926 }

927 fUncompressedStreamStart = fUncompressedStream = buffer;

928 fUncompressedStreamEnd = fUncompressedStream + len;

929 }

930

931 SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {

932 }

933

934 bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {

935 #ifdef PDF_TRACE_READ_TOKEN

936 static int read_op = 0;

937 #endif

938

939 token->fKeyword = NULL;

940 token->fObject = NULL;

941

942 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedS treamEnd);

943 if (fUncompressedStream >= fUncompressedStreamEnd) {

944 fEmpty = true;

945 return false;

946 }

947

948 SkPdfNativeObject obj;

949 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd , &obj, fAllocator, fDoc);

950 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)

951

952 // If it is a keyword, we will only get the pointer of the string.

953 if (obj.type() == SkPdfNativeObject::kKeyword_PdfObjectType) {

954 token->fKeyword = obj.c_str();

955 token->fKeywordLength = obj.lenstr();

956 token->fType = kKeyword_TokenType;

957 } else {

958 SkPdfNativeObject* pobj = fAllocator->allocObject();

959 *pobj = obj;

960 token->fObject = pobj;

961 token->fType = kObject_TokenType;

962 }

963

964 #ifdef PDF_TRACE_READ_TOKEN

965 read_op++;

966 #if 0

967 if (548 == read_op) {

968 printf("break;\n");

969 }

970 #endif

971 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Key word" : "Object",

972 token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_ str() :

973 token->fObject->toString().c_str());

974 #endif

975

976 return true;

977 }

978

979 void SkPdfNativeTokenizer::PutBack(PdfToken token) {

980 SkASSERT(!fHasPutBack);

981 fHasPutBack = true;

982 fPutBack = token;

983 #ifdef PDF_TRACE_READ_TOKEN

984 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : " Object",

985 token.fKeyword ? SkString(token.fKeyword, token.fKeywordLength).c_str () :

986 token.fObject->toString().c_str());

987 #endif

988 }

989

990 bool SkPdfNativeTokenizer::readToken(PdfToken* token, bool writeDiff) {

991 if (fHasPutBack) {

992 *token = fPutBack;

993 fHasPutBack = false;

994 #ifdef PDF_TRACE_READ_TOKEN

995 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keywor d" : "Object",

996 token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength ).c_str() :

997 token->fObject->toString().c_str());

998 #endif

999 if (writeDiff) {

1000 SkPdfDiffEncoder::WriteToFile(token);

1001 }

1002 return true;

1003 }

1004

1005 if (fEmpty) {

1006 #ifdef PDF_TRACE_READ_TOKEN

1007 printf("EMPTY TOKENIZER\n");

1008 #endif

1009 return false;

1010 }

1011

1012 const bool result = readTokenCore(token);

1013 if (result && writeDiff) {

1014 SkPdfDiffEncoder::WriteToFile(token);

1015 }

1016 return result;

1017 }

1018

1019 #define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)

1020

1021 // keys

1022 DECLARE_PDF_NAME(BitsPerComponent);

1023 DECLARE_PDF_NAME(ColorSpace);

1024 DECLARE_PDF_NAME(Decode);

1025 DECLARE_PDF_NAME(DecodeParms);

1026 DECLARE_PDF_NAME(Filter);

1027 DECLARE_PDF_NAME(Height);

1028 DECLARE_PDF_NAME(ImageMask);

1029 DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abBreviations?

1030 DECLARE_PDF_NAME(Interpolate);

1031 DECLARE_PDF_NAME(Width);

1032

1033 // values

1034 DECLARE_PDF_NAME(DeviceGray);

1035 DECLARE_PDF_NAME(DeviceRGB);

1036 DECLARE_PDF_NAME(DeviceCMYK);

1037 DECLARE_PDF_NAME(Indexed);

1038 DECLARE_PDF_NAME(ASCIIHexDecode);

1039 DECLARE_PDF_NAME(ASCII85Decode);

1040 DECLARE_PDF_NAME(LZWDecode);

1041 DECLARE_PDF_NAME(FlateDecode); // PDF 1.2

1042 DECLARE_PDF_NAME(RunLengthDecode);

1043 DECLARE_PDF_NAME(CCITTFaxDecode);

1044 DECLARE_PDF_NAME(DCTDecode);

1045

1046 #define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) re turn &longName;

1047

1048

1049 static SkPdfNativeObject* inlineImageKeyAbbreviationExpand(SkPdfNativeObject* ke y) {

1050 if (!key \|\| !key->isName()) {

1051 return key;

1052 }

1053

1054 // TODO(edisonn): use autogenerated code!

1055 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);

1056 HANDLE_NAME_ABBR(key, ColorSpace, CS);

1057 HANDLE_NAME_ABBR(key, Decode, D);

1058 HANDLE_NAME_ABBR(key, DecodeParms, DP);

1059 HANDLE_NAME_ABBR(key, Filter, F);

1060 HANDLE_NAME_ABBR(key, Height, H);

1061 HANDLE_NAME_ABBR(key, ImageMask, IM);

1062 // HANDLE_NAME_ABBR(key, Intent, );

1063 HANDLE_NAME_ABBR(key, Interpolate, I);

1064 HANDLE_NAME_ABBR(key, Width, W);

1065

1066 return key;

1067 }

1068

1069 static SkPdfNativeObject* inlineImageValueAbbreviationExpand(SkPdfNativeObject* value) {

1070 if (!value \|\| !value->isName()) {

1071 return value;

1072 }

1073

1074 // TODO(edisonn): use autogenerated code!

1075 HANDLE_NAME_ABBR(value, DeviceGray, G);

1076 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);

1077 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);

1078 HANDLE_NAME_ABBR(value, Indexed, I);

1079 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);

1080 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);

1081 HANDLE_NAME_ABBR(value, LZWDecode, LZW);

1082 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)

1083 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);

1084 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);

1085 HANDLE_NAME_ABBR(value, DCTDecode, DCT);

1086

1087 return value;

1088 }

1089

1090 SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {

1091 // BI already processed

1092 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedS treamEnd);

1093 if (fUncompressedStream >= fUncompressedStreamEnd) {

1094 return NULL;

1095 }

1096

1097 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->alloc Object();

1098 SkPdfNativeObject::makeEmptyDictionary(inlineImage);

1099 // PUT_TRACK_STREAM_ARGS_EXPL(fStreamId, fUncompressedStream - fUncompresse dStreamStart,

1100 // fUncompressedStream - fUncompressedStreamStar t)

1101

1102 while (fUncompressedStream < fUncompressedStreamEnd) {

1103 SkPdfNativeObject* key = fAllocator->allocObject();

1104 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStrea mEnd, key,

1105 fAllocator, fDoc);

1106 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s

1107

1108 if (key->isKeyword() && key->lenstr() == 2 &&

1109 key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID

1110 fUncompressedStream = readInlineImageStream(fUncompressedStream, fUn compressedStreamEnd,

1111 inlineImage, fDoc);

1112 return inlineImage;

1113 } else {

1114 SkPdfNativeObject* obj = fAllocator->allocObject();

1115 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedS treamEnd, obj,

1116 fAllocator, fDoc);

1117 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart) s

1118 // TODO(edisonn): perf maybe we should not expand abBreviation like this

1119 inlineImage->set(inlineImageKeyAbbreviationExpand(key),

1120 inlineImageValueAbbreviationExpand(obj));

1121 }

1122 }

1123 // TODO(edisonn): report end of data with inline image without an EI

1124 return inlineImage;

1125 }

OLD	NEW