Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(829)

Side by Side Diff: core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp

Issue 1365253003: Merge to XFA: Fix the issue that pdfium swallows 'fi' or 'ff' in some pdf files (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@xfa
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « core/src/fpdfapi/fpdf_page/pageint.h ('k') | testing/resources/pixel/bug_524043_1.in » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include <set> 7 #include <set>
8 #include <utility> 8 #include <utility>
9 #include <vector> 9 #include <vector>
10 10
(...skipping 2446 matching lines...) Expand 10 before | Expand all | Expand 10 after
2457 FX_DWORD objnum, 2457 FX_DWORD objnum,
2458 FX_DWORD gennum) { 2458 FX_DWORD gennum) {
2459 CPDF_Object* pLenObj = pDict->GetElement(FX_BSTRC("Length")); 2459 CPDF_Object* pLenObj = pDict->GetElement(FX_BSTRC("Length"));
2460 FX_FILESIZE len = -1; 2460 FX_FILESIZE len = -1;
2461 if (pLenObj && ((pLenObj->GetType() != PDFOBJ_REFERENCE) || 2461 if (pLenObj && ((pLenObj->GetType() != PDFOBJ_REFERENCE) ||
2462 ((((CPDF_Reference*)pLenObj)->GetObjList()) && 2462 ((((CPDF_Reference*)pLenObj)->GetObjList()) &&
2463 ((CPDF_Reference*)pLenObj)->GetRefObjNum() != objnum))) { 2463 ((CPDF_Reference*)pLenObj)->GetRefObjNum() != objnum))) {
2464 len = pLenObj->GetInteger(); 2464 len = pLenObj->GetInteger();
2465 } 2465 }
2466 // Check whether end of line markers follow the keyword 'stream'. 2466 // Check whether end of line markers follow the keyword 'stream'.
2467 unsigned int numMarkers = ReadEOLMarkers(m_Pos); 2467 // The stream starts after end of line markers.
2468 m_Pos += numMarkers; 2468 m_Pos += ReadEOLMarkers(m_Pos);
2469 FX_FILESIZE streamStartPos = m_Pos; 2469 FX_FILESIZE streamStartPos = m_Pos;
2470 if (pContext) { 2470 if (pContext) {
2471 pContext->m_DataStart = streamStartPos; 2471 pContext->m_DataStart = streamStartPos;
2472 } 2472 }
2473 const unsigned int ENDSTREAM_LEN = sizeof("endstream") - 1; 2473 const unsigned int ENDSTREAM_LEN = sizeof("endstream") - 1;
2474 const unsigned int ENDOBJ_LEN = sizeof("endobj") - 1; 2474 const unsigned int ENDOBJ_LEN = sizeof("endobj") - 1;
2475 CPDF_CryptoHandler* pCryptoHandler = 2475 CPDF_CryptoHandler* pCryptoHandler =
2476 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler; 2476 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler;
2477 if (!pCryptoHandler) { 2477 if (!pCryptoHandler) {
2478 FX_BOOL bSearchForKeyword = TRUE; 2478 FX_BOOL bSearchForKeyword = TRUE;
2479 unsigned int prevMarkers = 0;
2480 unsigned int nextMarkers = 0;
2481 if (len >= 0) { 2479 if (len >= 0) {
2482 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos; 2480 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
2483 pos += len; 2481 pos += len;
2484 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) { 2482 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) {
2485 m_Pos = pos.ValueOrDie(); 2483 m_Pos = pos.ValueOrDie();
2486 } 2484 }
2487 prevMarkers = ReadEOLMarkers(m_Pos); 2485 m_Pos += ReadEOLMarkers(m_Pos);
2486 FXSYS_memset(m_WordBuffer, 0, ENDSTREAM_LEN + 1);
2488 GetNextWord(); 2487 GetNextWord();
2489 nextMarkers = ReadEOLMarkers(m_Pos); 2488 if (FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0 &&
2490 if (m_WordSize == ENDSTREAM_LEN && prevMarkers != 0 && nextMarkers != 0 && 2489 IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen,
2491 FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0) { 2490 FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) {
2492 bSearchForKeyword = FALSE; 2491 bSearchForKeyword = FALSE;
2493 } 2492 }
2494 } 2493 }
2495 if (bSearchForKeyword) { 2494 if (bSearchForKeyword) {
2496 // If len is not available, len needs to be calculated 2495 // If len is not available, len needs to be calculated
2497 // by searching the keywords "endstream" or "endobj". 2496 // by searching the keywords "endstream" or "endobj".
2498 m_Pos = streamStartPos; 2497 m_Pos = streamStartPos;
2499 FX_FILESIZE endStreamOffset = 0; 2498 FX_FILESIZE endStreamOffset = 0;
2500 while (endStreamOffset >= 0) { 2499 while (endStreamOffset >= 0) {
2501 endStreamOffset = FindTag(FX_BSTRC("endstream"), 0); 2500 endStreamOffset = FindTag(FX_BSTRC("endstream"), 0);
2502 if (endStreamOffset < 0) { 2501 if (endStreamOffset < 0) {
2503 // Can't find any "endstream". 2502 // Can't find any "endstream".
2504 break; 2503 break;
2505 } 2504 }
2506 prevMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); 2505 if (IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen,
2507 nextMarkers = 2506 FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) {
2508 ReadEOLMarkers(streamStartPos + endStreamOffset + ENDSTREAM_LEN);
2509 if (prevMarkers != 0 && nextMarkers != 0) {
2510 // Stop searching when the keyword "endstream" is found. 2507 // Stop searching when the keyword "endstream" is found.
2508 endStreamOffset = m_Pos - streamStartPos - ENDSTREAM_LEN;
2511 break; 2509 break;
2512 } else {
2513 unsigned char ch = 0x00;
2514 GetCharAt(streamStartPos + endStreamOffset + ENDSTREAM_LEN, ch);
2515 if (ch == 0x09 || ch == 0x20) {
2516 //"endstream" is treated as a keyword
2517 // when it is followed by a tab or whitespace
2518 break;
2519 }
2520 } 2510 }
2521 m_Pos += ENDSTREAM_LEN;
2522 } 2511 }
2523 m_Pos = streamStartPos; 2512 m_Pos = streamStartPos;
2524 FX_FILESIZE endObjOffset = 0; 2513 FX_FILESIZE endObjOffset = 0;
2525 while (endObjOffset >= 0) { 2514 while (endObjOffset >= 0) {
2526 endObjOffset = FindTag(FX_BSTRC("endobj"), 0); 2515 endObjOffset = FindTag(FX_BSTRC("endobj"), 0);
2527 if (endObjOffset < 0) { 2516 if (endObjOffset < 0) {
2528 // Can't find any "endobj". 2517 // Can't find any "endobj".
2529 break; 2518 break;
2530 } 2519 }
2531 prevMarkers = ReadEOLMarkers(streamStartPos + endObjOffset - 1); 2520 if (IsWholeWord(m_Pos - ENDOBJ_LEN, m_FileLen,
2532 nextMarkers = 2521 FX_BSTRC("endobj").GetPtr(), ENDOBJ_LEN, TRUE)) {
2533 ReadEOLMarkers(streamStartPos + endObjOffset + ENDOBJ_LEN);
2534 if (prevMarkers != 0 && nextMarkers != 0) {
2535 // Stop searching when the keyword "endobj" is found. 2522 // Stop searching when the keyword "endobj" is found.
2523 endObjOffset = m_Pos - streamStartPos - ENDOBJ_LEN;
2536 break; 2524 break;
2537 } 2525 }
2538 m_Pos += ENDOBJ_LEN;
2539 } 2526 }
2540 if (endStreamOffset < 0 && endObjOffset < 0) { 2527 if (endStreamOffset < 0 && endObjOffset < 0) {
2541 // Can't find "endstream" or "endobj". 2528 // Can't find "endstream" or "endobj".
2542 return nullptr; 2529 return nullptr;
2543 } 2530 }
2544 if (endStreamOffset < 0 && endObjOffset >= 0) { 2531 if (endStreamOffset < 0 && endObjOffset >= 0) {
2545 // Correct the position of end stream. 2532 // Correct the position of end stream.
2546 endStreamOffset = endObjOffset; 2533 endStreamOffset = endObjOffset;
2547 } else if (endStreamOffset >= 0 && endObjOffset < 0) { 2534 } else if (endStreamOffset >= 0 && endObjOffset < 0) {
2548 // Correct the position of end obj. 2535 // Correct the position of end obj.
2549 endObjOffset = endStreamOffset; 2536 endObjOffset = endStreamOffset;
2550 } else if (endStreamOffset > endObjOffset) { 2537 } else if (endStreamOffset > endObjOffset) {
2551 endStreamOffset = endObjOffset; 2538 endStreamOffset = endObjOffset;
2552 } 2539 }
2553 len = endStreamOffset; 2540 len = endStreamOffset;
2554 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); 2541 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
2555 if (numMarkers == 2) { 2542 if (numMarkers == 2) {
2556 len -= 2; 2543 len -= 2;
2557 } else { 2544 } else {
2558 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); 2545 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
2559 if (numMarkers == 1) { 2546 if (numMarkers == 1) {
2560 len -= 1; 2547 len -= 1;
2561 } 2548 }
2562 } 2549 }
2563 if (len <= 0) { 2550 if (len <= 0) {
2564 return nullptr; 2551 return nullptr;
(...skipping 16 matching lines...) Expand all
2581 FX_Free(pData); 2568 FX_Free(pData);
2582 pData = dest_buf.GetBuffer(); 2569 pData = dest_buf.GetBuffer();
2583 len = dest_buf.GetSize(); 2570 len = dest_buf.GetSize();
2584 dest_buf.DetachBuffer(); 2571 dest_buf.DetachBuffer();
2585 } 2572 }
2586 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict); 2573 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
2587 if (pContext) { 2574 if (pContext) {
2588 pContext->m_DataEnd = pContext->m_DataStart + len; 2575 pContext->m_DataEnd = pContext->m_DataStart + len;
2589 } 2576 }
2590 streamStartPos = m_Pos; 2577 streamStartPos = m_Pos;
2578 FXSYS_memset(m_WordBuffer, 0, ENDOBJ_LEN + 1);
2591 GetNextWord(); 2579 GetNextWord();
2592 numMarkers = ReadEOLMarkers(m_Pos); 2580 int numMarkers = ReadEOLMarkers(m_Pos);
2593 if (m_WordSize == ENDOBJ_LEN && numMarkers != 0 && 2581 if (m_WordSize == ENDOBJ_LEN && numMarkers != 0 &&
2594 FXSYS_memcmp(m_WordBuffer, "endobj", ENDOBJ_LEN) == 0) { 2582 FXSYS_memcmp(m_WordBuffer, "endobj", ENDOBJ_LEN) == 0) {
2595 m_Pos = streamStartPos; 2583 m_Pos = streamStartPos;
2596 } 2584 }
2597 return pStream; 2585 return pStream;
2598 } 2586 }
2599 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, 2587 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,
2600 FX_DWORD HeaderOffset) { 2588 FX_DWORD HeaderOffset) {
2601 FX_Free(m_pFileBuf); 2589 FX_Free(m_pFileBuf);
2602 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize); 2590 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
(...skipping 10 matching lines...) Expand all
2613 GetNextWord(); 2601 GetNextWord();
2614 if (!m_bIsNumber) { 2602 if (!m_bIsNumber) {
2615 return 0; 2603 return 0;
2616 } 2604 }
2617 m_WordBuffer[m_WordSize] = 0; 2605 m_WordBuffer[m_WordSize] = 0;
2618 return FXSYS_atoi((const FX_CHAR*)m_WordBuffer); 2606 return FXSYS_atoi((const FX_CHAR*)m_WordBuffer);
2619 } 2607 }
2620 FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, 2608 FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
2621 FX_FILESIZE limit, 2609 FX_FILESIZE limit,
2622 const uint8_t* tag, 2610 const uint8_t* tag,
2623 FX_DWORD taglen) { 2611 FX_DWORD taglen,
2612 FX_BOOL checkKeyword) {
2624 uint8_t type = PDF_CharType[tag[0]]; 2613 uint8_t type = PDF_CharType[tag[0]];
2625 FX_BOOL bCheckLeft = type != 'D' && type != 'W'; 2614 FX_BOOL bCheckLeft = type != 'D' && type != 'W';
2626 type = PDF_CharType[tag[taglen - 1]]; 2615 type = PDF_CharType[tag[taglen - 1]];
2627 FX_BOOL bCheckRight = type != 'D' && type != 'W'; 2616 FX_BOOL bCheckRight = type != 'D' && type != 'W';
2628 uint8_t ch; 2617 uint8_t ch;
2629 if (bCheckRight && startpos + (int32_t)taglen <= limit && 2618 if (bCheckRight && startpos + (int32_t)taglen <= limit &&
2630 GetCharAt(startpos + (int32_t)taglen, ch)) { 2619 GetCharAt(startpos + (int32_t)taglen, ch)) {
2631 uint8_t type = PDF_CharType[ch]; 2620 uint8_t type = PDF_CharType[ch];
2632 if (type == 'N' || type == 'R') { 2621 if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) {
2633 return FALSE; 2622 return FALSE;
2634 } 2623 }
2635 } 2624 }
2636 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { 2625 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
2637 uint8_t type = PDF_CharType[ch]; 2626 uint8_t type = PDF_CharType[ch];
2638 if (type == 'N' || type == 'R') { 2627 if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) {
2639 return FALSE; 2628 return FALSE;
2640 } 2629 }
2641 } 2630 }
2642 return TRUE; 2631 return TRUE;
2643 } 2632 }
2644 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, 2633 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
2645 FX_BOOL bWholeWord, 2634 FX_BOOL bWholeWord,
2646 FX_BOOL bForward, 2635 FX_BOOL bForward,
2647 FX_FILESIZE limit) { 2636 FX_FILESIZE limit) {
2648 int32_t taglen = tag.GetLength(); 2637 int32_t taglen = tag.GetLength();
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
2684 continue; 2673 continue;
2685 } 2674 }
2686 } else { 2675 } else {
2687 offset--; 2676 offset--;
2688 if (offset >= 0) { 2677 if (offset >= 0) {
2689 pos--; 2678 pos--;
2690 continue; 2679 continue;
2691 } 2680 }
2692 } 2681 }
2693 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; 2682 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
2694 if (!bWholeWord || IsWholeWord(startpos, limit, tag.GetPtr(), taglen)) { 2683 if (!bWholeWord ||
2684 IsWholeWord(startpos, limit, tag.GetPtr(), taglen, FALSE)) {
2695 m_Pos = startpos; 2685 m_Pos = startpos;
2696 return TRUE; 2686 return TRUE;
2697 } 2687 }
2698 } 2688 }
2699 if (bForward) { 2689 if (bForward) {
2700 offset = byte == tag_data[0] ? 1 : 0; 2690 offset = byte == tag_data[0] ? 1 : 0;
2701 pos++; 2691 pos++;
2702 } else { 2692 } else {
2703 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1; 2693 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
2704 pos--; 2694 pos--;
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
2741 uint8_t byte; 2731 uint8_t byte;
2742 GetCharAt(pos++, byte); 2732 GetCharAt(pos++, byte);
2743 int32_t found = -1; 2733 int32_t found = -1;
2744 while (1) { 2734 while (1) {
2745 for (i = 0; i < ntags; i++) { 2735 for (i = 0; i < ntags; i++) {
2746 if (pPatterns[i].m_pTag[pPatterns[i].m_Offset] == byte) { 2736 if (pPatterns[i].m_pTag[pPatterns[i].m_Offset] == byte) {
2747 pPatterns[i].m_Offset++; 2737 pPatterns[i].m_Offset++;
2748 if (pPatterns[i].m_Offset == pPatterns[i].m_Len) { 2738 if (pPatterns[i].m_Offset == pPatterns[i].m_Len) {
2749 if (!bWholeWord || 2739 if (!bWholeWord ||
2750 IsWholeWord(pos - pPatterns[i].m_Len, limit, pPatterns[i].m_pTag, 2740 IsWholeWord(pos - pPatterns[i].m_Len, limit, pPatterns[i].m_pTag,
2751 pPatterns[i].m_Len)) { 2741 pPatterns[i].m_Len, FALSE)) {
2752 found = i; 2742 found = i;
2753 goto end; 2743 goto end;
2754 } else { 2744 } else {
2755 if (pPatterns[i].m_pTag[0] == byte) { 2745 if (pPatterns[i].m_pTag[0] == byte) {
2756 pPatterns[i].m_Offset = 1; 2746 pPatterns[i].m_Offset = 1;
2757 } else { 2747 } else {
2758 pPatterns[i].m_Offset = 0; 2748 pPatterns[i].m_Offset = 0;
2759 } 2749 }
2760 } 2750 }
2761 } 2751 }
(...skipping 1930 matching lines...) Expand 10 before | Expand all | Expand 10 after
4692 return FALSE; 4682 return FALSE;
4693 } 4683 }
4694 CPDF_PageNode::~CPDF_PageNode() { 4684 CPDF_PageNode::~CPDF_PageNode() {
4695 int32_t iSize = m_childNode.GetSize(); 4685 int32_t iSize = m_childNode.GetSize();
4696 for (int32_t i = 0; i < iSize; ++i) { 4686 for (int32_t i = 0; i < iSize; ++i) {
4697 CPDF_PageNode* pNode = (CPDF_PageNode*)m_childNode[i]; 4687 CPDF_PageNode* pNode = (CPDF_PageNode*)m_childNode[i];
4698 delete pNode; 4688 delete pNode;
4699 } 4689 }
4700 m_childNode.RemoveAll(); 4690 m_childNode.RemoveAll();
4701 } 4691 }
OLDNEW
« no previous file with comments | « core/src/fpdfapi/fpdf_page/pageint.h ('k') | testing/resources/pixel/bug_524043_1.in » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698