core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp - Issue 1365253003: Merge to XFA: Fix the issue that pdfium swallows 'fi' or 'ff' in some pdf files

Side by Side Diff: core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp

Issue 1365253003: Merge to XFA: Fix the issue that pdfium swallows 'fi' or 'ff' in some pdf files (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@xfa

Patch Set: Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2014 PDFium Authors. All rights reserved.	1 // Copyright 2014 PDFium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com	5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

6	6

7 #include <set>	7 #include <set>

8 #include <utility>	8 #include <utility>

9 #include <vector>	9 #include <vector>

10	10

(...skipping 2446 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2457 FX_DWORD objnum,	2457 FX_DWORD objnum,

2458 FX_DWORD gennum) {	2458 FX_DWORD gennum) {

2459 CPDF_Object* pLenObj = pDict->GetElement(FX_BSTRC("Length"));	2459 CPDF_Object* pLenObj = pDict->GetElement(FX_BSTRC("Length"));

2460 FX_FILESIZE len = -1;	2460 FX_FILESIZE len = -1;

2461 if (pLenObj && ((pLenObj->GetType() != PDFOBJ_REFERENCE) ||	2461 if (pLenObj && ((pLenObj->GetType() != PDFOBJ_REFERENCE) ||

2462 ((((CPDF_Reference*)pLenObj)->GetObjList()) &&	2462 ((((CPDF_Reference*)pLenObj)->GetObjList()) &&

2463 ((CPDF_Reference*)pLenObj)->GetRefObjNum() != objnum))) {	2463 ((CPDF_Reference*)pLenObj)->GetRefObjNum() != objnum))) {

2464 len = pLenObj->GetInteger();	2464 len = pLenObj->GetInteger();

2465 }	2465 }

2466 // Check whether end of line markers follow the keyword 'stream'.	2466 // Check whether end of line markers follow the keyword 'stream'.

2467 unsigned int numMarkers = ReadEOLMarkers(m_Pos);	2467 // The stream starts after end of line markers.

2468 m_Pos += numMarkers;	2468 m_Pos += ReadEOLMarkers(m_Pos);

2469 FX_FILESIZE streamStartPos = m_Pos;	2469 FX_FILESIZE streamStartPos = m_Pos;

2470 if (pContext) {	2470 if (pContext) {

2471 pContext->m_DataStart = streamStartPos;	2471 pContext->m_DataStart = streamStartPos;

2472 }	2472 }

2473 const unsigned int ENDSTREAM_LEN = sizeof("endstream") - 1;	2473 const unsigned int ENDSTREAM_LEN = sizeof("endstream") - 1;

2474 const unsigned int ENDOBJ_LEN = sizeof("endobj") - 1;	2474 const unsigned int ENDOBJ_LEN = sizeof("endobj") - 1;

2475 CPDF_CryptoHandler* pCryptoHandler =	2475 CPDF_CryptoHandler* pCryptoHandler =

2476 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler;	2476 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler;

2477 if (!pCryptoHandler) {	2477 if (!pCryptoHandler) {

2478 FX_BOOL bSearchForKeyword = TRUE;	2478 FX_BOOL bSearchForKeyword = TRUE;

2479 unsigned int prevMarkers = 0;

2480 unsigned int nextMarkers = 0;

2481 if (len >= 0) {	2479 if (len >= 0) {

2482 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;	2480 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;

2483 pos += len;	2481 pos += len;

2484 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) {	2482 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) {

2485 m_Pos = pos.ValueOrDie();	2483 m_Pos = pos.ValueOrDie();

2486 }	2484 }

2487 prevMarkers = ReadEOLMarkers(m_Pos);	2485 m_Pos += ReadEOLMarkers(m_Pos);

	2486 FXSYS_memset(m_WordBuffer, 0, ENDSTREAM_LEN + 1);

2488 GetNextWord();	2487 GetNextWord();

2489 nextMarkers = ReadEOLMarkers(m_Pos);	2488 if (FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0 &&

2490 if (m_WordSize == ENDSTREAM_LEN && prevMarkers != 0 && nextMarkers != 0 &&	2489 IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen,

2491 FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0) {	2490 FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) {

2492 bSearchForKeyword = FALSE;	2491 bSearchForKeyword = FALSE;

2493 }	2492 }

2494 }	2493 }

2495 if (bSearchForKeyword) {	2494 if (bSearchForKeyword) {

2496 // If len is not available, len needs to be calculated	2495 // If len is not available, len needs to be calculated

2497 // by searching the keywords "endstream" or "endobj".	2496 // by searching the keywords "endstream" or "endobj".

2498 m_Pos = streamStartPos;	2497 m_Pos = streamStartPos;

2499 FX_FILESIZE endStreamOffset = 0;	2498 FX_FILESIZE endStreamOffset = 0;

2500 while (endStreamOffset >= 0) {	2499 while (endStreamOffset >= 0) {

2501 endStreamOffset = FindTag(FX_BSTRC("endstream"), 0);	2500 endStreamOffset = FindTag(FX_BSTRC("endstream"), 0);

2502 if (endStreamOffset < 0) {	2501 if (endStreamOffset < 0) {

2503 // Can't find any "endstream".	2502 // Can't find any "endstream".

2504 break;	2503 break;

2505 }	2504 }

2506 prevMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);	2505 if (IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen,

2507 nextMarkers =	2506 FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) {

2508 ReadEOLMarkers(streamStartPos + endStreamOffset + ENDSTREAM_LEN);

2509 if (prevMarkers != 0 && nextMarkers != 0) {

2510 // Stop searching when the keyword "endstream" is found.	2507 // Stop searching when the keyword "endstream" is found.

	2508 endStreamOffset = m_Pos - streamStartPos - ENDSTREAM_LEN;

2511 break;	2509 break;

2512 } else {

2513 unsigned char ch = 0x00;

2514 GetCharAt(streamStartPos + endStreamOffset + ENDSTREAM_LEN, ch);

2515 if (ch == 0x09 || ch == 0x20) {

2516 //"endstream" is treated as a keyword

2517 // when it is followed by a tab or whitespace

2518 break;

2519 }

2520 }	2510 }

2521 m_Pos += ENDSTREAM_LEN;

2522 }	2511 }

2523 m_Pos = streamStartPos;	2512 m_Pos = streamStartPos;

2524 FX_FILESIZE endObjOffset = 0;	2513 FX_FILESIZE endObjOffset = 0;

2525 while (endObjOffset >= 0) {	2514 while (endObjOffset >= 0) {

2526 endObjOffset = FindTag(FX_BSTRC("endobj"), 0);	2515 endObjOffset = FindTag(FX_BSTRC("endobj"), 0);

2527 if (endObjOffset < 0) {	2516 if (endObjOffset < 0) {

2528 // Can't find any "endobj".	2517 // Can't find any "endobj".

2529 break;	2518 break;

2530 }	2519 }

2531 prevMarkers = ReadEOLMarkers(streamStartPos + endObjOffset - 1);	2520 if (IsWholeWord(m_Pos - ENDOBJ_LEN, m_FileLen,

2532 nextMarkers =	2521 FX_BSTRC("endobj").GetPtr(), ENDOBJ_LEN, TRUE)) {

2533 ReadEOLMarkers(streamStartPos + endObjOffset + ENDOBJ_LEN);

2534 if (prevMarkers != 0 && nextMarkers != 0) {

2535 // Stop searching when the keyword "endobj" is found.	2522 // Stop searching when the keyword "endobj" is found.

	2523 endObjOffset = m_Pos - streamStartPos - ENDOBJ_LEN;

2536 break;	2524 break;

2537 }	2525 }

2538 m_Pos += ENDOBJ_LEN;

2539 }	2526 }

2540 if (endStreamOffset < 0 && endObjOffset < 0) {	2527 if (endStreamOffset < 0 && endObjOffset < 0) {

2541 // Can't find "endstream" or "endobj".	2528 // Can't find "endstream" or "endobj".

2542 return nullptr;	2529 return nullptr;

2543 }	2530 }

2544 if (endStreamOffset < 0 && endObjOffset >= 0) {	2531 if (endStreamOffset < 0 && endObjOffset >= 0) {

2545 // Correct the position of end stream.	2532 // Correct the position of end stream.

2546 endStreamOffset = endObjOffset;	2533 endStreamOffset = endObjOffset;

2547 } else if (endStreamOffset >= 0 && endObjOffset < 0) {	2534 } else if (endStreamOffset >= 0 && endObjOffset < 0) {

2548 // Correct the position of end obj.	2535 // Correct the position of end obj.

2549 endObjOffset = endStreamOffset;	2536 endObjOffset = endStreamOffset;

2550 } else if (endStreamOffset > endObjOffset) {	2537 } else if (endStreamOffset > endObjOffset) {

2551 endStreamOffset = endObjOffset;	2538 endStreamOffset = endObjOffset;

2552 }	2539 }

2553 len = endStreamOffset;	2540 len = endStreamOffset;

2554 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);	2541 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);

2555 if (numMarkers == 2) {	2542 if (numMarkers == 2) {

2556 len -= 2;	2543 len -= 2;

2557 } else {	2544 } else {

2558 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);	2545 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);

2559 if (numMarkers == 1) {	2546 if (numMarkers == 1) {

2560 len -= 1;	2547 len -= 1;

2561 }	2548 }

2562 }	2549 }

2563 if (len <= 0) {	2550 if (len <= 0) {

2564 return nullptr;	2551 return nullptr;

(...skipping 16 matching lines...) Expand all Loading...
2581 FX_Free(pData);	2568 FX_Free(pData);

2582 pData = dest_buf.GetBuffer();	2569 pData = dest_buf.GetBuffer();

2583 len = dest_buf.GetSize();	2570 len = dest_buf.GetSize();

2584 dest_buf.DetachBuffer();	2571 dest_buf.DetachBuffer();

2585 }	2572 }

2586 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);	2573 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);

2587 if (pContext) {	2574 if (pContext) {

2588 pContext->m_DataEnd = pContext->m_DataStart + len;	2575 pContext->m_DataEnd = pContext->m_DataStart + len;

2589 }	2576 }

2590 streamStartPos = m_Pos;	2577 streamStartPos = m_Pos;

	2578 FXSYS_memset(m_WordBuffer, 0, ENDOBJ_LEN + 1);

2591 GetNextWord();	2579 GetNextWord();

2592 numMarkers = ReadEOLMarkers(m_Pos);	2580 int numMarkers = ReadEOLMarkers(m_Pos);

2593 if (m_WordSize == ENDOBJ_LEN && numMarkers != 0 &&	2581 if (m_WordSize == ENDOBJ_LEN && numMarkers != 0 &&

2594 FXSYS_memcmp(m_WordBuffer, "endobj", ENDOBJ_LEN) == 0) {	2582 FXSYS_memcmp(m_WordBuffer, "endobj", ENDOBJ_LEN) == 0) {

2595 m_Pos = streamStartPos;	2583 m_Pos = streamStartPos;

2596 }	2584 }

2597 return pStream;	2585 return pStream;

2598 }	2586 }

2599 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,	2587 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,

2600 FX_DWORD HeaderOffset) {	2588 FX_DWORD HeaderOffset) {

2601 FX_Free(m_pFileBuf);	2589 FX_Free(m_pFileBuf);

2602 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);	2590 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);

(...skipping 10 matching lines...) Expand all Loading...
2613 GetNextWord();	2601 GetNextWord();

2614 if (!m_bIsNumber) {	2602 if (!m_bIsNumber) {

2615 return 0;	2603 return 0;

2616 }	2604 }

2617 m_WordBuffer[m_WordSize] = 0;	2605 m_WordBuffer[m_WordSize] = 0;

2618 return FXSYS_atoi((const FX_CHAR*)m_WordBuffer);	2606 return FXSYS_atoi((const FX_CHAR*)m_WordBuffer);

2619 }	2607 }

2620 FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,	2608 FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,

2621 FX_FILESIZE limit,	2609 FX_FILESIZE limit,

2622 const uint8_t* tag,	2610 const uint8_t* tag,

2623 FX_DWORD taglen) {	2611 FX_DWORD taglen,

	2612 FX_BOOL checkKeyword) {

2624 uint8_t type = PDF_CharType[tag[0]];	2613 uint8_t type = PDF_CharType[tag[0]];

2625 FX_BOOL bCheckLeft = type != 'D' && type != 'W';	2614 FX_BOOL bCheckLeft = type != 'D' && type != 'W';

2626 type = PDF_CharType[tag[taglen - 1]];	2615 type = PDF_CharType[tag[taglen - 1]];

2627 FX_BOOL bCheckRight = type != 'D' && type != 'W';	2616 FX_BOOL bCheckRight = type != 'D' && type != 'W';

2628 uint8_t ch;	2617 uint8_t ch;

2629 if (bCheckRight && startpos + (int32_t)taglen <= limit &&	2618 if (bCheckRight && startpos + (int32_t)taglen <= limit &&

2630 GetCharAt(startpos + (int32_t)taglen, ch)) {	2619 GetCharAt(startpos + (int32_t)taglen, ch)) {

2631 uint8_t type = PDF_CharType[ch];	2620 uint8_t type = PDF_CharType[ch];

2632 if (type == 'N' || type == 'R') {	2621 if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) {

2633 return FALSE;	2622 return FALSE;

2634 }	2623 }

2635 }	2624 }

2636 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {	2625 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {

2637 uint8_t type = PDF_CharType[ch];	2626 uint8_t type = PDF_CharType[ch];

2638 if (type == 'N' || type == 'R') {	2627 if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) {

2639 return FALSE;	2628 return FALSE;

2640 }	2629 }

2641 }	2630 }

2642 return TRUE;	2631 return TRUE;

2643 }	2632 }

2644 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,	2633 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,

2645 FX_BOOL bWholeWord,	2634 FX_BOOL bWholeWord,

2646 FX_BOOL bForward,	2635 FX_BOOL bForward,

2647 FX_FILESIZE limit) {	2636 FX_FILESIZE limit) {

2648 int32_t taglen = tag.GetLength();	2637 int32_t taglen = tag.GetLength();

(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2684 continue;	2673 continue;

2685 }	2674 }

2686 } else {	2675 } else {

2687 offset--;	2676 offset--;

2688 if (offset >= 0) {	2677 if (offset >= 0) {

2689 pos--;	2678 pos--;

2690 continue;	2679 continue;

2691 }	2680 }

2692 }	2681 }

2693 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;	2682 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;

2694 if (!bWholeWord || IsWholeWord(startpos, limit, tag.GetPtr(), taglen)) {	2683 if (!bWholeWord ||

	2684 IsWholeWord(startpos, limit, tag.GetPtr(), taglen, FALSE)) {

2695 m_Pos = startpos;	2685 m_Pos = startpos;

2696 return TRUE;	2686 return TRUE;

2697 }	2687 }

2698 }	2688 }

2699 if (bForward) {	2689 if (bForward) {

2700 offset = byte == tag_data[0] ? 1 : 0;	2690 offset = byte == tag_data[0] ? 1 : 0;

2701 pos++;	2691 pos++;

2702 } else {	2692 } else {

2703 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;	2693 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;

2704 pos--;	2694 pos--;

(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2741 uint8_t byte;	2731 uint8_t byte;

2742 GetCharAt(pos++, byte);	2732 GetCharAt(pos++, byte);

2743 int32_t found = -1;	2733 int32_t found = -1;

2744 while (1) {	2734 while (1) {

2745 for (i = 0; i < ntags; i++) {	2735 for (i = 0; i < ntags; i++) {

2746 if (pPatterns[i].m_pTag[pPatterns[i].m_Offset] == byte) {	2736 if (pPatterns[i].m_pTag[pPatterns[i].m_Offset] == byte) {

2747 pPatterns[i].m_Offset++;	2737 pPatterns[i].m_Offset++;

2748 if (pPatterns[i].m_Offset == pPatterns[i].m_Len) {	2738 if (pPatterns[i].m_Offset == pPatterns[i].m_Len) {

2749 if (!bWholeWord ||	2739 if (!bWholeWord ||

2750 IsWholeWord(pos - pPatterns[i].m_Len, limit, pPatterns[i].m_pTag,	2740 IsWholeWord(pos - pPatterns[i].m_Len, limit, pPatterns[i].m_pTag,

2751 pPatterns[i].m_Len)) {	2741 pPatterns[i].m_Len, FALSE)) {

2752 found = i;	2742 found = i;

2753 goto end;	2743 goto end;

2754 } else {	2744 } else {

2755 if (pPatterns[i].m_pTag[0] == byte) {	2745 if (pPatterns[i].m_pTag[0] == byte) {

2756 pPatterns[i].m_Offset = 1;	2746 pPatterns[i].m_Offset = 1;

2757 } else {	2747 } else {

2758 pPatterns[i].m_Offset = 0;	2748 pPatterns[i].m_Offset = 0;

2759 }	2749 }

2760 }	2750 }

2761 }	2751 }

(...skipping 1930 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
4692 return FALSE;	4682 return FALSE;

4693 }	4683 }

4694 CPDF_PageNode::~CPDF_PageNode() {	4684 CPDF_PageNode::~CPDF_PageNode() {

4695 int32_t iSize = m_childNode.GetSize();	4685 int32_t iSize = m_childNode.GetSize();

4696 for (int32_t i = 0; i < iSize; ++i) {	4686 for (int32_t i = 0; i < iSize; ++i) {

4697 CPDF_PageNode* pNode = (CPDF_PageNode*)m_childNode[i];	4687 CPDF_PageNode* pNode = (CPDF_PageNode*)m_childNode[i];

4698 delete pNode;	4688 delete pNode;

4699 }	4689 }

4700 m_childNode.RemoveAll();	4690 m_childNode.RemoveAll();

4701 }	4691 }

OLD	NEW

« no previous file with comments | « core/src/fpdfapi/fpdf_page/pageint.h ('k') | testing/resources/pixel/bug_524043_1.in » ('j') | no next file with comments »