| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <cctype> | 8 #include <cctype> |
| 9 #include <cwctype> | 9 #include <cwctype> |
| 10 #include <memory> | 10 #include <memory> |
| (...skipping 10 matching lines...) Expand all Loading... |
| 21 #include "core/fpdfapi/fpdf_parser/include/cpdf_string.h" | 21 #include "core/fpdfapi/fpdf_parser/include/cpdf_string.h" |
| 22 #include "core/fpdftext/include/cpdf_linkextract.h" | 22 #include "core/fpdftext/include/cpdf_linkextract.h" |
| 23 #include "core/fpdftext/include/cpdf_textpage.h" | 23 #include "core/fpdftext/include/cpdf_textpage.h" |
| 24 #include "core/fpdftext/include/cpdf_textpagefind.h" | 24 #include "core/fpdftext/include/cpdf_textpagefind.h" |
| 25 #include "core/fpdftext/unicodenormalizationdata.h" | 25 #include "core/fpdftext/unicodenormalizationdata.h" |
| 26 #include "core/fxcrt/fx_bidi.h" | 26 #include "core/fxcrt/fx_bidi.h" |
| 27 #include "core/fxcrt/include/fx_ext.h" | 27 #include "core/fxcrt/include/fx_ext.h" |
| 28 #include "core/fxcrt/include/fx_ucd.h" | 28 #include "core/fxcrt/include/fx_ucd.h" |
| 29 #include "third_party/base/stl_util.h" | 29 #include "third_party/base/stl_util.h" |
| 30 | 30 |
| 31 #define FPDFTEXT_RLTB 1 | |
| 32 #define FPDFTEXT_LEFT -1 | |
| 33 #define FPDFTEXT_RIGHT 1 | |
| 34 | |
| 35 #define FPDFTEXT_MATCHCASE 0x00000001 | 31 #define FPDFTEXT_MATCHCASE 0x00000001 |
| 36 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 | 32 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 |
| 37 #define FPDFTEXT_CONSECUTIVE 0x00000004 | 33 #define FPDFTEXT_CONSECUTIVE 0x00000004 |
| 38 | 34 |
| 39 #define FPDFTEXT_CHAR_ERROR -1 | 35 #define FPDFTEXT_CHAR_ERROR -1 |
| 40 #define FPDFTEXT_CHAR_NORMAL 0 | 36 #define FPDFTEXT_CHAR_NORMAL 0 |
| 41 #define FPDFTEXT_CHAR_GENERATED 1 | 37 #define FPDFTEXT_CHAR_GENERATED 1 |
| 42 #define FPDFTEXT_CHAR_UNUNICODE 2 | 38 #define FPDFTEXT_CHAR_UNUNICODE 2 |
| 43 #define FPDFTEXT_CHAR_HYPHEN 3 | 39 #define FPDFTEXT_CHAR_HYPHEN 3 |
| 44 #define FPDFTEXT_CHAR_PIECE 4 | 40 #define FPDFTEXT_CHAR_PIECE 4 |
| 45 #define FPDFTEXT_MC_PASS 0 | 41 |
| 46 #define FPDFTEXT_MC_DONE 1 | 42 #define TEXT_BLANK_CHAR L' ' |
| 47 #define FPDFTEXT_MC_DELAY 2 | 43 #define TEXT_LINEFEED_CHAR L'\n' |
| 44 #define TEXT_RETURN_CHAR L'\r' |
| 45 #define TEXT_EMPTY L"" |
| 46 #define TEXT_BLANK L" " |
| 47 #define TEXT_RETURN_LINEFEED L"\r\n" |
| 48 #define TEXT_LINEFEED L"\n" |
| 49 #define TEXT_CHARRATIO_GAPDELTA 0.070 |
| 48 | 50 |
| 49 namespace { | 51 namespace { |
| 50 | 52 |
| 51 const FX_FLOAT kDefaultFontSize = 1.0f; | 53 const FX_FLOAT kDefaultFontSize = 1.0f; |
| 52 const uint16_t* const g_UnicodeData_Normalization_Maps[5] = { | 54 const uint16_t* const g_UnicodeData_Normalization_Maps[5] = { |
| 53 nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2, | 55 nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2, |
| 54 g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4}; | 56 g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4}; |
| 55 | 57 |
| 56 FX_BOOL IsIgnoreSpaceCharacter(FX_WCHAR curChar) { | 58 FX_BOOL IsIgnoreSpaceCharacter(FX_WCHAR curChar) { |
| 57 if (curChar < 255) { | 59 if (curChar < 255) { |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 136 FX_WCHAR n = wFind; | 138 FX_WCHAR n = wFind; |
| 137 while (n--) { | 139 while (n--) { |
| 138 *pDst++ = *pMap++; | 140 *pDst++ = *pMap++; |
| 139 } | 141 } |
| 140 } | 142 } |
| 141 return (FX_STRSIZE)wFind; | 143 return (FX_STRSIZE)wFind; |
| 142 } | 144 } |
| 143 | 145 |
| 144 } // namespace | 146 } // namespace |
| 145 | 147 |
| 146 #define TEXT_BLANK_CHAR L' ' | 148 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags) |
| 147 #define TEXT_LINEFEED_CHAR L'\n' | |
| 148 #define TEXT_RETURN_CHAR L'\r' | |
| 149 #define TEXT_EMPTY L"" | |
| 150 #define TEXT_BLANK L" " | |
| 151 #define TEXT_RETURN_LINEFEED L"\r\n" | |
| 152 #define TEXT_LINEFEED L"\n" | |
| 153 #define TEXT_CHARRATIO_GAPDELTA 0.070 | |
| 154 | |
| 155 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) | |
| 156 : m_pPage(pPage), | 149 : m_pPage(pPage), |
| 157 m_parserflag(flags), | 150 m_parserflag(flags), |
| 158 m_pPreTextObj(nullptr), | 151 m_pPreTextObj(nullptr), |
| 159 m_bIsParsed(false), | 152 m_bIsParsed(false), |
| 160 m_TextlineDir(-1), | 153 m_TextlineDir(-1), |
| 161 m_CurlineRect(0, 0, 0, 0) { | 154 m_CurlineRect(0, 0, 0, 0) { |
| 162 m_TextBuf.EstimateSize(0, 10240); | 155 m_TextBuf.EstimateSize(0, 10240); |
| 163 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | 156 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
| 164 (int)pPage->GetPageHeight(), 0); | 157 (int)pPage->GetPageHeight(), 0); |
| 165 } | 158 } |
| (...skipping 440 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 606 | 599 |
| 607 if (rectIndex < 0 || rectIndex >= pdfium::CollectionSize<int>(m_SelRects)) | 600 if (rectIndex < 0 || rectIndex >= pdfium::CollectionSize<int>(m_SelRects)) |
| 608 return; | 601 return; |
| 609 | 602 |
| 610 left = m_SelRects[rectIndex].left; | 603 left = m_SelRects[rectIndex].left; |
| 611 top = m_SelRects[rectIndex].top; | 604 top = m_SelRects[rectIndex].top; |
| 612 right = m_SelRects[rectIndex].right; | 605 right = m_SelRects[rectIndex].right; |
| 613 bottom = m_SelRects[rectIndex].bottom; | 606 bottom = m_SelRects[rectIndex].bottom; |
| 614 } | 607 } |
| 615 | 608 |
| 616 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { | |
| 617 if (end == start) { | |
| 618 return FALSE; | |
| 619 } | |
| 620 FPDF_CHAR_INFO info_start; | |
| 621 FPDF_CHAR_INFO info_end; | |
| 622 GetCharInfo(start, &info_start); | |
| 623 GetCharInfo(end, &info_end); | |
| 624 while (info_end.m_CharBox.Width() == 0 || info_end.m_CharBox.Height() == 0) { | |
| 625 if (--end <= start) | |
| 626 return FALSE; | |
| 627 | |
| 628 GetCharInfo(end, &info_end); | |
| 629 } | |
| 630 FX_FLOAT dx = (info_end.m_OriginX - info_start.m_OriginX); | |
| 631 FX_FLOAT dy = (info_end.m_OriginY - info_start.m_OriginY); | |
| 632 if (dx == 0) { | |
| 633 if (dy > 0) { | |
| 634 Rotate = 90; | |
| 635 } else if (dy < 0) { | |
| 636 Rotate = 270; | |
| 637 } else { | |
| 638 Rotate = 0; | |
| 639 } | |
| 640 } else { | |
| 641 float a = FXSYS_atan2(dy, dx); | |
| 642 Rotate = (int)(a * 180 / FX_PI + 0.5); | |
| 643 } | |
| 644 if (Rotate < 0) { | |
| 645 Rotate = -Rotate; | |
| 646 } else if (Rotate > 0) { | |
| 647 Rotate = 360 - Rotate; | |
| 648 } | |
| 649 return TRUE; | |
| 650 } | |
| 651 | |
| 652 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, | 609 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, |
| 653 FX_FLOAT top, | 610 FX_FLOAT top, |
| 654 FX_FLOAT right, | 611 FX_FLOAT right, |
| 655 FX_FLOAT bottom, | 612 FX_FLOAT bottom, |
| 656 FX_BOOL bContains) { | 613 FX_BOOL bContains) { |
| 657 m_Segments.RemoveAll(); | 614 m_Segments.RemoveAll(); |
| 658 if (!m_bIsParsed) | 615 if (!m_bIsParsed) |
| 659 return -1; | 616 return -1; |
| 660 | 617 |
| 661 CFX_FloatRect rect(left, bottom, right, top); | 618 CFX_FloatRect rect(left, bottom, right, top); |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 720 } | 677 } |
| 721 if (segmentStatus == 1) { | 678 if (segmentStatus == 1) { |
| 722 segmentStatus = 2; | 679 segmentStatus = 2; |
| 723 m_Segments.Add(segment); | 680 m_Segments.Add(segment); |
| 724 segment.m_Start = 0; | 681 segment.m_Start = 0; |
| 725 segment.m_nCount = 0; | 682 segment.m_nCount = 0; |
| 726 } | 683 } |
| 727 return m_Segments.GetSize(); | 684 return m_Segments.GetSize(); |
| 728 } | 685 } |
| 729 | 686 |
| 730 int CPDF_TextPage::GetWordBreak(int index, int direction) const { | |
| 731 if (!m_bIsParsed) | |
| 732 return -1; | |
| 733 | |
| 734 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) | |
| 735 return -1; | |
| 736 | |
| 737 if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList)) | |
| 738 return -1; | |
| 739 | |
| 740 const PAGECHAR_INFO& charinfo = m_CharList[index]; | |
| 741 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { | |
| 742 return index; | |
| 743 } | |
| 744 if (!IsLetter(charinfo.m_Unicode)) { | |
| 745 return index; | |
| 746 } | |
| 747 int breakPos = index; | |
| 748 if (direction == FPDFTEXT_LEFT) { | |
| 749 while (--breakPos > 0) { | |
| 750 if (!IsLetter(m_CharList[breakPos].m_Unicode)) | |
| 751 break; | |
| 752 } | |
| 753 } else if (direction == FPDFTEXT_RIGHT) { | |
| 754 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) { | |
| 755 if (!IsLetter(m_CharList[breakPos].m_Unicode)) | |
| 756 break; | |
| 757 } | |
| 758 } | |
| 759 return breakPos; | |
| 760 } | |
| 761 | |
| 762 int32_t CPDF_TextPage::FindTextlineFlowDirection() { | 687 int32_t CPDF_TextPage::FindTextlineFlowDirection() { |
| 763 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); | 688 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); |
| 764 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); | 689 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); |
| 765 std::vector<uint8_t> nHorizontalMask(nPageWidth); | 690 std::vector<uint8_t> nHorizontalMask(nPageWidth); |
| 766 std::vector<uint8_t> nVerticalMask(nPageHeight); | 691 std::vector<uint8_t> nVerticalMask(nPageHeight); |
| 767 uint8_t* pDataH = nHorizontalMask.data(); | 692 uint8_t* pDataH = nHorizontalMask.data(); |
| 768 uint8_t* pDataV = nVerticalMask.data(); | 693 uint8_t* pDataV = nVerticalMask.data(); |
| 769 int32_t index = 0; | 694 int32_t index = 0; |
| 770 FX_FLOAT fLineHeight = 0.0f; | 695 FX_FLOAT fLineHeight = 0.0f; |
| 771 if (m_pPage->GetPageObjectList()->empty()) | 696 if (m_pPage->GetPageObjectList()->empty()) |
| (...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 898 return w; | 823 return w; |
| 899 | 824 |
| 900 CFX_ByteString str; | 825 CFX_ByteString str; |
| 901 pFont->AppendChar(str, charCode); | 826 pFont->AppendChar(str, charCode); |
| 902 if (int w = pFont->GetStringWidth(str.c_str(), 1)) | 827 if (int w = pFont->GetStringWidth(str.c_str(), 1)) |
| 903 return w; | 828 return w; |
| 904 | 829 |
| 905 return pFont->GetCharBBox(charCode).Width(); | 830 return pFont->GetCharBBox(charCode).Width(); |
| 906 } | 831 } |
| 907 | 832 |
| 908 void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { | |
| 909 CFX_BidiChar::Segment seg = pBidi->GetSegmentInfo(); | |
| 910 if (seg.direction == CFX_BidiChar::RIGHT) { | |
| 911 for (int i = seg.start + seg.count; i > seg.start; i--) { | |
| 912 m_TextBuf.AppendChar(str.GetAt(i - 1)); | |
| 913 m_CharList.push_back(m_TempCharList[i - 1]); | |
| 914 } | |
| 915 } else { | |
| 916 for (int i = seg.start; i < seg.start + seg.count; i++) { | |
| 917 m_TextBuf.AppendChar(str.GetAt(i)); | |
| 918 m_CharList.push_back(m_TempCharList[i]); | |
| 919 } | |
| 920 } | |
| 921 } | |
| 922 | |
| 923 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, | 833 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, |
| 924 PAGECHAR_INFO info) { | 834 PAGECHAR_INFO info) { |
| 925 if (!IsControlChar(info)) { | 835 if (!IsControlChar(info)) { |
| 926 info.m_Index = m_TextBuf.GetLength(); | 836 info.m_Index = m_TextBuf.GetLength(); |
| 927 if (wChar >= 0xFB00 && wChar <= 0xFB06) { | 837 if (wChar >= 0xFB00 && wChar <= 0xFB06) { |
| 928 FX_WCHAR* pDst = NULL; | 838 FX_WCHAR* pDst = NULL; |
| 929 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); | 839 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); |
| 930 if (nCount >= 1) { | 840 if (nCount >= 1) { |
| 931 pDst = FX_Alloc(FX_WCHAR, nCount); | 841 pDst = FX_Alloc(FX_WCHAR, nCount); |
| 932 Unicode_GetNormalization(wChar, pDst); | 842 Unicode_GetNormalization(wChar, pDst); |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 989 } | 899 } |
| 990 if (bPrevSpace) { | 900 if (bPrevSpace) { |
| 991 m_TempTextBuf.Delete(i, 1); | 901 m_TempTextBuf.Delete(i, 1); |
| 992 m_TempCharList.erase(m_TempCharList.begin() + i); | 902 m_TempCharList.erase(m_TempCharList.begin() + i); |
| 993 str.Delete(i); | 903 str.Delete(i); |
| 994 i--; | 904 i--; |
| 995 } | 905 } |
| 996 bPrevSpace = TRUE; | 906 bPrevSpace = TRUE; |
| 997 } | 907 } |
| 998 CFX_BidiString bidi(str); | 908 CFX_BidiString bidi(str); |
| 999 if (m_parserflag == FPDFTEXT_RLTB) | 909 if (m_parserflag == FPDFText_Direction::Right) |
| 1000 bidi.SetOverallDirectionRight(); | 910 bidi.SetOverallDirectionRight(); |
| 1001 CFX_BidiChar::Direction eCurrentDirection = bidi.OverallDirection(); | 911 CFX_BidiChar::Direction eCurrentDirection = bidi.OverallDirection(); |
| 1002 for (const auto& segment : bidi) { | 912 for (const auto& segment : bidi) { |
| 1003 if (segment.direction == CFX_BidiChar::RIGHT || | 913 if (segment.direction == CFX_BidiChar::RIGHT || |
| 1004 (segment.direction == CFX_BidiChar::NEUTRAL && | 914 (segment.direction == CFX_BidiChar::NEUTRAL && |
| 1005 eCurrentDirection == CFX_BidiChar::RIGHT)) { | 915 eCurrentDirection == CFX_BidiChar::RIGHT)) { |
| 1006 eCurrentDirection = CFX_BidiChar::RIGHT; | 916 eCurrentDirection = CFX_BidiChar::RIGHT; |
| 1007 for (int m = segment.start + segment.count; m > segment.start; --m) | 917 for (int m = segment.start + segment.count; m > segment.start; --m) |
| 1008 AddCharInfoByRLDirection(bidi.CharAt(m - 1), m_TempCharList[m - 1]); | 918 AddCharInfoByRLDirection(bidi.CharAt(m - 1), m_TempCharList[m - 1]); |
| 1009 } else { | 919 } else { |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1091 m_LineObj.InsertAt(i + 1, Obj); | 1001 m_LineObj.InsertAt(i + 1, Obj); |
| 1092 } | 1002 } |
| 1093 break; | 1003 break; |
| 1094 } | 1004 } |
| 1095 } | 1005 } |
| 1096 if (i < 0) { | 1006 if (i < 0) { |
| 1097 m_LineObj.InsertAt(0, Obj); | 1007 m_LineObj.InsertAt(0, Obj); |
| 1098 } | 1008 } |
| 1099 } | 1009 } |
| 1100 | 1010 |
| 1101 int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { | 1011 FPDFText_MarkedContent CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { |
| 1102 CPDF_TextObject* pTextObj = Obj.m_pTextObj; | 1012 CPDF_TextObject* pTextObj = Obj.m_pTextObj; |
| 1103 CPDF_ContentMarkData* pMarkData = | 1013 CPDF_ContentMarkData* pMarkData = |
| 1104 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); | 1014 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); |
| 1105 if (!pMarkData) | 1015 if (!pMarkData) |
| 1106 return FPDFTEXT_MC_PASS; | 1016 return FPDFText_MarkedContent::Pass; |
| 1107 | 1017 |
| 1108 int nContentMark = pMarkData->CountItems(); | 1018 int nContentMark = pMarkData->CountItems(); |
| 1109 if (nContentMark < 1) | 1019 if (nContentMark < 1) |
| 1110 return FPDFTEXT_MC_PASS; | 1020 return FPDFText_MarkedContent::Pass; |
| 1021 |
| 1111 CFX_WideString actText; | 1022 CFX_WideString actText; |
| 1112 FX_BOOL bExist = FALSE; | 1023 FX_BOOL bExist = FALSE; |
| 1113 CPDF_Dictionary* pDict = NULL; | 1024 CPDF_Dictionary* pDict = NULL; |
| 1114 int n = 0; | 1025 int n = 0; |
| 1115 for (n = 0; n < nContentMark; n++) { | 1026 for (n = 0; n < nContentMark; n++) { |
| 1116 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); | 1027 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); |
| 1117 if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None) | 1028 if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None) |
| 1118 continue; | 1029 continue; |
| 1119 pDict = item.GetParam(); | 1030 pDict = item.GetParam(); |
| 1120 CPDF_String* temp = | 1031 CPDF_String* temp = |
| 1121 ToString(pDict ? pDict->GetObjectBy("ActualText") : nullptr); | 1032 ToString(pDict ? pDict->GetObjectBy("ActualText") : nullptr); |
| 1122 if (temp) { | 1033 if (temp) { |
| 1123 bExist = TRUE; | 1034 bExist = TRUE; |
| 1124 actText = temp->GetUnicodeText(); | 1035 actText = temp->GetUnicodeText(); |
| 1125 } | 1036 } |
| 1126 } | 1037 } |
| 1127 if (!bExist) | 1038 if (!bExist) |
| 1128 return FPDFTEXT_MC_PASS; | 1039 return FPDFText_MarkedContent::Pass; |
| 1129 | 1040 |
| 1130 if (m_pPreTextObj) { | 1041 if (m_pPreTextObj) { |
| 1131 CPDF_ContentMarkData* pPreMarkData = | 1042 CPDF_ContentMarkData* pPreMarkData = |
| 1132 (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject(); | 1043 (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject(); |
| 1133 if (pPreMarkData && pPreMarkData->CountItems() == n && | 1044 if (pPreMarkData && pPreMarkData->CountItems() == n && |
| 1134 pDict == pPreMarkData->GetItem(n - 1).GetParam()) { | 1045 pDict == pPreMarkData->GetItem(n - 1).GetParam()) { |
| 1135 return FPDFTEXT_MC_DONE; | 1046 return FPDFText_MarkedContent::Done; |
| 1136 } | 1047 } |
| 1137 } | 1048 } |
| 1138 FX_STRSIZE nItems = actText.GetLength(); | 1049 FX_STRSIZE nItems = actText.GetLength(); |
| 1139 if (nItems < 1) | 1050 if (nItems < 1) |
| 1140 return FPDFTEXT_MC_PASS; | 1051 return FPDFText_MarkedContent::Pass; |
| 1141 | 1052 |
| 1142 CPDF_Font* pFont = pTextObj->GetFont(); | 1053 CPDF_Font* pFont = pTextObj->GetFont(); |
| 1143 bExist = FALSE; | 1054 bExist = FALSE; |
| 1144 for (FX_STRSIZE i = 0; i < nItems; i++) { | 1055 for (FX_STRSIZE i = 0; i < nItems; i++) { |
| 1145 if (pFont->CharCodeFromUnicode(actText.GetAt(i)) != | 1056 if (pFont->CharCodeFromUnicode(actText.GetAt(i)) != |
| 1146 CPDF_Font::kInvalidCharCode) { | 1057 CPDF_Font::kInvalidCharCode) { |
| 1147 bExist = TRUE; | 1058 bExist = TRUE; |
| 1148 break; | 1059 break; |
| 1149 } | 1060 } |
| 1150 } | 1061 } |
| 1151 if (!bExist) | 1062 if (!bExist) |
| 1152 return FPDFTEXT_MC_PASS; | 1063 return FPDFText_MarkedContent::Pass; |
| 1153 | 1064 |
| 1154 bExist = FALSE; | 1065 bExist = FALSE; |
| 1155 for (FX_STRSIZE i = 0; i < nItems; i++) { | 1066 for (FX_STRSIZE i = 0; i < nItems; i++) { |
| 1156 FX_WCHAR wChar = actText.GetAt(i); | 1067 FX_WCHAR wChar = actText.GetAt(i); |
| 1157 if ((wChar > 0x80 && wChar < 0xFFFD) || (wChar <= 0x80 && isprint(wChar))) { | 1068 if ((wChar > 0x80 && wChar < 0xFFFD) || (wChar <= 0x80 && isprint(wChar))) { |
| 1158 bExist = TRUE; | 1069 bExist = TRUE; |
| 1159 break; | 1070 break; |
| 1160 } | 1071 } |
| 1161 } | 1072 } |
| 1162 if (!bExist) | 1073 if (!bExist) |
| 1163 return FPDFTEXT_MC_DONE; | 1074 return FPDFText_MarkedContent::Done; |
| 1164 | 1075 |
| 1165 return FPDFTEXT_MC_DELAY; | 1076 return FPDFText_MarkedContent::Delay; |
| 1166 } | 1077 } |
| 1167 | 1078 |
| 1168 void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) { | 1079 void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) { |
| 1169 CPDF_TextObject* pTextObj = Obj.m_pTextObj; | 1080 CPDF_TextObject* pTextObj = Obj.m_pTextObj; |
| 1170 CPDF_ContentMarkData* pMarkData = | 1081 CPDF_ContentMarkData* pMarkData = |
| 1171 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); | 1082 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); |
| 1172 if (!pMarkData) | 1083 if (!pMarkData) |
| 1173 return; | 1084 return; |
| 1174 | 1085 |
| 1175 int nContentMark = pMarkData->CountItems(); | 1086 int nContentMark = pMarkData->CountItems(); |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1278 void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { | 1189 void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { |
| 1279 CPDF_TextObject* pTextObj = Obj.m_pTextObj; | 1190 CPDF_TextObject* pTextObj = Obj.m_pTextObj; |
| 1280 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { | 1191 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { |
| 1281 return; | 1192 return; |
| 1282 } | 1193 } |
| 1283 CFX_Matrix formMatrix = Obj.m_formMatrix; | 1194 CFX_Matrix formMatrix = Obj.m_formMatrix; |
| 1284 CPDF_Font* pFont = pTextObj->GetFont(); | 1195 CPDF_Font* pFont = pTextObj->GetFont(); |
| 1285 CFX_Matrix matrix; | 1196 CFX_Matrix matrix; |
| 1286 pTextObj->GetTextMatrix(&matrix); | 1197 pTextObj->GetTextMatrix(&matrix); |
| 1287 matrix.Concat(formMatrix); | 1198 matrix.Concat(formMatrix); |
| 1288 int32_t bPreMKC = PreMarkedContent(Obj); | 1199 FPDFText_MarkedContent ePreMKC = PreMarkedContent(Obj); |
| 1289 if (FPDFTEXT_MC_DONE == bPreMKC) { | 1200 if (ePreMKC == FPDFText_MarkedContent::Done) { |
| 1290 m_pPreTextObj = pTextObj; | 1201 m_pPreTextObj = pTextObj; |
| 1291 m_perMatrix.Copy(formMatrix); | 1202 m_perMatrix.Copy(formMatrix); |
| 1292 return; | 1203 return; |
| 1293 } | 1204 } |
| 1294 int result = 0; | 1205 int result = 0; |
| 1295 if (m_pPreTextObj) { | 1206 if (m_pPreTextObj) { |
| 1296 result = ProcessInsertObject(pTextObj, formMatrix); | 1207 result = ProcessInsertObject(pTextObj, formMatrix); |
| 1297 if (2 == result) { | 1208 if (2 == result) { |
| 1298 m_CurlineRect = | 1209 m_CurlineRect = |
| 1299 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, | 1210 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1355 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 1266 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
| 1356 charinfo->m_Unicode = 0x2; | 1267 charinfo->m_Unicode = 0x2; |
| 1357 charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN; | 1268 charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN; |
| 1358 m_TempTextBuf.AppendChar(0xfffe); | 1269 m_TempTextBuf.AppendChar(0xfffe); |
| 1359 } | 1270 } |
| 1360 } else { | 1271 } else { |
| 1361 m_CurlineRect = | 1272 m_CurlineRect = |
| 1362 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, | 1273 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, |
| 1363 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); | 1274 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); |
| 1364 } | 1275 } |
| 1365 if (FPDFTEXT_MC_DELAY == bPreMKC) { | 1276 if (ePreMKC == FPDFText_MarkedContent::Delay) { |
| 1366 ProcessMarkedContent(Obj); | 1277 ProcessMarkedContent(Obj); |
| 1367 m_pPreTextObj = pTextObj; | 1278 m_pPreTextObj = pTextObj; |
| 1368 m_perMatrix.Copy(formMatrix); | 1279 m_perMatrix.Copy(formMatrix); |
| 1369 return; | 1280 return; |
| 1370 } | 1281 } |
| 1371 m_pPreTextObj = pTextObj; | 1282 m_pPreTextObj = pTextObj; |
| 1372 m_perMatrix.Copy(formMatrix); | 1283 m_perMatrix.Copy(formMatrix); |
| 1373 int nItems = pTextObj->CountItems(); | 1284 int nItems = pTextObj->CountItems(); |
| 1374 FX_FLOAT baseSpace = CalculateBaseSpace(pTextObj, matrix); | 1285 FX_FLOAT baseSpace = CalculateBaseSpace(pTextObj, matrix); |
| 1375 | 1286 |
| (...skipping 496 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1872 return TRUE; | 1783 return TRUE; |
| 1873 } | 1784 } |
| 1874 | 1785 |
| 1875 FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, | 1786 FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, |
| 1876 const CFX_FloatRect& rect2) { | 1787 const CFX_FloatRect& rect2) { |
| 1877 CFX_FloatRect rect = rect1; | 1788 CFX_FloatRect rect = rect1; |
| 1878 rect.Intersect(rect2); | 1789 rect.Intersect(rect2); |
| 1879 return !rect.IsEmpty(); | 1790 return !rect.IsEmpty(); |
| 1880 } | 1791 } |
| 1881 | 1792 |
| 1882 FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { | |
| 1883 if (unicode < L'A') { | |
| 1884 return FALSE; | |
| 1885 } | |
| 1886 if (unicode > L'Z' && unicode < L'a') { | |
| 1887 return FALSE; | |
| 1888 } | |
| 1889 if (unicode > L'z') { | |
| 1890 return FALSE; | |
| 1891 } | |
| 1892 return TRUE; | |
| 1893 } | |
| 1894 | |
| 1895 CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage) | 1793 CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage) |
| 1896 : m_pTextPage(pTextPage), | 1794 : m_pTextPage(pTextPage), |
| 1897 m_flags(0), | 1795 m_flags(0), |
| 1898 m_findNextStart(-1), | 1796 m_findNextStart(-1), |
| 1899 m_findPreStart(-1), | 1797 m_findPreStart(-1), |
| 1900 m_bMatchCase(FALSE), | 1798 m_bMatchCase(FALSE), |
| 1901 m_bMatchWholeWord(FALSE), | 1799 m_bMatchWholeWord(FALSE), |
| 1902 m_resStart(0), | 1800 m_resStart(0), |
| 1903 m_resEnd(-1), | 1801 m_resEnd(-1), |
| 1904 m_IsFind(FALSE) { | 1802 m_IsFind(FALSE) { |
| (...skipping 537 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2442 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; | 2340 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; |
| 2443 } | 2341 } |
| 2444 | 2342 |
| 2445 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const { | 2343 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const { |
| 2446 if (index >= m_LinkArray.size()) | 2344 if (index >= m_LinkArray.size()) |
| 2447 return std::vector<CFX_FloatRect>(); | 2345 return std::vector<CFX_FloatRect>(); |
| 2448 | 2346 |
| 2449 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, | 2347 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, |
| 2450 m_LinkArray[index].m_Count); | 2348 m_LinkArray[index].m_Count); |
| 2451 } | 2349 } |
| OLD | NEW |