| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2016 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #ifndef CORE_FPDFTEXT_FPDF_TEXT_INT_H_ | 7 #ifndef CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ |
| 8 #define CORE_FPDFTEXT_FPDF_TEXT_INT_H_ | 8 #define CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ |
| 9 | 9 |
| 10 #include <deque> | 10 #include <deque> |
| 11 #include <vector> | 11 #include <vector> |
| 12 | 12 |
| 13 #include "core/fpdfapi/fpdf_page/cpdf_pageobjectlist.h" | 13 #include "core/fpdfapi/fpdf_page/cpdf_pageobjectlist.h" |
| 14 #include "core/fpdfapi/fpdf_page/include/cpdf_form.h" | |
| 15 #include "core/fpdfapi/fpdf_page/include/cpdf_page.h" | |
| 16 #include "core/fpdftext/include/ipdf_linkextract.h" | |
| 17 #include "core/fpdftext/include/ipdf_textpage.h" | |
| 18 #include "core/fpdftext/include/ipdf_textpagefind.h" | |
| 19 #include "core/fxcrt/include/fx_basic.h" | 14 #include "core/fxcrt/include/fx_basic.h" |
| 15 #include "core/fxcrt/include/fx_coordinates.h" |
| 16 #include "core/fxcrt/include/fx_string.h" |
| 20 | 17 |
| 21 class CFX_BidiChar; | 18 class CFX_BidiChar; |
| 19 class CPDF_Font; |
| 22 class CPDF_FormObject; | 20 class CPDF_FormObject; |
| 23 class CPDF_LinkExtract; | 21 class CPDF_Page; |
| 24 class CPDF_TextPageFind; | 22 class CPDF_TextObject; |
| 25 class CPDF_Font; | |
| 26 | 23 |
| 27 #define FPDFTEXT_CHAR_ERROR -1 | 24 struct FPDF_CHAR_INFO { |
| 28 #define FPDFTEXT_CHAR_NORMAL 0 | |
| 29 #define FPDFTEXT_CHAR_GENERATED 1 | |
| 30 #define FPDFTEXT_CHAR_UNUNICODE 2 | |
| 31 #define FPDFTEXT_CHAR_HYPHEN 3 | |
| 32 #define FPDFTEXT_CHAR_PIECE 4 | |
| 33 #define FPDFTEXT_MC_PASS 0 | |
| 34 #define FPDFTEXT_MC_DONE 1 | |
| 35 #define FPDFTEXT_MC_DELAY 2 | |
| 36 | |
| 37 struct PAGECHAR_INFO { | |
| 38 int m_CharCode; | |
| 39 FX_WCHAR m_Unicode; | 25 FX_WCHAR m_Unicode; |
| 26 FX_WCHAR m_Charcode; |
| 27 int32_t m_Flag; |
| 28 FX_FLOAT m_FontSize; |
| 40 FX_FLOAT m_OriginX; | 29 FX_FLOAT m_OriginX; |
| 41 FX_FLOAT m_OriginY; | 30 FX_FLOAT m_OriginY; |
| 42 int32_t m_Flag; | |
| 43 CFX_FloatRect m_CharBox; | 31 CFX_FloatRect m_CharBox; |
| 44 CPDF_TextObject* m_pTextObj; | 32 CPDF_TextObject* m_pTextObj; |
| 45 CFX_Matrix m_Matrix; | 33 CFX_Matrix m_Matrix; |
| 46 int m_Index; | |
| 47 }; | 34 }; |
| 48 | 35 |
| 49 struct FPDF_SEGMENT { | 36 struct FPDF_SEGMENT { |
| 50 int m_Start; | 37 int m_Start; |
| 51 int m_nCount; | 38 int m_nCount; |
| 52 }; | 39 }; |
| 53 | 40 |
| 41 struct PAGECHAR_INFO { |
| 42 int m_CharCode; |
| 43 FX_WCHAR m_Unicode; |
| 44 FX_FLOAT m_OriginX; |
| 45 FX_FLOAT m_OriginY; |
| 46 int32_t m_Flag; |
| 47 CFX_FloatRect m_CharBox; |
| 48 CPDF_TextObject* m_pTextObj; |
| 49 CFX_Matrix m_Matrix; |
| 50 int m_Index; |
| 51 }; |
| 52 |
| 54 struct PDFTEXT_Obj { | 53 struct PDFTEXT_Obj { |
| 55 CPDF_TextObject* m_pTextObj; | 54 CPDF_TextObject* m_pTextObj; |
| 56 CFX_Matrix m_formMatrix; | 55 CFX_Matrix m_formMatrix; |
| 57 }; | 56 }; |
| 58 | 57 |
| 59 class CPDF_TextPage : public IPDF_TextPage { | 58 class CPDF_TextPage { |
| 60 public: | 59 public: |
| 61 CPDF_TextPage(const CPDF_Page* pPage, int flags); | 60 CPDF_TextPage(const CPDF_Page* pPage, int flags); |
| 62 ~CPDF_TextPage() override {} | 61 ~CPDF_TextPage() {} |
| 63 | 62 |
| 64 // IPDF_TextPage: | 63 // IPDF_TextPage: |
| 65 void ParseTextPage() override; | 64 void ParseTextPage(); |
| 66 bool IsParsed() const override { return m_bIsParsed; } | 65 bool IsParsed() const { return m_bIsParsed; } |
| 67 int CharIndexFromTextIndex(int TextIndex) const override; | 66 int CharIndexFromTextIndex(int TextIndex) const; |
| 68 int TextIndexFromCharIndex(int CharIndex) const override; | 67 int TextIndexFromCharIndex(int CharIndex) const; |
| 69 int CountChars() const override; | 68 int CountChars() const; |
| 70 void GetCharInfo(int index, FPDF_CHAR_INFO* info) const override; | 69 void GetCharInfo(int index, FPDF_CHAR_INFO* info) const; |
| 71 void GetRectArray(int start, | 70 void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const; |
| 72 int nCount, | |
| 73 CFX_RectArray& rectArray) const override; | |
| 74 int GetIndexAtPos(CFX_FloatPoint point, | 71 int GetIndexAtPos(CFX_FloatPoint point, |
| 75 FX_FLOAT xTolerance, | 72 FX_FLOAT xTolerance, |
| 76 FX_FLOAT yTolerance) const override; | 73 FX_FLOAT yTolerance) const; |
| 77 int GetIndexAtPos(FX_FLOAT x, | 74 int GetIndexAtPos(FX_FLOAT x, |
| 78 FX_FLOAT y, | 75 FX_FLOAT y, |
| 79 FX_FLOAT xTolerance, | 76 FX_FLOAT xTolerance, |
| 80 FX_FLOAT yTolerance) const override; | 77 FX_FLOAT yTolerance) const; |
| 81 CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const override; | 78 CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const; |
| 82 void GetRectsArrayByRect(const CFX_FloatRect& rect, | 79 void GetRectsArrayByRect(const CFX_FloatRect& rect, |
| 83 CFX_RectArray& resRectArray) const override; | 80 CFX_RectArray& resRectArray) const; |
| 84 CFX_WideString GetPageText(int start = 0, int nCount = -1) const override; | 81 CFX_WideString GetPageText(int start = 0, int nCount = -1) const; |
| 85 int CountRects(int start, int nCount) override; | 82 int CountRects(int start, int nCount); |
| 86 void GetRect(int rectIndex, | 83 void GetRect(int rectIndex, |
| 87 FX_FLOAT& left, | 84 FX_FLOAT& left, |
| 88 FX_FLOAT& top, | 85 FX_FLOAT& top, |
| 89 FX_FLOAT& right, | 86 FX_FLOAT& right, |
| 90 FX_FLOAT& bottom) const override; | 87 FX_FLOAT& bottom) const; |
| 91 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override; | 88 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate); |
| 92 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override; | 89 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate); |
| 93 int CountBoundedSegments(FX_FLOAT left, | 90 int CountBoundedSegments(FX_FLOAT left, |
| 94 FX_FLOAT top, | 91 FX_FLOAT top, |
| 95 FX_FLOAT right, | 92 FX_FLOAT right, |
| 96 FX_FLOAT bottom, | 93 FX_FLOAT bottom, |
| 97 FX_BOOL bContains = FALSE) override; | 94 FX_BOOL bContains = FALSE); |
| 98 void GetBoundedSegment(int index, int& start, int& count) const override; | 95 void GetBoundedSegment(int index, int& start, int& count) const; |
| 99 int GetWordBreak(int index, int direction) const override; | 96 int GetWordBreak(int index, int direction) const; |
| 100 | 97 |
| 101 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, | 98 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, |
| 102 const CFX_FloatRect& rect2); | 99 const CFX_FloatRect& rect2); |
| 103 static FX_BOOL IsLetter(FX_WCHAR unicode); | 100 static FX_BOOL IsLetter(FX_WCHAR unicode); |
| 104 | 101 |
| 105 private: | 102 private: |
| 106 FX_BOOL IsHyphen(FX_WCHAR curChar); | 103 FX_BOOL IsHyphen(FX_WCHAR curChar); |
| 107 bool IsControlChar(const PAGECHAR_INFO& charInfo); | 104 bool IsControlChar(const PAGECHAR_INFO& charInfo); |
| 108 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); | 105 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); |
| 109 void ProcessObject(); | 106 void ProcessObject(); |
| (...skipping 40 matching lines...) | (...skipping 40 matching lines...) |
| 150 CFX_Matrix m_perMatrix; | 147 CFX_Matrix m_perMatrix; |
| 151 bool m_bIsParsed; | 148 bool m_bIsParsed; |
| 152 CFX_Matrix m_DisplayMatrix; | 149 CFX_Matrix m_DisplayMatrix; |
| 153 CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments; | 150 CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments; |
| 154 CFX_RectArray m_SelRects; | 151 CFX_RectArray m_SelRects; |
| 155 CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj; | 152 CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj; |
| 156 int32_t m_TextlineDir; | 153 int32_t m_TextlineDir; |
| 157 CFX_FloatRect m_CurlineRect; | 154 CFX_FloatRect m_CurlineRect; |
| 158 }; | 155 }; |
| 159 | 156 |
| 160 class CPDF_TextPageFind : public IPDF_TextPageFind { | 157 #endif // CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ |
| 161 public: | |
| 162 explicit CPDF_TextPageFind(const IPDF_TextPage* pTextPage); | |
| 163 ~CPDF_TextPageFind() override {} | |
| 164 | |
| 165 // IPDF_TextPageFind | |
| 166 FX_BOOL FindFirst(const CFX_WideString& findwhat, | |
| 167 int flags, | |
| 168 int startPos = 0) override; | |
| 169 FX_BOOL FindNext() override; | |
| 170 FX_BOOL FindPrev() override; | |
| 171 void GetRectArray(CFX_RectArray& rects) const override; | |
| 172 int GetCurOrder() const override; | |
| 173 int GetMatchedCount() const override; | |
| 174 | |
| 175 protected: | |
| 176 void ExtractFindWhat(const CFX_WideString& findwhat); | |
| 177 FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText, | |
| 178 int startPos, | |
| 179 int endPos); | |
| 180 FX_BOOL ExtractSubString(CFX_WideString& rString, | |
| 181 const FX_WCHAR* lpszFullString, | |
| 182 int iSubString, | |
| 183 FX_WCHAR chSep); | |
| 184 CFX_WideString MakeReverse(const CFX_WideString& str); | |
| 185 int ReverseFind(const CFX_WideString& csPageText, | |
| 186 const CFX_WideString& csWord, | |
| 187 int nStartPos, | |
| 188 int& WordLength); | |
| 189 int GetCharIndex(int index) const; | |
| 190 | |
| 191 private: | |
| 192 std::vector<uint16_t> m_CharIndex; | |
| 193 const IPDF_TextPage* m_pTextPage; | |
| 194 CFX_WideString m_strText; | |
| 195 CFX_WideString m_findWhat; | |
| 196 int m_flags; | |
| 197 std::vector<CFX_WideString> m_csFindWhatArray; | |
| 198 int m_findNextStart; | |
| 199 int m_findPreStart; | |
| 200 FX_BOOL m_bMatchCase; | |
| 201 FX_BOOL m_bMatchWholeWord; | |
| 202 int m_resStart; | |
| 203 int m_resEnd; | |
| 204 CFX_RectArray m_resArray; | |
| 205 FX_BOOL m_IsFind; | |
| 206 }; | |
| 207 | |
| 208 class CPDF_LinkExt { | |
| 209 public: | |
| 210 CPDF_LinkExt() {} | |
| 211 int m_Start; | |
| 212 int m_Count; | |
| 213 CFX_WideString m_strUrl; | |
| 214 virtual ~CPDF_LinkExt() {} | |
| 215 }; | |
| 216 | |
| 217 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; | |
| 218 | |
| 219 class CPDF_LinkExtract : public IPDF_LinkExtract { | |
| 220 public: | |
| 221 CPDF_LinkExtract(); | |
| 222 ~CPDF_LinkExtract() override; | |
| 223 | |
| 224 // IPDF_LinkExtract | |
| 225 FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) override; | |
| 226 int CountLinks() const override; | |
| 227 CFX_WideString GetURL(int index) const override; | |
| 228 void GetBoundedSegment(int index, int& start, int& count) const override; | |
| 229 void GetRects(int index, CFX_RectArray& rects) const override; | |
| 230 | |
| 231 FX_BOOL IsExtract() const { return m_bIsParsed; } | |
| 232 | |
| 233 protected: | |
| 234 void ParseLink(); | |
| 235 void DeleteLinkList(); | |
| 236 FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); | |
| 237 bool CheckMailLink(CFX_WideString& str); | |
| 238 void AppendToLinkList(int start, int count, const CFX_WideString& strUrl); | |
| 239 | |
| 240 private: | |
| 241 LINK_InfoArray m_LinkList; | |
| 242 const CPDF_TextPage* m_pTextPage; | |
| 243 CFX_WideString m_strPageText; | |
| 244 bool m_bIsParsed; | |
| 245 }; | |
| 246 | |
| 247 #endif // CORE_FPDFTEXT_FPDF_TEXT_INT_H_ | |
| OLD | NEW |