| OLD | NEW |
| (Empty) |
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
| 6 | |
| 7 #ifndef CORE_FPDFTEXT_TEXT_INT_H_ | |
| 8 #define CORE_FPDFTEXT_TEXT_INT_H_ | |
| 9 | |
| 10 #include <deque> | |
| 11 #include <vector> | |
| 12 | |
| 13 #include "core/fpdftext/include/ipdf_linkextract.h" | |
| 14 #include "core/fpdftext/include/ipdf_textpage.h" | |
| 15 #include "core/fpdftext/include/ipdf_textpagefind.h" | |
| 16 #include "core/include/fpdfapi/fpdf_page.h" | |
| 17 #include "core/include/fxcrt/fx_basic.h" | |
| 18 | |
// Forward declarations. CFX_BidiChar and CPDF_FormObject are only used through
// pointers in this header; CPDF_LinkExtract and CPDF_TextPageFind are defined
// further down in this file.
class CFX_BidiChar;
class CPDF_FormObject;
class CPDF_LinkExtract;
class CPDF_TextPageFind;
| 23 | |
// Character classification codes (FPDFTEXT_CHAR_*).
// NOTE(review): presumably these are the values carried in
// PAGECHAR_INFO::m_Flag -- confirm against the implementation file.
// The negative value is parenthesized so the macro always expands to a single
// operand, whatever expression it is pasted into.
#define FPDFTEXT_CHAR_ERROR (-1)
#define FPDFTEXT_CHAR_NORMAL 0
#define FPDFTEXT_CHAR_GENERATED 1
#define FPDFTEXT_CHAR_UNUNICODE 2
#define FPDFTEXT_CHAR_HYPHEN 3
#define FPDFTEXT_CHAR_PIECE 4

// Marked-content result codes (FPDFTEXT_MC_*).
// NOTE(review): presumably the values returned by
// CPDF_TextPage::PreMarkedContent() -- confirm against the implementation.
#define FPDFTEXT_MC_PASS 0
#define FPDFTEXT_MC_DONE 1
#define FPDFTEXT_MC_DELAY 2
| 33 | |
| 34 struct PAGECHAR_INFO { | |
| 35 int m_CharCode; | |
| 36 FX_WCHAR m_Unicode; | |
| 37 FX_FLOAT m_OriginX; | |
| 38 FX_FLOAT m_OriginY; | |
| 39 int32_t m_Flag; | |
| 40 CFX_FloatRect m_CharBox; | |
| 41 CPDF_TextObject* m_pTextObj; | |
| 42 CFX_Matrix m_Matrix; | |
| 43 int m_Index; | |
| 44 }; | |
| 45 | |
// A (start, count) pair. CPDF_TextPage stores these in m_Segments.
// NOTE(review): presumably a range of character indices -- confirm against
// CountBoundedSegments() / GetBoundedSegment().
struct FPDF_SEGMENT {
  int m_Start;   // First position of the segment.
  int m_nCount;  // Number of entries in the segment.
};
| 50 | |
| 51 struct PDFTEXT_Obj { | |
| 52 CPDF_TextObject* m_pTextObj; | |
| 53 CFX_Matrix m_formMatrix; | |
| 54 }; | |
| 55 | |
| 56 class CPDF_TextPage : public IPDF_TextPage { | |
| 57 public: | |
| 58 CPDF_TextPage(const CPDF_Page* pPage, int flags); | |
| 59 ~CPDF_TextPage() override {} | |
| 60 | |
| 61 // IPDF_TextPage: | |
| 62 void ParseTextPage() override; | |
| 63 bool IsParsed() const override { return m_bIsParsed; } | |
| 64 int CharIndexFromTextIndex(int TextIndex) const override; | |
| 65 int TextIndexFromCharIndex(int CharIndex) const override; | |
| 66 int CountChars() const override; | |
| 67 void GetCharInfo(int index, FPDF_CHAR_INFO* info) const override; | |
| 68 void GetRectArray(int start, | |
| 69 int nCount, | |
| 70 CFX_RectArray& rectArray) const override; | |
| 71 int GetIndexAtPos(CFX_FloatPoint point, | |
| 72 FX_FLOAT xTolerance, | |
| 73 FX_FLOAT yTolerance) const override; | |
| 74 int GetIndexAtPos(FX_FLOAT x, | |
| 75 FX_FLOAT y, | |
| 76 FX_FLOAT xTolerance, | |
| 77 FX_FLOAT yTolerance) const override; | |
| 78 CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const override; | |
| 79 void GetRectsArrayByRect(const CFX_FloatRect& rect, | |
| 80 CFX_RectArray& resRectArray) const override; | |
| 81 CFX_WideString GetPageText(int start = 0, int nCount = -1) const override; | |
| 82 int CountRects(int start, int nCount) override; | |
| 83 void GetRect(int rectIndex, | |
| 84 FX_FLOAT& left, | |
| 85 FX_FLOAT& top, | |
| 86 FX_FLOAT& right, | |
| 87 FX_FLOAT& bottom) const override; | |
| 88 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override; | |
| 89 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override; | |
| 90 int CountBoundedSegments(FX_FLOAT left, | |
| 91 FX_FLOAT top, | |
| 92 FX_FLOAT right, | |
| 93 FX_FLOAT bottom, | |
| 94 FX_BOOL bContains = FALSE) override; | |
| 95 void GetBoundedSegment(int index, int& start, int& count) const override; | |
| 96 int GetWordBreak(int index, int direction) const override; | |
| 97 | |
| 98 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, | |
| 99 const CFX_FloatRect& rect2); | |
| 100 static FX_BOOL IsLetter(FX_WCHAR unicode); | |
| 101 | |
| 102 private: | |
| 103 FX_BOOL IsHyphen(FX_WCHAR curChar); | |
| 104 bool IsControlChar(const PAGECHAR_INFO& charInfo); | |
| 105 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); | |
| 106 void ProcessObject(); | |
| 107 void ProcessFormObject(CPDF_FormObject* pFormObj, | |
| 108 const CFX_Matrix& formMatrix); | |
| 109 void ProcessTextObject(PDFTEXT_Obj pObj); | |
| 110 void ProcessTextObject(CPDF_TextObject* pTextObj, | |
| 111 const CFX_Matrix& formMatrix, | |
| 112 const CPDF_PageObjectList* pObjList, | |
| 113 CPDF_PageObjectList::const_iterator ObjPos); | |
| 114 int ProcessInsertObject(const CPDF_TextObject* pObj, | |
| 115 const CFX_Matrix& formMatrix); | |
| 116 FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info); | |
| 117 FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, | |
| 118 const CPDF_PageObjectList* pObjList, | |
| 119 CPDF_PageObjectList::const_iterator ObjPos); | |
| 120 FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1, | |
| 121 CPDF_TextObject* pTextObj2); | |
| 122 int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const; | |
| 123 void CloseTempLine(); | |
| 124 void OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str); | |
| 125 int32_t PreMarkedContent(PDFTEXT_Obj pObj); | |
| 126 void ProcessMarkedContent(PDFTEXT_Obj pObj); | |
| 127 void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const; | |
| 128 void FindPreviousTextObject(void); | |
| 129 void AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info); | |
| 130 void AddCharInfoByRLDirection(FX_WCHAR wChar, PAGECHAR_INFO info); | |
| 131 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); | |
| 132 int32_t FindTextlineFlowDirection(); | |
| 133 | |
| 134 void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); | |
| 135 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, | |
| 136 const CPDF_Font* pFont, | |
| 137 int nItems) const; | |
| 138 | |
| 139 const CPDF_Page* const m_pPage; | |
| 140 std::vector<FX_WORD> m_CharIndex; | |
| 141 std::deque<PAGECHAR_INFO> m_CharList; | |
| 142 std::deque<PAGECHAR_INFO> m_TempCharList; | |
| 143 CFX_WideTextBuf m_TextBuf; | |
| 144 CFX_WideTextBuf m_TempTextBuf; | |
| 145 const int m_parserflag; | |
| 146 CPDF_TextObject* m_pPreTextObj; | |
| 147 CFX_Matrix m_perMatrix; | |
| 148 bool m_bIsParsed; | |
| 149 CFX_Matrix m_DisplayMatrix; | |
| 150 CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments; | |
| 151 CFX_RectArray m_SelRects; | |
| 152 CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj; | |
| 153 int32_t m_TextlineDir; | |
| 154 CFX_FloatRect m_CurlineRect; | |
| 155 }; | |
| 156 | |
| 157 class CPDF_TextPageFind : public IPDF_TextPageFind { | |
| 158 public: | |
| 159 explicit CPDF_TextPageFind(const IPDF_TextPage* pTextPage); | |
| 160 ~CPDF_TextPageFind() override {} | |
| 161 | |
| 162 // IPDF_TextPageFind | |
| 163 FX_BOOL FindFirst(const CFX_WideString& findwhat, | |
| 164 int flags, | |
| 165 int startPos = 0) override; | |
| 166 FX_BOOL FindNext() override; | |
| 167 FX_BOOL FindPrev() override; | |
| 168 void GetRectArray(CFX_RectArray& rects) const override; | |
| 169 int GetCurOrder() const override; | |
| 170 int GetMatchedCount() const override; | |
| 171 | |
| 172 protected: | |
| 173 void ExtractFindWhat(const CFX_WideString& findwhat); | |
| 174 FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText, | |
| 175 int startPos, | |
| 176 int endPos); | |
| 177 FX_BOOL ExtractSubString(CFX_WideString& rString, | |
| 178 const FX_WCHAR* lpszFullString, | |
| 179 int iSubString, | |
| 180 FX_WCHAR chSep); | |
| 181 CFX_WideString MakeReverse(const CFX_WideString& str); | |
| 182 int ReverseFind(const CFX_WideString& csPageText, | |
| 183 const CFX_WideString& csWord, | |
| 184 int nStartPos, | |
| 185 int& WordLength); | |
| 186 int GetCharIndex(int index) const; | |
| 187 | |
| 188 private: | |
| 189 std::vector<FX_WORD> m_CharIndex; | |
| 190 const IPDF_TextPage* m_pTextPage; | |
| 191 CFX_WideString m_strText; | |
| 192 CFX_WideString m_findWhat; | |
| 193 int m_flags; | |
| 194 std::vector<CFX_WideString> m_csFindWhatArray; | |
| 195 int m_findNextStart; | |
| 196 int m_findPreStart; | |
| 197 FX_BOOL m_bMatchCase; | |
| 198 FX_BOOL m_bMatchWholeWord; | |
| 199 int m_resStart; | |
| 200 int m_resEnd; | |
| 201 CFX_RectArray m_resArray; | |
| 202 FX_BOOL m_IsFind; | |
| 203 }; | |
| 204 | |
| 205 class CPDF_LinkExt { | |
| 206 public: | |
| 207 CPDF_LinkExt() {} | |
| 208 int m_Start; | |
| 209 int m_Count; | |
| 210 CFX_WideString m_strUrl; | |
| 211 virtual ~CPDF_LinkExt() {} | |
| 212 }; | |
| 213 | |
| 214 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; | |
| 215 | |
| 216 class CPDF_LinkExtract : public IPDF_LinkExtract { | |
| 217 public: | |
| 218 CPDF_LinkExtract(); | |
| 219 ~CPDF_LinkExtract() override; | |
| 220 | |
| 221 // IPDF_LinkExtract | |
| 222 FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) override; | |
| 223 int CountLinks() const override; | |
| 224 CFX_WideString GetURL(int index) const override; | |
| 225 void GetBoundedSegment(int index, int& start, int& count) const override; | |
| 226 void GetRects(int index, CFX_RectArray& rects) const override; | |
| 227 | |
| 228 FX_BOOL IsExtract() const { return m_bIsParsed; } | |
| 229 | |
| 230 protected: | |
| 231 void ParseLink(); | |
| 232 void DeleteLinkList(); | |
| 233 FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); | |
| 234 bool CheckMailLink(CFX_WideString& str); | |
| 235 void AppendToLinkList(int start, int count, const CFX_WideString& strUrl); | |
| 236 | |
| 237 private: | |
| 238 LINK_InfoArray m_LinkList; | |
| 239 const CPDF_TextPage* m_pTextPage; | |
| 240 CFX_WideString m_strPageText; | |
| 241 bool m_bIsParsed; | |
| 242 }; | |
| 243 | |
| 244 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); | |
| 245 | |
| 246 #endif // CORE_FPDFTEXT_TEXT_INT_H_ | |
| OLD | NEW |