| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| 9 | 9 |
| 10 class CPDF_TextPage; | |
| 11 class CPDF_LinkExtract; | 10 class CPDF_LinkExtract; |
| 12 class CPDF_TextPageFind; | 11 class CPDF_TextPageFind; |
| 13 class CPDF_DocProgressiveSearch; | 12 class CPDF_DocProgressiveSearch; |
| 14 #define FPDFTEXT_CHAR_ERROR -1 | 13 #define FPDFTEXT_CHAR_ERROR -1 |
| 15 #define FPDFTEXT_CHAR_NORMAL 0 | 14 #define FPDFTEXT_CHAR_NORMAL 0 |
| 16 #define FPDFTEXT_CHAR_GENERATED 1 | 15 #define FPDFTEXT_CHAR_GENERATED 1 |
| 17 #define FPDFTEXT_CHAR_UNUNICODE 2 | 16 #define FPDFTEXT_CHAR_UNUNICODE 2 |
| 18 #define FPDFTEXT_CHAR_HYPHEN 3 | 17 #define FPDFTEXT_CHAR_HYPHEN 3 |
| 19 #define FPDFTEXT_CHAR_PIECE 4 | 18 #define FPDFTEXT_CHAR_PIECE 4 |
| 20 #define FPDFTEXT_MC_PASS 0 | 19 #define FPDFTEXT_MC_PASS 0 |
| (...skipping 14 matching lines...) Expand all Loading... |
| 35 typedef struct { | 34 typedef struct { |
| 36 int m_Start; | 35 int m_Start; |
| 37 int m_nCount; | 36 int m_nCount; |
| 38 } FPDF_SEGMENT; | 37 } FPDF_SEGMENT; |
| 39 typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; | 38 typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; |
| 40 typedef struct { | 39 typedef struct { |
| 41 CPDF_TextObject* m_pTextObj; | 40 CPDF_TextObject* m_pTextObj; |
| 42 CFX_AffineMatrix m_formMatrix; | 41 CFX_AffineMatrix m_formMatrix; |
| 43 } PDFTEXT_Obj; | 42 } PDFTEXT_Obj; |
| 44 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; | 43 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; |
| 44 |
| 45 class CPDF_TextPage : public IPDF_TextPage { | 45 class CPDF_TextPage : public IPDF_TextPage { |
| 46 public: | 46 public: |
| 47 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); | 47 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); |
| 48 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); | 48 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); |
| 49 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); | 49 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); |
| 50 virtual FX_BOOL ParseTextPage(); | 50 ~CPDF_TextPage() override{}; |
| 51 virtual void NormalizeObjects(FX_BOOL bNormalize); | |
| 52 virtual FX_BOOL IsParsered() const { return m_IsParsered; } | |
| 53 virtual ~CPDF_TextPage(){}; | |
| 54 | 51 |
| 55 public: | 52 // IPDF_TextPage |
| 56 virtual int CharIndexFromTextIndex(int TextIndex) const; | 53 FX_BOOL ParseTextPage() override; |
| 57 virtual int TextIndexFromCharIndex(int CharIndex) const; | 54 void NormalizeObjects(FX_BOOL bNormalize) override; |
| 58 virtual int CountChars() const; | 55 FX_BOOL IsParsered() const override { return m_IsParsered; } |
| 59 virtual void GetCharInfo(int index, FPDF_CHAR_INFO& info) const; | 56 int CharIndexFromTextIndex(int TextIndex) const override; |
| 60 virtual void GetRectArray(int start, | 57 int TextIndexFromCharIndex(int CharIndex) const override; |
| 61 int nCount, | 58 int CountChars() const override; |
| 62 CFX_RectArray& rectArray) const; | 59 void GetCharInfo(int index, FPDF_CHAR_INFO& info) const override; |
| 63 virtual int GetIndexAtPos(CPDF_Point point, | 60 void GetRectArray(int start, |
| 64 FX_FLOAT xTolerance, | 61 int nCount, |
| 65 FX_FLOAT yTolerance) const; | 62 CFX_RectArray& rectArray) const override; |
| 66 virtual int GetIndexAtPos(FX_FLOAT x, | 63 int GetIndexAtPos(CPDF_Point point, |
| 67 FX_FLOAT y, | 64 FX_FLOAT xTolerance, |
| 68 FX_FLOAT xTolerance, | 65 FX_FLOAT yTolerance) const override; |
| 69 FX_FLOAT yTolerance) const; | 66 int GetIndexAtPos(FX_FLOAT x, |
| 70 virtual CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const; | 67 FX_FLOAT y, |
| 71 virtual void GetRectsArrayByRect(const CFX_FloatRect& rect, | 68 FX_FLOAT xTolerance, |
| 72 CFX_RectArray& resRectArray) const; | 69 FX_FLOAT yTolerance) const override; |
| 73 virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const; | 70 CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const override; |
| 71 void GetRectsArrayByRect(const CFX_FloatRect& rect, |
| 72 CFX_RectArray& resRectArray) const override; |
| 73 CFX_WideString GetPageText(int start = 0, int nCount = -1) const override; |
| 74 int CountRects(int start, int nCount) override; |
| 75 void GetRect(int rectIndex, |
| 76 FX_FLOAT& left, |
| 77 FX_FLOAT& top, |
| 78 FX_FLOAT& right, |
| 79 FX_FLOAT& bottom) const override; |
| 80 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override; |
| 81 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override; |
| 82 int CountBoundedSegments(FX_FLOAT left, |
| 83 FX_FLOAT top, |
| 84 FX_FLOAT right, |
| 85 FX_FLOAT bottom, |
| 86 FX_BOOL bContains = FALSE) override; |
| 87 void GetBoundedSegment(int index, int& start, int& count) const override; |
| 88 int GetWordBreak(int index, int direction) const override; |
| 74 | 89 |
| 75 virtual int CountRects(int start, int nCount); | |
| 76 virtual void GetRect(int rectIndex, | |
| 77 FX_FLOAT& left, | |
| 78 FX_FLOAT& top, | |
| 79 FX_FLOAT& right, | |
| 80 FX_FLOAT& bottom) const; | |
| 81 virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate); | |
| 82 virtual FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate); | |
| 83 virtual int CountBoundedSegments(FX_FLOAT left, | |
| 84 FX_FLOAT top, | |
| 85 FX_FLOAT right, | |
| 86 FX_FLOAT bottom, | |
| 87 FX_BOOL bContains = FALSE); | |
| 88 virtual void GetBoundedSegment(int index, int& start, int& count) const; | |
| 89 virtual int GetWordBreak(int index, int direction) const; | |
| 90 | |
| 91 public: | |
| 92 const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; } | 90 const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; } |
| 93 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, | 91 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, |
| 94 const CFX_FloatRect& rect2); | 92 const CFX_FloatRect& rect2); |
| 95 static FX_BOOL IsLetter(FX_WCHAR unicode); | 93 static FX_BOOL IsLetter(FX_WCHAR unicode); |
| 96 | 94 |
| 97 private: | 95 private: |
| 98 FX_BOOL IsHyphen(FX_WCHAR curChar); | 96 FX_BOOL IsHyphen(FX_WCHAR curChar); |
| 99 bool IsControlChar(const PAGECHAR_INFO& charInfo); | 97 bool IsControlChar(const PAGECHAR_INFO& charInfo); |
| 100 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); | 98 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); |
| 101 void ProcessObject(); | 99 void ProcessObject(); |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 140 CFX_AffineMatrix m_perMatrix; | 138 CFX_AffineMatrix m_perMatrix; |
| 141 FX_BOOL m_IsParsered; | 139 FX_BOOL m_IsParsered; |
| 142 CFX_AffineMatrix m_DisplayMatrix; | 140 CFX_AffineMatrix m_DisplayMatrix; |
| 143 | 141 |
| 144 SEGMENT_Array m_Segment; | 142 SEGMENT_Array m_Segment; |
| 145 CFX_RectArray m_SelRects; | 143 CFX_RectArray m_SelRects; |
| 146 LINEOBJ m_LineObj; | 144 LINEOBJ m_LineObj; |
| 147 int32_t m_TextlineDir; | 145 int32_t m_TextlineDir; |
| 148 CFX_FloatRect m_CurlineRect; | 146 CFX_FloatRect m_CurlineRect; |
| 149 }; | 147 }; |
| 148 |
| 150 class CPDF_TextPageFind : public IPDF_TextPageFind { | 149 class CPDF_TextPageFind : public IPDF_TextPageFind { |
| 151 public: | 150 public: |
| 152 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); | 151 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); |
| 153 virtual ~CPDF_TextPageFind(){}; | 152 ~CPDF_TextPageFind() override{}; |
| 154 | 153 |
| 155 public: | 154 // IPDF_TextPageFind |
| 156 virtual FX_BOOL FindFirst(const CFX_WideString& findwhat, | 155 FX_BOOL FindFirst(const CFX_WideString& findwhat, |
| 157 int flags, | 156 int flags, |
| 158 int startPos = 0); | 157 int startPos = 0) override; |
| 159 virtual FX_BOOL FindNext(); | 158 FX_BOOL FindNext() override; |
| 160 virtual FX_BOOL FindPrev(); | 159 FX_BOOL FindPrev() override; |
| 161 | 160 void GetRectArray(CFX_RectArray& rects) const override; |
| 162 virtual void GetRectArray(CFX_RectArray& rects) const; | 161 int GetCurOrder() const override; |
| 163 virtual int GetCurOrder() const; | 162 int GetMatchedCount() const override; |
| 164 virtual int GetMatchedCount() const; | |
| 165 | 163 |
| 166 protected: | 164 protected: |
| 167 void ExtractFindWhat(const CFX_WideString& findwhat); | 165 void ExtractFindWhat(const CFX_WideString& findwhat); |
| 168 FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText, | 166 FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText, |
| 169 int startPos, | 167 int startPos, |
| 170 int endPos); | 168 int endPos); |
| 171 FX_BOOL ExtractSubString(CFX_WideString& rString, | 169 FX_BOOL ExtractSubString(CFX_WideString& rString, |
| 172 const FX_WCHAR* lpszFullString, | 170 const FX_WCHAR* lpszFullString, |
| 173 int iSubString, | 171 int iSubString, |
| 174 FX_WCHAR chSep); | 172 FX_WCHAR chSep); |
| (...skipping 21 matching lines...) Expand all Loading... |
| 196 FX_BOOL m_IsFind; | 194 FX_BOOL m_IsFind; |
| 197 }; | 195 }; |
| 198 class CPDF_LinkExt { | 196 class CPDF_LinkExt { |
| 199 public: | 197 public: |
| 200 CPDF_LinkExt(){}; | 198 CPDF_LinkExt(){}; |
| 201 int m_Start; | 199 int m_Start; |
| 202 int m_Count; | 200 int m_Count; |
| 203 CFX_WideString m_strUrl; | 201 CFX_WideString m_strUrl; |
| 204 virtual ~CPDF_LinkExt(){}; | 202 virtual ~CPDF_LinkExt(){}; |
| 205 }; | 203 }; |
| 204 |
| 206 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; | 205 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; |
| 206 |
| 207 class CPDF_LinkExtract : public IPDF_LinkExtract { | 207 class CPDF_LinkExtract : public IPDF_LinkExtract { |
| 208 public: | 208 public: |
| 209 CPDF_LinkExtract(); | 209 CPDF_LinkExtract(); |
| 210 virtual ~CPDF_LinkExtract(); | 210 ~CPDF_LinkExtract() override; |
| 211 virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage); | |
| 212 virtual FX_BOOL IsExtract() const { return m_IsParserd; } | |
| 213 | 211 |
| 214 public: | 212 // IPDF_LinkExtract |
| 215 virtual int CountLinks() const; | 213 FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) override; |
| 216 virtual CFX_WideString GetURL(int index) const; | 214 int CountLinks() const override; |
| 217 virtual void GetBoundedSegment(int index, int& start, int& count) const; | 215 CFX_WideString GetURL(int index) const override; |
| 218 virtual void GetRects(int index, CFX_RectArray& rects) const; | 216 void GetBoundedSegment(int index, int& start, int& count) const override; |
| 217 void GetRects(int index, CFX_RectArray& rects) const override; |
| 218 |
| 219 FX_BOOL IsExtract() const { return m_IsParserd; } |
| 219 | 220 |
| 220 protected: | 221 protected: |
| 221 void parserLink(); | 222 void parserLink(); |
| 222 void DeleteLinkList(); | 223 void DeleteLinkList(); |
| 223 FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); | 224 FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); |
| 224 FX_BOOL CheckMailLink(CFX_WideString& str); | 225 FX_BOOL CheckMailLink(CFX_WideString& str); |
| 225 FX_BOOL AppendToLinkList(int start, int count, const CFX_WideString& strUrl); | 226 FX_BOOL AppendToLinkList(int start, int count, const CFX_WideString& strUrl); |
| 226 | 227 |
| 227 private: | 228 private: |
| 228 LINK_InfoArray m_LinkList; | 229 LINK_InfoArray m_LinkList; |
| 229 const CPDF_TextPage* m_pTextPage; | 230 const CPDF_TextPage* m_pTextPage; |
| 230 CFX_WideString m_strPageText; | 231 CFX_WideString m_strPageText; |
| 231 FX_BOOL m_IsParserd; | 232 FX_BOOL m_IsParserd; |
| 232 }; | 233 }; |
| 234 |
| 233 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); | 235 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); |
| 234 void NormalizeString(CFX_WideString& str); | 236 void NormalizeString(CFX_WideString& str); |
| 235 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); | 237 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); |
| 236 | 238 |
| 237 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 239 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| OLD | NEW |