| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| 9 | 9 |
| 10 class CPDF_TextPage; | |
| 11 class CPDF_LinkExtract; | 10 class CPDF_LinkExtract; |
| 12 class CPDF_TextPageFind; | 11 class CPDF_TextPageFind; |
| 13 class CPDF_DocProgressiveSearch; | 12 class CPDF_DocProgressiveSearch; |
| 14 #define FPDFTEXT_CHAR_ERROR -1 | 13 #define FPDFTEXT_CHAR_ERROR -1 |
| 15 #define FPDFTEXT_CHAR_NORMAL 0 | 14 #define FPDFTEXT_CHAR_NORMAL 0 |
| 16 #define FPDFTEXT_CHAR_GENERATED 1 | 15 #define FPDFTEXT_CHAR_GENERATED 1 |
| 17 #define FPDFTEXT_CHAR_UNUNICODE 2 | 16 #define FPDFTEXT_CHAR_UNUNICODE 2 |
| 18 #define FPDFTEXT_CHAR_HYPHEN 3 | 17 #define FPDFTEXT_CHAR_HYPHEN 3 |
| 19 #define FPDFTEXT_CHAR_PIECE 4 | 18 #define FPDFTEXT_CHAR_PIECE 4 |
| 20 #define FPDFTEXT_MC_PASS 0 | 19 #define FPDFTEXT_MC_PASS 0 |
| (...skipping 14 matching lines...) Expand all Loading... |
| 35 typedef struct { | 34 typedef struct { |
| 36 int m_Start; | 35 int m_Start; |
| 37 int m_nCount; | 36 int m_nCount; |
| 38 } FPDF_SEGMENT; | 37 } FPDF_SEGMENT; |
| 39 typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; | 38 typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; |
| 40 typedef struct { | 39 typedef struct { |
| 41 CPDF_TextObject* m_pTextObj; | 40 CPDF_TextObject* m_pTextObj; |
| 42 CFX_AffineMatrix m_formMatrix; | 41 CFX_AffineMatrix m_formMatrix; |
| 43 } PDFTEXT_Obj; | 42 } PDFTEXT_Obj; |
| 44 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; | 43 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; |
| 44 |
| 45 class CPDF_TextPage : public IPDF_TextPage { | 45 class CPDF_TextPage : public IPDF_TextPage { |
| 46 public: | 46 public: |
| 47 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); | 47 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); |
| 48 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); | 48 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); |
| 49 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); | 49 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); |
| 50 virtual FX_BOOL ParseTextPage(); | 50 ~CPDF_TextPage() override{}; |
| 51 virtual void NormalizeObjects(FX_BOOL bNormalize); | |
| 52 virtual FX_BOOL IsParsered() const { return m_IsParsered; } | |
| 53 virtual ~CPDF_TextPage(){}; | |
| 54 | 51 |
| 55 public: | 52 // IPDF_TextPage |
| 56 virtual int CharIndexFromTextIndex(int TextIndex) const; | 53 FX_BOOL ParseTextPage() override; |
| 57 virtual int TextIndexFromCharIndex(int CharIndex) const; | 54 void NormalizeObjects(FX_BOOL bNormalize) override; |
| 58 virtual int CountChars() const; | 55 FX_BOOL IsParsered() const override { return m_IsParsered; } |
| 59 virtual void GetCharInfo(int index, FPDF_CHAR_INFO& info) const; | 56 int CharIndexFromTextIndex(int TextIndex) const override; |
| 60 virtual void GetRectArray(int start, | 57 int TextIndexFromCharIndex(int CharIndex) const override; |
| 61 int nCount, | 58 int CountChars() const override; |
| 62 CFX_RectArray& rectArray) const; | 59 void GetCharInfo(int index, FPDF_CHAR_INFO& info) const override; |
| 63 virtual int GetIndexAtPos(CPDF_Point point, | 60 void GetRectArray(int start, |
| 64 FX_FLOAT xTolerance, | 61 int nCount, |
| 65 FX_FLOAT yTolerance) const; | 62 CFX_RectArray& rectArray) const override; |
| 66 virtual int GetIndexAtPos(FX_FLOAT x, | 63 int GetIndexAtPos(CPDF_Point point, |
| 67 FX_FLOAT y, | 64 FX_FLOAT xTolerance, |
| 68 FX_FLOAT xTolerance, | 65 FX_FLOAT yTolerance) const override; |
| 69 FX_FLOAT yTolerance) const; | 66 int GetIndexAtPos(FX_FLOAT x, |
| 70 virtual CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const; | 67 FX_FLOAT y, |
| 71 virtual void GetRectsArrayByRect(const CFX_FloatRect& rect, | 68 FX_FLOAT xTolerance, |
| 72 CFX_RectArray& resRectArray) const; | 69 FX_FLOAT yTolerance) const override; |
| 73 virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const; | 70 CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const override; |
| 71 void GetRectsArrayByRect(const CFX_FloatRect& rect, |
| 72 CFX_RectArray& resRectArray) const override; |
| 73 CFX_WideString GetPageText(int start = 0, int nCount = -1) const override; |
| 74 int CountRects(int start, int nCount) override; |
| 75 void GetRect(int rectIndex, |
| 76 FX_FLOAT& left, |
| 77 FX_FLOAT& top, |
| 78 FX_FLOAT& right, |
| 79 FX_FLOAT& bottom) const override; |
| 80 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override; |
| 81 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override; |
| 82 int CountBoundedSegments(FX_FLOAT left, |
| 83 FX_FLOAT top, |
| 84 FX_FLOAT right, |
| 85 FX_FLOAT bottom, |
| 86 FX_BOOL bContains = FALSE) override; |
| 87 void GetBoundedSegment(int index, int& start, int& count) const override; |
| 88 int GetWordBreak(int index, int direction) const override; |
| 74 | 89 |
| 75 virtual int CountRects(int start, int nCount); | |
| 76 virtual void GetRect(int rectIndex, | |
| 77 FX_FLOAT& left, | |
| 78 FX_FLOAT& top, | |
| 79 FX_FLOAT& right, | |
| 80 FX_FLOAT& bottom) const; | |
| 81 virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate); | |
| 82 virtual FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate); | |
| 83 virtual int CountBoundedSegments(FX_FLOAT left, | |
| 84 FX_FLOAT top, | |
| 85 FX_FLOAT right, | |
| 86 FX_FLOAT bottom, | |
| 87 FX_BOOL bContains = FALSE); | |
| 88 virtual void GetBoundedSegment(int index, int& start, int& count) const; | |
| 89 virtual int GetWordBreak(int index, int direction) const; | |
| 90 | |
| 91 public: | |
| 92 const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; } | 90 const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; } |
| 93 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, | 91 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, |
| 94 const CFX_FloatRect& rect2); | 92 const CFX_FloatRect& rect2); |
| 95 static FX_BOOL IsLetter(FX_WCHAR unicode); | 93 static FX_BOOL IsLetter(FX_WCHAR unicode); |
| 96 | 94 |
| 97 private: | 95 private: |
| 98 FX_BOOL IsHyphen(FX_WCHAR curChar); | 96 FX_BOOL IsHyphen(FX_WCHAR curChar); |
| 99 bool IsControlChar(const PAGECHAR_INFO& charInfo); | 97 bool IsControlChar(const PAGECHAR_INFO& charInfo); |
| 100 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); | 98 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); |
| 101 void ProcessObject(); | 99 void ProcessObject(); |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 136 CFX_AffineMatrix m_perMatrix; | 134 CFX_AffineMatrix m_perMatrix; |
| 137 FX_BOOL m_IsParsered; | 135 FX_BOOL m_IsParsered; |
| 138 CFX_AffineMatrix m_DisplayMatrix; | 136 CFX_AffineMatrix m_DisplayMatrix; |
| 139 | 137 |
| 140 SEGMENT_Array m_Segment; | 138 SEGMENT_Array m_Segment; |
| 141 CFX_RectArray m_SelRects; | 139 CFX_RectArray m_SelRects; |
| 142 LINEOBJ m_LineObj; | 140 LINEOBJ m_LineObj; |
| 143 int32_t m_TextlineDir; | 141 int32_t m_TextlineDir; |
| 144 CFX_FloatRect m_CurlineRect; | 142 CFX_FloatRect m_CurlineRect; |
| 145 }; | 143 }; |
| 144 |
| 146 class CPDF_TextPageFind : public IPDF_TextPageFind { | 145 class CPDF_TextPageFind : public IPDF_TextPageFind { |
| 147 public: | 146 public: |
| 148 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); | 147 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); |
| 149 virtual ~CPDF_TextPageFind(){}; | 148 ~CPDF_TextPageFind() override{}; |
| 150 | 149 |
| 151 public: | 150 // IPDF_TextPageFind |
| 152 virtual FX_BOOL FindFirst(const CFX_WideString& findwhat, | 151 FX_BOOL FindFirst(const CFX_WideString& findwhat, |
| 153 int flags, | 152 int flags, |
| 154 int startPos = 0); | 153 int startPos = 0) override; |
| 155 virtual FX_BOOL FindNext(); | 154 FX_BOOL FindNext() override; |
| 156 virtual FX_BOOL FindPrev(); | 155 FX_BOOL FindPrev() override; |
| 157 | 156 void GetRectArray(CFX_RectArray& rects) const override; |
| 158 virtual void GetRectArray(CFX_RectArray& rects) const; | 157 int GetCurOrder() const override; |
| 159 virtual int GetCurOrder() const; | 158 int GetMatchedCount() const override; |
| 160 virtual int GetMatchedCount() const; | |
| 161 | 159 |
| 162 protected: | 160 protected: |
| 163 void ExtractFindWhat(const CFX_WideString& findwhat); | 161 void ExtractFindWhat(const CFX_WideString& findwhat); |
| 164 FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText, | 162 FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText, |
| 165 int startPos, | 163 int startPos, |
| 166 int endPos); | 164 int endPos); |
| 167 FX_BOOL ExtractSubString(CFX_WideString& rString, | 165 FX_BOOL ExtractSubString(CFX_WideString& rString, |
| 168 const FX_WCHAR* lpszFullString, | 166 const FX_WCHAR* lpszFullString, |
| 169 int iSubString, | 167 int iSubString, |
| 170 FX_WCHAR chSep); | 168 FX_WCHAR chSep); |
| (...skipping 21 matching lines...) Expand all Loading... |
| 192 FX_BOOL m_IsFind; | 190 FX_BOOL m_IsFind; |
| 193 }; | 191 }; |
| 194 class CPDF_LinkExt { | 192 class CPDF_LinkExt { |
| 195 public: | 193 public: |
| 196 CPDF_LinkExt(){}; | 194 CPDF_LinkExt(){}; |
| 197 int m_Start; | 195 int m_Start; |
| 198 int m_Count; | 196 int m_Count; |
| 199 CFX_WideString m_strUrl; | 197 CFX_WideString m_strUrl; |
| 200 virtual ~CPDF_LinkExt(){}; | 198 virtual ~CPDF_LinkExt(){}; |
| 201 }; | 199 }; |
| 200 |
| 202 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; | 201 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; |
| 202 |
| 203 class CPDF_LinkExtract : public IPDF_LinkExtract { | 203 class CPDF_LinkExtract : public IPDF_LinkExtract { |
| 204 public: | 204 public: |
| 205 CPDF_LinkExtract(); | 205 CPDF_LinkExtract(); |
| 206 virtual ~CPDF_LinkExtract(); | 206 ~CPDF_LinkExtract() override; |
| 207 virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage); | |
| 208 virtual FX_BOOL IsExtract() const { return m_IsParserd; } | |
| 209 | 207 |
| 210 public: | 208 // IPDF_LinkExtract |
| 211 virtual int CountLinks() const; | 209 FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) override; |
| 212 virtual CFX_WideString GetURL(int index) const; | 210 int CountLinks() const override; |
| 213 virtual void GetBoundedSegment(int index, int& start, int& count) const; | 211 CFX_WideString GetURL(int index) const override; |
| 214 virtual void GetRects(int index, CFX_RectArray& rects) const; | 212 void GetBoundedSegment(int index, int& start, int& count) const override; |
| 213 void GetRects(int index, CFX_RectArray& rects) const override; |
| 214 |
| 215 FX_BOOL IsExtract() const { return m_IsParserd; } |
| 215 | 216 |
| 216 protected: | 217 protected: |
| 217 void parserLink(); | 218 void parserLink(); |
| 218 void DeleteLinkList(); | 219 void DeleteLinkList(); |
| 219 FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); | 220 FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); |
| 220 FX_BOOL CheckMailLink(CFX_WideString& str); | 221 FX_BOOL CheckMailLink(CFX_WideString& str); |
| 221 FX_BOOL AppendToLinkList(int start, int count, const CFX_WideString& strUrl); | 222 FX_BOOL AppendToLinkList(int start, int count, const CFX_WideString& strUrl); |
| 222 | 223 |
| 223 private: | 224 private: |
| 224 LINK_InfoArray m_LinkList; | 225 LINK_InfoArray m_LinkList; |
| 225 const CPDF_TextPage* m_pTextPage; | 226 const CPDF_TextPage* m_pTextPage; |
| 226 CFX_WideString m_strPageText; | 227 CFX_WideString m_strPageText; |
| 227 FX_BOOL m_IsParserd; | 228 FX_BOOL m_IsParserd; |
| 228 }; | 229 }; |
| 230 |
| 229 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); | 231 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); |
| 230 void NormalizeString(CFX_WideString& str); | 232 void NormalizeString(CFX_WideString& str); |
| 231 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); | 233 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); |
| 232 | 234 |
| 233 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 235 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| OLD | NEW |