OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
9 | 9 |
10 class CPDF_TextPage; | |
11 class CPDF_LinkExtract; | 10 class CPDF_LinkExtract; |
12 class CPDF_TextPageFind; | 11 class CPDF_TextPageFind; |
13 class CPDF_DocProgressiveSearch; | 12 class CPDF_DocProgressiveSearch; |
// Character-kind codes. The names suggest they classify entries produced by
// text extraction (error / normal / generated / no-Unicode / hyphen / piece);
// NOTE(review): confirm exact meaning against the users of these macros.
// The negative value is parenthesized so it always expands as one operand.
#define FPDFTEXT_CHAR_ERROR (-1)
#define FPDFTEXT_CHAR_NORMAL 0
#define FPDFTEXT_CHAR_GENERATED 1
#define FPDFTEXT_CHAR_UNUNICODE 2
#define FPDFTEXT_CHAR_HYPHEN 3
#define FPDFTEXT_CHAR_PIECE 4
// FPDFTEXT_MC_* result code; presumably "marked content" related - confirm.
#define FPDFTEXT_MC_PASS 0
(...skipping 14 matching lines...) Expand all Loading... |
// Plain start/count pair describing one segment.
// NOTE(review): the units (presumably character indices) are not visible in
// this header - confirm against the code that fills these fields.
struct FPDF_SEGMENT {
  int m_Start;   // First index of the segment.
  int m_nCount;  // Number of items in the segment.
};
39 typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; | 38 typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; |
40 typedef struct { | 39 typedef struct { |
41 CPDF_TextObject* m_pTextObj; | 40 CPDF_TextObject* m_pTextObj; |
42 CFX_AffineMatrix m_formMatrix; | 41 CFX_AffineMatrix m_formMatrix; |
43 } PDFTEXT_Obj; | 42 } PDFTEXT_Obj; |
44 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; | 43 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; |
| 44 |
45 class CPDF_TextPage : public IPDF_TextPage { | 45 class CPDF_TextPage : public IPDF_TextPage { |
46 public: | 46 public: |
47 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); | 47 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); |
48 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); | 48 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); |
49 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); | 49 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); |
50 virtual FX_BOOL ParseTextPage(); | 50 ~CPDF_TextPage() override{}; |
51 virtual void NormalizeObjects(FX_BOOL bNormalize); | |
52 virtual FX_BOOL IsParsered() const { return m_IsParsered; } | |
53 virtual ~CPDF_TextPage(){}; | |
54 | 51 |
55 public: | 52 // IPDF_TextPage |
56 virtual int CharIndexFromTextIndex(int TextIndex) const; | 53 FX_BOOL ParseTextPage() override; |
57 virtual int TextIndexFromCharIndex(int CharIndex) const; | 54 void NormalizeObjects(FX_BOOL bNormalize) override; |
58 virtual int CountChars() const; | 55 FX_BOOL IsParsered() const override { return m_IsParsered; } |
59 virtual void GetCharInfo(int index, FPDF_CHAR_INFO& info) const; | 56 int CharIndexFromTextIndex(int TextIndex) const override; |
60 virtual void GetRectArray(int start, | 57 int TextIndexFromCharIndex(int CharIndex) const override; |
61 int nCount, | 58 int CountChars() const override; |
62 CFX_RectArray& rectArray) const; | 59 void GetCharInfo(int index, FPDF_CHAR_INFO& info) const override; |
63 virtual int GetIndexAtPos(CPDF_Point point, | 60 void GetRectArray(int start, |
64 FX_FLOAT xTolerance, | 61 int nCount, |
65 FX_FLOAT yTolerance) const; | 62 CFX_RectArray& rectArray) const override; |
66 virtual int GetIndexAtPos(FX_FLOAT x, | 63 int GetIndexAtPos(CPDF_Point point, |
67 FX_FLOAT y, | 64 FX_FLOAT xTolerance, |
68 FX_FLOAT xTolerance, | 65 FX_FLOAT yTolerance) const override; |
69 FX_FLOAT yTolerance) const; | 66 int GetIndexAtPos(FX_FLOAT x, |
70 virtual CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const; | 67 FX_FLOAT y, |
71 virtual void GetRectsArrayByRect(const CFX_FloatRect& rect, | 68 FX_FLOAT xTolerance, |
72 CFX_RectArray& resRectArray) const; | 69 FX_FLOAT yTolerance) const override; |
73 virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const; | 70 CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const override; |
| 71 void GetRectsArrayByRect(const CFX_FloatRect& rect, |
| 72 CFX_RectArray& resRectArray) const override; |
| 73 CFX_WideString GetPageText(int start = 0, int nCount = -1) const override; |
| 74 int CountRects(int start, int nCount) override; |
| 75 void GetRect(int rectIndex, |
| 76 FX_FLOAT& left, |
| 77 FX_FLOAT& top, |
| 78 FX_FLOAT& right, |
| 79 FX_FLOAT& bottom) const override; |
| 80 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override; |
| 81 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override; |
| 82 int CountBoundedSegments(FX_FLOAT left, |
| 83 FX_FLOAT top, |
| 84 FX_FLOAT right, |
| 85 FX_FLOAT bottom, |
| 86 FX_BOOL bContains = FALSE) override; |
| 87 void GetBoundedSegment(int index, int& start, int& count) const override; |
| 88 int GetWordBreak(int index, int direction) const override; |
74 | 89 |
75 virtual int CountRects(int start, int nCount); | |
76 virtual void GetRect(int rectIndex, | |
77 FX_FLOAT& left, | |
78 FX_FLOAT& top, | |
79 FX_FLOAT& right, | |
80 FX_FLOAT& bottom) const; | |
81 virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate); | |
82 virtual FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate); | |
83 virtual int CountBoundedSegments(FX_FLOAT left, | |
84 FX_FLOAT top, | |
85 FX_FLOAT right, | |
86 FX_FLOAT bottom, | |
87 FX_BOOL bContains = FALSE); | |
88 virtual void GetBoundedSegment(int index, int& start, int& count) const; | |
89 virtual int GetWordBreak(int index, int direction) const; | |
90 | |
91 public: | |
92 const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; } | 90 const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; } |
93 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, | 91 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, |
94 const CFX_FloatRect& rect2); | 92 const CFX_FloatRect& rect2); |
95 static FX_BOOL IsLetter(FX_WCHAR unicode); | 93 static FX_BOOL IsLetter(FX_WCHAR unicode); |
96 | 94 |
97 private: | 95 private: |
98 FX_BOOL IsHyphen(FX_WCHAR curChar); | 96 FX_BOOL IsHyphen(FX_WCHAR curChar); |
99 bool IsControlChar(const PAGECHAR_INFO& charInfo); | 97 bool IsControlChar(const PAGECHAR_INFO& charInfo); |
100 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); | 98 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); |
101 void ProcessObject(); | 99 void ProcessObject(); |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
140 CFX_AffineMatrix m_perMatrix; | 138 CFX_AffineMatrix m_perMatrix; |
141 FX_BOOL m_IsParsered; | 139 FX_BOOL m_IsParsered; |
142 CFX_AffineMatrix m_DisplayMatrix; | 140 CFX_AffineMatrix m_DisplayMatrix; |
143 | 141 |
144 SEGMENT_Array m_Segment; | 142 SEGMENT_Array m_Segment; |
145 CFX_RectArray m_SelRects; | 143 CFX_RectArray m_SelRects; |
146 LINEOBJ m_LineObj; | 144 LINEOBJ m_LineObj; |
147 int32_t m_TextlineDir; | 145 int32_t m_TextlineDir; |
148 CFX_FloatRect m_CurlineRect; | 146 CFX_FloatRect m_CurlineRect; |
149 }; | 147 }; |
| 148 |
150 class CPDF_TextPageFind : public IPDF_TextPageFind { | 149 class CPDF_TextPageFind : public IPDF_TextPageFind { |
151 public: | 150 public: |
152 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); | 151 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); |
153 virtual ~CPDF_TextPageFind(){}; | 152 ~CPDF_TextPageFind() override{}; |
154 | 153 |
155 public: | 154 // IPDF_TextPageFind |
156 virtual FX_BOOL FindFirst(const CFX_WideString& findwhat, | 155 FX_BOOL FindFirst(const CFX_WideString& findwhat, |
157 int flags, | 156 int flags, |
158 int startPos = 0); | 157 int startPos = 0) override; |
159 virtual FX_BOOL FindNext(); | 158 FX_BOOL FindNext() override; |
160 virtual FX_BOOL FindPrev(); | 159 FX_BOOL FindPrev() override; |
161 | 160 void GetRectArray(CFX_RectArray& rects) const override; |
162 virtual void GetRectArray(CFX_RectArray& rects) const; | 161 int GetCurOrder() const override; |
163 virtual int GetCurOrder() const; | 162 int GetMatchedCount() const override; |
164 virtual int GetMatchedCount() const; | |
165 | 163 |
166 protected: | 164 protected: |
167 void ExtractFindWhat(const CFX_WideString& findwhat); | 165 void ExtractFindWhat(const CFX_WideString& findwhat); |
168 FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText, | 166 FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText, |
169 int startPos, | 167 int startPos, |
170 int endPos); | 168 int endPos); |
171 FX_BOOL ExtractSubString(CFX_WideString& rString, | 169 FX_BOOL ExtractSubString(CFX_WideString& rString, |
172 const FX_WCHAR* lpszFullString, | 170 const FX_WCHAR* lpszFullString, |
173 int iSubString, | 171 int iSubString, |
174 FX_WCHAR chSep); | 172 FX_WCHAR chSep); |
(...skipping 21 matching lines...) Expand all Loading... |
196 FX_BOOL m_IsFind; | 194 FX_BOOL m_IsFind; |
197 }; | 195 }; |
198 class CPDF_LinkExt { | 196 class CPDF_LinkExt { |
199 public: | 197 public: |
200 CPDF_LinkExt(){}; | 198 CPDF_LinkExt(){}; |
201 int m_Start; | 199 int m_Start; |
202 int m_Count; | 200 int m_Count; |
203 CFX_WideString m_strUrl; | 201 CFX_WideString m_strUrl; |
204 virtual ~CPDF_LinkExt(){}; | 202 virtual ~CPDF_LinkExt(){}; |
205 }; | 203 }; |
| 204 |
206 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; | 205 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; |
| 206 |
207 class CPDF_LinkExtract : public IPDF_LinkExtract { | 207 class CPDF_LinkExtract : public IPDF_LinkExtract { |
208 public: | 208 public: |
209 CPDF_LinkExtract(); | 209 CPDF_LinkExtract(); |
210 virtual ~CPDF_LinkExtract(); | 210 ~CPDF_LinkExtract() override; |
211 virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage); | |
212 virtual FX_BOOL IsExtract() const { return m_IsParserd; } | |
213 | 211 |
214 public: | 212 // IPDF_LinkExtract |
215 virtual int CountLinks() const; | 213 FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) override; |
216 virtual CFX_WideString GetURL(int index) const; | 214 int CountLinks() const override; |
217 virtual void GetBoundedSegment(int index, int& start, int& count) const; | 215 CFX_WideString GetURL(int index) const override; |
218 virtual void GetRects(int index, CFX_RectArray& rects) const; | 216 void GetBoundedSegment(int index, int& start, int& count) const override; |
| 217 void GetRects(int index, CFX_RectArray& rects) const override; |
| 218 |
| 219 FX_BOOL IsExtract() const { return m_IsParserd; } |
219 | 220 |
220 protected: | 221 protected: |
221 void parserLink(); | 222 void parserLink(); |
222 void DeleteLinkList(); | 223 void DeleteLinkList(); |
223 FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); | 224 FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); |
224 FX_BOOL CheckMailLink(CFX_WideString& str); | 225 FX_BOOL CheckMailLink(CFX_WideString& str); |
225 FX_BOOL AppendToLinkList(int start, int count, const CFX_WideString& strUrl); | 226 FX_BOOL AppendToLinkList(int start, int count, const CFX_WideString& strUrl); |
226 | 227 |
227 private: | 228 private: |
228 LINK_InfoArray m_LinkList; | 229 LINK_InfoArray m_LinkList; |
229 const CPDF_TextPage* m_pTextPage; | 230 const CPDF_TextPage* m_pTextPage; |
230 CFX_WideString m_strPageText; | 231 CFX_WideString m_strPageText; |
231 FX_BOOL m_IsParserd; | 232 FX_BOOL m_IsParserd; |
232 }; | 233 }; |
| 234 |
233 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); | 235 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); |
234 void NormalizeString(CFX_WideString& str); | 236 void NormalizeString(CFX_WideString& str); |
235 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); | 237 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); |
236 | 238 |
237 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 239 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
OLD | NEW |