OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
9 | 9 |
10 class CPDF_TextPage; | |
11 class CPDF_LinkExtract; | 10 class CPDF_LinkExtract; |
12 class CPDF_TextPageFind; | 11 class CPDF_TextPageFind; |
13 class CPDF_DocProgressiveSearch; | 12 class CPDF_DocProgressiveSearch; |
14 #define FPDFTEXT_CHAR_ERROR -1 | 13 #define FPDFTEXT_CHAR_ERROR -1 |
15 #define FPDFTEXT_CHAR_NORMAL 0 | 14 #define FPDFTEXT_CHAR_NORMAL 0 |
16 #define FPDFTEXT_CHAR_GENERATED 1 | 15 #define FPDFTEXT_CHAR_GENERATED 1 |
17 #define FPDFTEXT_CHAR_UNUNICODE 2 | 16 #define FPDFTEXT_CHAR_UNUNICODE 2 |
18 #define FPDFTEXT_CHAR_HYPHEN 3 | 17 #define FPDFTEXT_CHAR_HYPHEN 3 |
19 #define FPDFTEXT_CHAR_PIECE 4 | 18 #define FPDFTEXT_CHAR_PIECE 4 |
20 #define FPDFTEXT_MC_PASS 0 | 19 #define FPDFTEXT_MC_PASS 0 |
(...skipping 14 matching lines...) Expand all Loading... |
35 typedef struct { | 34 typedef struct { |
36 int m_Start; | 35 int m_Start; |
37 int m_nCount; | 36 int m_nCount; |
38 } FPDF_SEGMENT; | 37 } FPDF_SEGMENT; |
39 typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; | 38 typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; |
40 typedef struct { | 39 typedef struct { |
41 CPDF_TextObject* m_pTextObj; | 40 CPDF_TextObject* m_pTextObj; |
42 CFX_AffineMatrix m_formMatrix; | 41 CFX_AffineMatrix m_formMatrix; |
43 } PDFTEXT_Obj; | 42 } PDFTEXT_Obj; |
44 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; | 43 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; |
| 44 |
45 class CPDF_TextPage : public IPDF_TextPage { | 45 class CPDF_TextPage : public IPDF_TextPage { |
46 public: | 46 public: |
47 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); | 47 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); |
48 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); | 48 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); |
49 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); | 49 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); |
50 virtual FX_BOOL ParseTextPage(); | 50 ~CPDF_TextPage() override{}; |
51 virtual void NormalizeObjects(FX_BOOL bNormalize); | |
52 virtual FX_BOOL IsParsered() const { return m_IsParsered; } | |
53 virtual ~CPDF_TextPage(){}; | |
54 | 51 |
55 public: | 52 // IPDF_TextPage |
56 virtual int CharIndexFromTextIndex(int TextIndex) const; | 53 FX_BOOL ParseTextPage() override; |
57 virtual int TextIndexFromCharIndex(int CharIndex) const; | 54 void NormalizeObjects(FX_BOOL bNormalize) override; |
58 virtual int CountChars() const; | 55 FX_BOOL IsParsered() const override { return m_IsParsered; } |
59 virtual void GetCharInfo(int index, FPDF_CHAR_INFO& info) const; | 56 int CharIndexFromTextIndex(int TextIndex) const override; |
60 virtual void GetRectArray(int start, | 57 int TextIndexFromCharIndex(int CharIndex) const override; |
61 int nCount, | 58 int CountChars() const override; |
62 CFX_RectArray& rectArray) const; | 59 void GetCharInfo(int index, FPDF_CHAR_INFO& info) const override; |
63 virtual int GetIndexAtPos(CPDF_Point point, | 60 void GetRectArray(int start, |
64 FX_FLOAT xTolerance, | 61 int nCount, |
65 FX_FLOAT yTolerance) const; | 62 CFX_RectArray& rectArray) const override; |
66 virtual int GetIndexAtPos(FX_FLOAT x, | 63 int GetIndexAtPos(CPDF_Point point, |
67 FX_FLOAT y, | 64 FX_FLOAT xTolerance, |
68 FX_FLOAT xTolerance, | 65 FX_FLOAT yTolerance) const override; |
69 FX_FLOAT yTolerance) const; | 66 int GetIndexAtPos(FX_FLOAT x, |
70 virtual CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const; | 67 FX_FLOAT y, |
71 virtual void GetRectsArrayByRect(const CFX_FloatRect& rect, | 68 FX_FLOAT xTolerance, |
72 CFX_RectArray& resRectArray) const; | 69 FX_FLOAT yTolerance) const override; |
73 virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const; | 70 CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const override; |
| 71 void GetRectsArrayByRect(const CFX_FloatRect& rect, |
| 72 CFX_RectArray& resRectArray) const override; |
| 73 CFX_WideString GetPageText(int start = 0, int nCount = -1) const override; |
| 74 int CountRects(int start, int nCount) override; |
| 75 void GetRect(int rectIndex, |
| 76 FX_FLOAT& left, |
| 77 FX_FLOAT& top, |
| 78 FX_FLOAT& right, |
| 79 FX_FLOAT& bottom) const override; |
| 80 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override; |
| 81 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override; |
| 82 int CountBoundedSegments(FX_FLOAT left, |
| 83 FX_FLOAT top, |
| 84 FX_FLOAT right, |
| 85 FX_FLOAT bottom, |
| 86 FX_BOOL bContains = FALSE) override; |
| 87 void GetBoundedSegment(int index, int& start, int& count) const override; |
| 88 int GetWordBreak(int index, int direction) const override; |
74 | 89 |
75 virtual int CountRects(int start, int nCount); | |
76 virtual void GetRect(int rectIndex, | |
77 FX_FLOAT& left, | |
78 FX_FLOAT& top, | |
79 FX_FLOAT& right, | |
80 FX_FLOAT& bottom) const; | |
81 virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate); | |
82 virtual FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate); | |
83 virtual int CountBoundedSegments(FX_FLOAT left, | |
84 FX_FLOAT top, | |
85 FX_FLOAT right, | |
86 FX_FLOAT bottom, | |
87 FX_BOOL bContains = FALSE); | |
88 virtual void GetBoundedSegment(int index, int& start, int& count) const; | |
89 virtual int GetWordBreak(int index, int direction) const; | |
90 | |
91 public: | |
92 const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; } | 90 const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; } |
93 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, | 91 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, |
94 const CFX_FloatRect& rect2); | 92 const CFX_FloatRect& rect2); |
95 static FX_BOOL IsLetter(FX_WCHAR unicode); | 93 static FX_BOOL IsLetter(FX_WCHAR unicode); |
96 | 94 |
97 private: | 95 private: |
98 FX_BOOL IsHyphen(FX_WCHAR curChar); | 96 FX_BOOL IsHyphen(FX_WCHAR curChar); |
99 bool IsControlChar(const PAGECHAR_INFO& charInfo); | 97 bool IsControlChar(const PAGECHAR_INFO& charInfo); |
100 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); | 98 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); |
101 void ProcessObject(); | 99 void ProcessObject(); |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
136 CFX_AffineMatrix m_perMatrix; | 134 CFX_AffineMatrix m_perMatrix; |
137 FX_BOOL m_IsParsered; | 135 FX_BOOL m_IsParsered; |
138 CFX_AffineMatrix m_DisplayMatrix; | 136 CFX_AffineMatrix m_DisplayMatrix; |
139 | 137 |
140 SEGMENT_Array m_Segment; | 138 SEGMENT_Array m_Segment; |
141 CFX_RectArray m_SelRects; | 139 CFX_RectArray m_SelRects; |
142 LINEOBJ m_LineObj; | 140 LINEOBJ m_LineObj; |
143 int32_t m_TextlineDir; | 141 int32_t m_TextlineDir; |
144 CFX_FloatRect m_CurlineRect; | 142 CFX_FloatRect m_CurlineRect; |
145 }; | 143 }; |
| 144 |
146 class CPDF_TextPageFind : public IPDF_TextPageFind { | 145 class CPDF_TextPageFind : public IPDF_TextPageFind { |
147 public: | 146 public: |
148 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); | 147 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); |
149 virtual ~CPDF_TextPageFind(){}; | 148 ~CPDF_TextPageFind() override{}; |
150 | 149 |
151 public: | 150 // IPDF_TextPageFind |
152 virtual FX_BOOL FindFirst(const CFX_WideString& findwhat, | 151 FX_BOOL FindFirst(const CFX_WideString& findwhat, |
153 int flags, | 152 int flags, |
154 int startPos = 0); | 153 int startPos = 0) override; |
155 virtual FX_BOOL FindNext(); | 154 FX_BOOL FindNext() override; |
156 virtual FX_BOOL FindPrev(); | 155 FX_BOOL FindPrev() override; |
157 | 156 void GetRectArray(CFX_RectArray& rects) const override; |
158 virtual void GetRectArray(CFX_RectArray& rects) const; | 157 int GetCurOrder() const override; |
159 virtual int GetCurOrder() const; | 158 int GetMatchedCount() const override; |
160 virtual int GetMatchedCount() const; | |
161 | 159 |
162 protected: | 160 protected: |
163 void ExtractFindWhat(const CFX_WideString& findwhat); | 161 void ExtractFindWhat(const CFX_WideString& findwhat); |
164 FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText, | 162 FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText, |
165 int startPos, | 163 int startPos, |
166 int endPos); | 164 int endPos); |
167 FX_BOOL ExtractSubString(CFX_WideString& rString, | 165 FX_BOOL ExtractSubString(CFX_WideString& rString, |
168 const FX_WCHAR* lpszFullString, | 166 const FX_WCHAR* lpszFullString, |
169 int iSubString, | 167 int iSubString, |
170 FX_WCHAR chSep); | 168 FX_WCHAR chSep); |
(...skipping 21 matching lines...) Expand all Loading... |
192 FX_BOOL m_IsFind; | 190 FX_BOOL m_IsFind; |
193 }; | 191 }; |
194 class CPDF_LinkExt { | 192 class CPDF_LinkExt { |
195 public: | 193 public: |
196 CPDF_LinkExt(){}; | 194 CPDF_LinkExt(){}; |
197 int m_Start; | 195 int m_Start; |
198 int m_Count; | 196 int m_Count; |
199 CFX_WideString m_strUrl; | 197 CFX_WideString m_strUrl; |
200 virtual ~CPDF_LinkExt(){}; | 198 virtual ~CPDF_LinkExt(){}; |
201 }; | 199 }; |
| 200 |
202 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; | 201 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; |
| 202 |
203 class CPDF_LinkExtract : public IPDF_LinkExtract { | 203 class CPDF_LinkExtract : public IPDF_LinkExtract { |
204 public: | 204 public: |
205 CPDF_LinkExtract(); | 205 CPDF_LinkExtract(); |
206 virtual ~CPDF_LinkExtract(); | 206 ~CPDF_LinkExtract() override; |
207 virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage); | |
208 virtual FX_BOOL IsExtract() const { return m_IsParserd; } | |
209 | 207 |
210 public: | 208 // IPDF_LinkExtract |
211 virtual int CountLinks() const; | 209 FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) override; |
212 virtual CFX_WideString GetURL(int index) const; | 210 int CountLinks() const override; |
213 virtual void GetBoundedSegment(int index, int& start, int& count) const; | 211 CFX_WideString GetURL(int index) const override; |
214 virtual void GetRects(int index, CFX_RectArray& rects) const; | 212 void GetBoundedSegment(int index, int& start, int& count) const override; |
| 213 void GetRects(int index, CFX_RectArray& rects) const override; |
| 214 |
| 215 FX_BOOL IsExtract() const { return m_IsParserd; } |
215 | 216 |
216 protected: | 217 protected: |
217 void parserLink(); | 218 void parserLink(); |
218 void DeleteLinkList(); | 219 void DeleteLinkList(); |
219 FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); | 220 FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); |
220 FX_BOOL CheckMailLink(CFX_WideString& str); | 221 FX_BOOL CheckMailLink(CFX_WideString& str); |
221 FX_BOOL AppendToLinkList(int start, int count, const CFX_WideString& strUrl); | 222 FX_BOOL AppendToLinkList(int start, int count, const CFX_WideString& strUrl); |
222 | 223 |
223 private: | 224 private: |
224 LINK_InfoArray m_LinkList; | 225 LINK_InfoArray m_LinkList; |
225 const CPDF_TextPage* m_pTextPage; | 226 const CPDF_TextPage* m_pTextPage; |
226 CFX_WideString m_strPageText; | 227 CFX_WideString m_strPageText; |
227 FX_BOOL m_IsParserd; | 228 FX_BOOL m_IsParserd; |
228 }; | 229 }; |
| 230 |
229 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); | 231 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); |
230 void NormalizeString(CFX_WideString& str); | 232 void NormalizeString(CFX_WideString& str); |
231 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); | 233 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); |
232 | 234 |
233 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 235 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
OLD | NEW |