OLD | NEW |
| (Empty) |
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 | |
7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ | |
8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ | |
9 | |
10 #include <deque> | |
11 #include <vector> | |
12 | |
13 #include "core/include/fpdfapi/fpdf_page.h" | |
14 #include "core/include/fpdftext/fpdf_text.h" | |
15 #include "core/include/fxcrt/fx_basic.h" | |
16 | |
// Forward declarations for types that this header only names through
// pointers/references or defines further down.
class CFX_BidiChar;
class CPDF_FormObject;
class CPDF_LinkExtract;
class CPDF_TextPageFind;
21 | |
// Character classification codes.
// NOTE(review): presumably the values stored in PAGECHAR_INFO::m_Flag --
// confirm against the .cpp.
// The negative value is parenthesized so the macro always expands to a single
// primary expression regardless of the surrounding operators.
#define FPDFTEXT_CHAR_ERROR (-1)
#define FPDFTEXT_CHAR_NORMAL 0
#define FPDFTEXT_CHAR_GENERATED 1
#define FPDFTEXT_CHAR_UNUNICODE 2
#define FPDFTEXT_CHAR_HYPHEN 3
#define FPDFTEXT_CHAR_PIECE 4

// Marked-content processing codes.
// NOTE(review): presumably the int32_t results of
// CPDF_TextPage::PreMarkedContent() -- confirm against the .cpp.
#define FPDFTEXT_MC_PASS 0
#define FPDFTEXT_MC_DONE 1
#define FPDFTEXT_MC_DELAY 2
31 | |
32 struct PAGECHAR_INFO { | |
33 int m_CharCode; | |
34 FX_WCHAR m_Unicode; | |
35 FX_FLOAT m_OriginX; | |
36 FX_FLOAT m_OriginY; | |
37 int32_t m_Flag; | |
38 CFX_FloatRect m_CharBox; | |
39 CPDF_TextObject* m_pTextObj; | |
40 CFX_Matrix m_Matrix; | |
41 int m_Index; | |
42 }; | |
43 | |
// A (start, count) pair of ints. CPDF_TextPage stores these in m_Segments.
// Plain aggregate: no constructor, so members are uninitialized unless the
// creator brace-/value-initializes or assigns them.
struct FPDF_SEGMENT {
  int m_Start;
  int m_nCount;
};
48 | |
49 struct PDFTEXT_Obj { | |
50 CPDF_TextObject* m_pTextObj; | |
51 CFX_Matrix m_formMatrix; | |
52 }; | |
53 | |
54 class CPDF_TextPage : public IPDF_TextPage { | |
55 public: | |
56 CPDF_TextPage(const CPDF_Page* pPage, int flags); | |
57 ~CPDF_TextPage() override {} | |
58 | |
59 // IPDF_TextPage: | |
60 void ParseTextPage() override; | |
61 bool IsParsed() const override { return m_bIsParsed; } | |
62 int CharIndexFromTextIndex(int TextIndex) const override; | |
63 int TextIndexFromCharIndex(int CharIndex) const override; | |
64 int CountChars() const override; | |
65 void GetCharInfo(int index, FPDF_CHAR_INFO* info) const override; | |
66 void GetRectArray(int start, | |
67 int nCount, | |
68 CFX_RectArray& rectArray) const override; | |
69 int GetIndexAtPos(CFX_FloatPoint point, | |
70 FX_FLOAT xTolerance, | |
71 FX_FLOAT yTolerance) const override; | |
72 int GetIndexAtPos(FX_FLOAT x, | |
73 FX_FLOAT y, | |
74 FX_FLOAT xTolerance, | |
75 FX_FLOAT yTolerance) const override; | |
76 CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const override; | |
77 void GetRectsArrayByRect(const CFX_FloatRect& rect, | |
78 CFX_RectArray& resRectArray) const override; | |
79 CFX_WideString GetPageText(int start = 0, int nCount = -1) const override; | |
80 int CountRects(int start, int nCount) override; | |
81 void GetRect(int rectIndex, | |
82 FX_FLOAT& left, | |
83 FX_FLOAT& top, | |
84 FX_FLOAT& right, | |
85 FX_FLOAT& bottom) const override; | |
86 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override; | |
87 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override; | |
88 int CountBoundedSegments(FX_FLOAT left, | |
89 FX_FLOAT top, | |
90 FX_FLOAT right, | |
91 FX_FLOAT bottom, | |
92 FX_BOOL bContains = FALSE) override; | |
93 void GetBoundedSegment(int index, int& start, int& count) const override; | |
94 int GetWordBreak(int index, int direction) const override; | |
95 | |
96 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, | |
97 const CFX_FloatRect& rect2); | |
98 static FX_BOOL IsLetter(FX_WCHAR unicode); | |
99 | |
100 private: | |
101 FX_BOOL IsHyphen(FX_WCHAR curChar); | |
102 bool IsControlChar(const PAGECHAR_INFO& charInfo); | |
103 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); | |
104 void ProcessObject(); | |
105 void ProcessFormObject(CPDF_FormObject* pFormObj, | |
106 const CFX_Matrix& formMatrix); | |
107 void ProcessTextObject(PDFTEXT_Obj pObj); | |
108 void ProcessTextObject(CPDF_TextObject* pTextObj, | |
109 const CFX_Matrix& formMatrix, | |
110 const CPDF_PageObjectList* pObjList, | |
111 CPDF_PageObjectList::const_iterator ObjPos); | |
112 int ProcessInsertObject(const CPDF_TextObject* pObj, | |
113 const CFX_Matrix& formMatrix); | |
114 FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info); | |
115 FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, | |
116 const CPDF_PageObjectList* pObjList, | |
117 CPDF_PageObjectList::const_iterator ObjPos); | |
118 FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1, | |
119 CPDF_TextObject* pTextObj2); | |
120 int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const; | |
121 void CloseTempLine(); | |
122 void OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str); | |
123 int32_t PreMarkedContent(PDFTEXT_Obj pObj); | |
124 void ProcessMarkedContent(PDFTEXT_Obj pObj); | |
125 void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const; | |
126 void FindPreviousTextObject(void); | |
127 void AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info); | |
128 void AddCharInfoByRLDirection(FX_WCHAR wChar, PAGECHAR_INFO info); | |
129 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); | |
130 int32_t FindTextlineFlowDirection(); | |
131 | |
132 void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); | |
133 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, | |
134 const CPDF_Font* pFont, | |
135 int nItems) const; | |
136 | |
137 const CPDF_Page* const m_pPage; | |
138 std::vector<FX_WORD> m_CharIndex; | |
139 std::deque<PAGECHAR_INFO> m_CharList; | |
140 std::deque<PAGECHAR_INFO> m_TempCharList; | |
141 CFX_WideTextBuf m_TextBuf; | |
142 CFX_WideTextBuf m_TempTextBuf; | |
143 const int m_parserflag; | |
144 CPDF_TextObject* m_pPreTextObj; | |
145 CFX_Matrix m_perMatrix; | |
146 bool m_bIsParsed; | |
147 CFX_Matrix m_DisplayMatrix; | |
148 CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments; | |
149 CFX_RectArray m_SelRects; | |
150 CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj; | |
151 int32_t m_TextlineDir; | |
152 CFX_FloatRect m_CurlineRect; | |
153 }; | |
154 | |
155 class CPDF_TextPageFind : public IPDF_TextPageFind { | |
156 public: | |
157 explicit CPDF_TextPageFind(const IPDF_TextPage* pTextPage); | |
158 ~CPDF_TextPageFind() override {} | |
159 | |
160 // IPDF_TextPageFind | |
161 FX_BOOL FindFirst(const CFX_WideString& findwhat, | |
162 int flags, | |
163 int startPos = 0) override; | |
164 FX_BOOL FindNext() override; | |
165 FX_BOOL FindPrev() override; | |
166 void GetRectArray(CFX_RectArray& rects) const override; | |
167 int GetCurOrder() const override; | |
168 int GetMatchedCount() const override; | |
169 | |
170 protected: | |
171 void ExtractFindWhat(const CFX_WideString& findwhat); | |
172 FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText, | |
173 int startPos, | |
174 int endPos); | |
175 FX_BOOL ExtractSubString(CFX_WideString& rString, | |
176 const FX_WCHAR* lpszFullString, | |
177 int iSubString, | |
178 FX_WCHAR chSep); | |
179 CFX_WideString MakeReverse(const CFX_WideString& str); | |
180 int ReverseFind(const CFX_WideString& csPageText, | |
181 const CFX_WideString& csWord, | |
182 int nStartPos, | |
183 int& WordLength); | |
184 int GetCharIndex(int index) const; | |
185 | |
186 private: | |
187 std::vector<FX_WORD> m_CharIndex; | |
188 const IPDF_TextPage* m_pTextPage; | |
189 CFX_WideString m_strText; | |
190 CFX_WideString m_findWhat; | |
191 int m_flags; | |
192 std::vector<CFX_WideString> m_csFindWhatArray; | |
193 int m_findNextStart; | |
194 int m_findPreStart; | |
195 FX_BOOL m_bMatchCase; | |
196 FX_BOOL m_bMatchWholeWord; | |
197 int m_resStart; | |
198 int m_resEnd; | |
199 CFX_RectArray m_resArray; | |
200 FX_BOOL m_IsFind; | |
201 }; | |
202 | |
203 class CPDF_LinkExt { | |
204 public: | |
205 CPDF_LinkExt() {} | |
206 int m_Start; | |
207 int m_Count; | |
208 CFX_WideString m_strUrl; | |
209 virtual ~CPDF_LinkExt() {} | |
210 }; | |
211 | |
212 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; | |
213 | |
214 class CPDF_LinkExtract : public IPDF_LinkExtract { | |
215 public: | |
216 CPDF_LinkExtract(); | |
217 ~CPDF_LinkExtract() override; | |
218 | |
219 // IPDF_LinkExtract | |
220 FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) override; | |
221 int CountLinks() const override; | |
222 CFX_WideString GetURL(int index) const override; | |
223 void GetBoundedSegment(int index, int& start, int& count) const override; | |
224 void GetRects(int index, CFX_RectArray& rects) const override; | |
225 | |
226 FX_BOOL IsExtract() const { return m_bIsParsed; } | |
227 | |
228 protected: | |
229 void ParseLink(); | |
230 void DeleteLinkList(); | |
231 FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); | |
232 bool CheckMailLink(CFX_WideString& str); | |
233 void AppendToLinkList(int start, int count, const CFX_WideString& strUrl); | |
234 | |
235 private: | |
236 LINK_InfoArray m_LinkList; | |
237 const CPDF_TextPage* m_pTextPage; | |
238 CFX_WideString m_strPageText; | |
239 bool m_bIsParsed; | |
240 }; | |
241 | |
242 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); | |
243 | |
244 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ | |
OLD | NEW |