Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(449)

Side by Side Diff: core/fpdftext/text_int.h

Issue 1796303002: Tidy fpdftext/ directory (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: rebase Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « core/fpdftext/fpdf_text_int_unittest.cpp ('k') | core/fpdftext/unicodenormalization.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #ifndef CORE_FPDFTEXT_TEXT_INT_H_
8 #define CORE_FPDFTEXT_TEXT_INT_H_
9
10 #include <deque>
11 #include <vector>
12
13 #include "core/fpdftext/include/ipdf_linkextract.h"
14 #include "core/fpdftext/include/ipdf_textpage.h"
15 #include "core/fpdftext/include/ipdf_textpagefind.h"
16 #include "core/include/fpdfapi/fpdf_page.h"
17 #include "core/include/fxcrt/fx_basic.h"
18
// Forward declarations for types that this header only uses by pointer or
// reference. CPDF_LinkExtract and CPDF_TextPageFind are fully defined later
// in this same header.
class CFX_BidiChar;
class CPDF_FormObject;
class CPDF_LinkExtract;
class CPDF_TextPageFind;
23
// Character kind codes.
// NOTE(review): presumably the values stored in PAGECHAR_INFO::m_Flag --
// confirm against the implementation file.
// The negative value is parenthesized so that it always expands as a single
// operand, whatever expression the macro is pasted into.
#define FPDFTEXT_CHAR_ERROR (-1)
#define FPDFTEXT_CHAR_NORMAL 0
#define FPDFTEXT_CHAR_GENERATED 1
#define FPDFTEXT_CHAR_UNUNICODE 2
#define FPDFTEXT_CHAR_HYPHEN 3
#define FPDFTEXT_CHAR_PIECE 4

// Marked-content handling codes.
// NOTE(review): presumably the results of CPDF_TextPage::PreMarkedContent()
// -- confirm against the implementation file.
#define FPDFTEXT_MC_PASS 0
#define FPDFTEXT_MC_DONE 1
#define FPDFTEXT_MC_DELAY 2
33
34 struct PAGECHAR_INFO {
35 int m_CharCode;
36 FX_WCHAR m_Unicode;
37 FX_FLOAT m_OriginX;
38 FX_FLOAT m_OriginY;
39 int32_t m_Flag;
40 CFX_FloatRect m_CharBox;
41 CPDF_TextObject* m_pTextObj;
42 CFX_Matrix m_Matrix;
43 int m_Index;
44 };
45
// A (start, count) pair of ints. CPDF_TextPage stores these in m_Segments.
// NOTE(review): presumably a run of character indices reported through
// GetBoundedSegment() -- confirm against the implementation file.
//
// Kept as a trivial aggregate because it is held by value inside a
// CFX_ArrayTemplate.
struct FPDF_SEGMENT {
  int m_Start;
  int m_nCount;
};
50
51 struct PDFTEXT_Obj {
52 CPDF_TextObject* m_pTextObj;
53 CFX_Matrix m_formMatrix;
54 };
55
56 class CPDF_TextPage : public IPDF_TextPage {
57 public:
58 CPDF_TextPage(const CPDF_Page* pPage, int flags);
59 ~CPDF_TextPage() override {}
60
61 // IPDF_TextPage:
62 void ParseTextPage() override;
63 bool IsParsed() const override { return m_bIsParsed; }
64 int CharIndexFromTextIndex(int TextIndex) const override;
65 int TextIndexFromCharIndex(int CharIndex) const override;
66 int CountChars() const override;
67 void GetCharInfo(int index, FPDF_CHAR_INFO* info) const override;
68 void GetRectArray(int start,
69 int nCount,
70 CFX_RectArray& rectArray) const override;
71 int GetIndexAtPos(CFX_FloatPoint point,
72 FX_FLOAT xTolerance,
73 FX_FLOAT yTolerance) const override;
74 int GetIndexAtPos(FX_FLOAT x,
75 FX_FLOAT y,
76 FX_FLOAT xTolerance,
77 FX_FLOAT yTolerance) const override;
78 CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const override;
79 void GetRectsArrayByRect(const CFX_FloatRect& rect,
80 CFX_RectArray& resRectArray) const override;
81 CFX_WideString GetPageText(int start = 0, int nCount = -1) const override;
82 int CountRects(int start, int nCount) override;
83 void GetRect(int rectIndex,
84 FX_FLOAT& left,
85 FX_FLOAT& top,
86 FX_FLOAT& right,
87 FX_FLOAT& bottom) const override;
88 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override;
89 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override;
90 int CountBoundedSegments(FX_FLOAT left,
91 FX_FLOAT top,
92 FX_FLOAT right,
93 FX_FLOAT bottom,
94 FX_BOOL bContains = FALSE) override;
95 void GetBoundedSegment(int index, int& start, int& count) const override;
96 int GetWordBreak(int index, int direction) const override;
97
98 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1,
99 const CFX_FloatRect& rect2);
100 static FX_BOOL IsLetter(FX_WCHAR unicode);
101
102 private:
103 FX_BOOL IsHyphen(FX_WCHAR curChar);
104 bool IsControlChar(const PAGECHAR_INFO& charInfo);
105 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate);
106 void ProcessObject();
107 void ProcessFormObject(CPDF_FormObject* pFormObj,
108 const CFX_Matrix& formMatrix);
109 void ProcessTextObject(PDFTEXT_Obj pObj);
110 void ProcessTextObject(CPDF_TextObject* pTextObj,
111 const CFX_Matrix& formMatrix,
112 const CPDF_PageObjectList* pObjList,
113 CPDF_PageObjectList::const_iterator ObjPos);
114 int ProcessInsertObject(const CPDF_TextObject* pObj,
115 const CFX_Matrix& formMatrix);
116 FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info);
117 FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
118 const CPDF_PageObjectList* pObjList,
119 CPDF_PageObjectList::const_iterator ObjPos);
120 FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1,
121 CPDF_TextObject* pTextObj2);
122 int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const;
123 void CloseTempLine();
124 void OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str);
125 int32_t PreMarkedContent(PDFTEXT_Obj pObj);
126 void ProcessMarkedContent(PDFTEXT_Obj pObj);
127 void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const;
128 void FindPreviousTextObject(void);
129 void AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info);
130 void AddCharInfoByRLDirection(FX_WCHAR wChar, PAGECHAR_INFO info);
131 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj);
132 int32_t FindTextlineFlowDirection();
133
134 void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend);
135 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj,
136 const CPDF_Font* pFont,
137 int nItems) const;
138
139 const CPDF_Page* const m_pPage;
140 std::vector<FX_WORD> m_CharIndex;
141 std::deque<PAGECHAR_INFO> m_CharList;
142 std::deque<PAGECHAR_INFO> m_TempCharList;
143 CFX_WideTextBuf m_TextBuf;
144 CFX_WideTextBuf m_TempTextBuf;
145 const int m_parserflag;
146 CPDF_TextObject* m_pPreTextObj;
147 CFX_Matrix m_perMatrix;
148 bool m_bIsParsed;
149 CFX_Matrix m_DisplayMatrix;
150 CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments;
151 CFX_RectArray m_SelRects;
152 CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj;
153 int32_t m_TextlineDir;
154 CFX_FloatRect m_CurlineRect;
155 };
156
157 class CPDF_TextPageFind : public IPDF_TextPageFind {
158 public:
159 explicit CPDF_TextPageFind(const IPDF_TextPage* pTextPage);
160 ~CPDF_TextPageFind() override {}
161
162 // IPDF_TextPageFind
163 FX_BOOL FindFirst(const CFX_WideString& findwhat,
164 int flags,
165 int startPos = 0) override;
166 FX_BOOL FindNext() override;
167 FX_BOOL FindPrev() override;
168 void GetRectArray(CFX_RectArray& rects) const override;
169 int GetCurOrder() const override;
170 int GetMatchedCount() const override;
171
172 protected:
173 void ExtractFindWhat(const CFX_WideString& findwhat);
174 FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText,
175 int startPos,
176 int endPos);
177 FX_BOOL ExtractSubString(CFX_WideString& rString,
178 const FX_WCHAR* lpszFullString,
179 int iSubString,
180 FX_WCHAR chSep);
181 CFX_WideString MakeReverse(const CFX_WideString& str);
182 int ReverseFind(const CFX_WideString& csPageText,
183 const CFX_WideString& csWord,
184 int nStartPos,
185 int& WordLength);
186 int GetCharIndex(int index) const;
187
188 private:
189 std::vector<FX_WORD> m_CharIndex;
190 const IPDF_TextPage* m_pTextPage;
191 CFX_WideString m_strText;
192 CFX_WideString m_findWhat;
193 int m_flags;
194 std::vector<CFX_WideString> m_csFindWhatArray;
195 int m_findNextStart;
196 int m_findPreStart;
197 FX_BOOL m_bMatchCase;
198 FX_BOOL m_bMatchWholeWord;
199 int m_resStart;
200 int m_resEnd;
201 CFX_RectArray m_resArray;
202 FX_BOOL m_IsFind;
203 };
204
205 class CPDF_LinkExt {
206 public:
207 CPDF_LinkExt() {}
208 int m_Start;
209 int m_Count;
210 CFX_WideString m_strUrl;
211 virtual ~CPDF_LinkExt() {}
212 };
213
214 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray;
215
216 class CPDF_LinkExtract : public IPDF_LinkExtract {
217 public:
218 CPDF_LinkExtract();
219 ~CPDF_LinkExtract() override;
220
221 // IPDF_LinkExtract
222 FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) override;
223 int CountLinks() const override;
224 CFX_WideString GetURL(int index) const override;
225 void GetBoundedSegment(int index, int& start, int& count) const override;
226 void GetRects(int index, CFX_RectArray& rects) const override;
227
228 FX_BOOL IsExtract() const { return m_bIsParsed; }
229
230 protected:
231 void ParseLink();
232 void DeleteLinkList();
233 FX_BOOL CheckWebLink(CFX_WideString& strBeCheck);
234 bool CheckMailLink(CFX_WideString& str);
235 void AppendToLinkList(int start, int count, const CFX_WideString& strUrl);
236
237 private:
238 LINK_InfoArray m_LinkList;
239 const CPDF_TextPage* m_pTextPage;
240 CFX_WideString m_strPageText;
241 bool m_bIsParsed;
242 };
243
244 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst);
245
246 #endif // CORE_FPDFTEXT_TEXT_INT_H_
OLDNEW
« no previous file with comments | « core/fpdftext/fpdf_text_int_unittest.cpp ('k') | core/fpdftext/unicodenormalization.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698