OLD | NEW |
| (Empty) |
1 // Copyright 2016 PDFium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
6 | |
7 #ifndef CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ | |
8 #define CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ | |
9 | |
10 #include <deque> | |
11 #include <vector> | |
12 | |
13 #include "core/fpdfapi/fpdf_page/cpdf_pageobjectlist.h" | |
14 #include "core/fxcrt/include/fx_basic.h" | |
15 #include "core/fxcrt/include/fx_coordinates.h" | |
16 #include "core/fxcrt/include/fx_string.h" | |
17 | |
18 class CPDF_Font; | |
19 class CPDF_FormObject; | |
20 class CPDF_Page; | |
21 class CPDF_TextObject; | |
22 | |
// Option flags with distinct bit values (1, 2, 4), so they can be OR-ed
// together. NOTE(review): presumably text-search options - confirm at the
// call sites.
#define FPDFTEXT_MATCHCASE 0x00000001
#define FPDFTEXT_MATCHWHOLEWORD 0x00000002
#define FPDFTEXT_CONSECUTIVE 0x00000004

// Character classification codes.
// NOTE(review): presumably the values stored in the m_Flag members of
// FPDF_CHAR_INFO / PAGECHAR_INFO - confirm against the implementation file.
// The negative value is parenthesized so the expansion always behaves as a
// single operand, whatever operators surround the macro at the use site.
#define FPDFTEXT_CHAR_ERROR (-1)
#define FPDFTEXT_CHAR_NORMAL 0
#define FPDFTEXT_CHAR_GENERATED 1
#define FPDFTEXT_CHAR_UNUNICODE 2
#define FPDFTEXT_CHAR_HYPHEN 3
#define FPDFTEXT_CHAR_PIECE 4

// Wide-character literals for whitespace and line-ending characters.
#define TEXT_SPACE_CHAR L' '
#define TEXT_LINEFEED_CHAR L'\n'
#define TEXT_RETURN_CHAR L'\r'

// Wide-string literals for the empty string, a space and line endings.
#define TEXT_EMPTY L""
#define TEXT_SPACE L" "
#define TEXT_RETURN_LINEFEED L"\r\n"
#define TEXT_LINEFEED L"\n"

// Floating-point (double) constant.
// NOTE(review): the name suggests a character-gap ratio threshold; its exact
// use is not visible in this header - confirm in the implementation file.
#define TEXT_CHARRATIO_GAPDELTA 0.070
42 | |
// Three-state result type; CPDF_TextPage::PreMarkedContent() returns it.
// NOTE(review): the precise meaning of each state is decided by the
// implementation file - confirm there before relying on it.
enum class FPDFText_MarkedContent {
  Pass = 0,
  Done,   // == 1
  Delay,  // == 2
};
44 | |
// Direction selector taken by the CPDF_TextPage constructor (its `flags`
// parameter). The two enumerators carry opposite signs.
enum class FPDFText_Direction {
  Left = -1,
  Right = 1,
};
46 | |
47 struct FPDF_CHAR_INFO { | |
48 FX_WCHAR m_Unicode; | |
49 FX_WCHAR m_Charcode; | |
50 int32_t m_Flag; | |
51 FX_FLOAT m_FontSize; | |
52 FX_FLOAT m_OriginX; | |
53 FX_FLOAT m_OriginY; | |
54 CFX_FloatRect m_CharBox; | |
55 CPDF_TextObject* m_pTextObj; | |
56 CFX_Matrix m_Matrix; | |
57 }; | |
58 | |
// A run described by a starting index and an element count. CPDF_TextPage
// keeps an array of these in m_Segments.
struct FPDF_SEGMENT {
  int m_Start;   // Index at which the run begins.
  int m_nCount;  // Number of elements in the run.
};
63 | |
64 struct PAGECHAR_INFO { | |
65 int m_CharCode; | |
66 FX_WCHAR m_Unicode; | |
67 FX_FLOAT m_OriginX; | |
68 FX_FLOAT m_OriginY; | |
69 int32_t m_Flag; | |
70 CFX_FloatRect m_CharBox; | |
71 CPDF_TextObject* m_pTextObj; | |
72 CFX_Matrix m_Matrix; | |
73 int m_Index; | |
74 }; | |
75 | |
76 struct PDFTEXT_Obj { | |
77 CPDF_TextObject* m_pTextObj; | |
78 CFX_Matrix m_formMatrix; | |
79 }; | |
80 | |
81 class CPDF_TextPage { | |
82 public: | |
83 CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags); | |
84 ~CPDF_TextPage(); | |
85 | |
86 // IPDF_TextPage: | |
87 void ParseTextPage(); | |
88 bool IsParsed() const { return m_bIsParsed; } | |
89 int CharIndexFromTextIndex(int TextIndex) const; | |
90 int TextIndexFromCharIndex(int CharIndex) const; | |
91 int CountChars() const; | |
92 void GetCharInfo(int index, FPDF_CHAR_INFO* info) const; | |
93 std::vector<CFX_FloatRect> GetRectArray(int start, int nCount) const; | |
94 int GetIndexAtPos(CFX_FloatPoint point, | |
95 FX_FLOAT xTolerance, | |
96 FX_FLOAT yTolerance) const; | |
97 int GetIndexAtPos(FX_FLOAT x, | |
98 FX_FLOAT y, | |
99 FX_FLOAT xTolerance, | |
100 FX_FLOAT yTolerance) const; | |
101 CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const; | |
102 CFX_WideString GetPageText(int start = 0, int nCount = -1) const; | |
103 int CountRects(int start, int nCount); | |
104 void GetRect(int rectIndex, | |
105 FX_FLOAT& left, | |
106 FX_FLOAT& top, | |
107 FX_FLOAT& right, | |
108 FX_FLOAT& bottom) const; | |
109 | |
110 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, | |
111 const CFX_FloatRect& rect2); | |
112 | |
113 private: | |
114 enum class TextOrientation { | |
115 Unknown, | |
116 Horizontal, | |
117 Vertical, | |
118 }; | |
119 | |
120 enum class GenerateCharacter { | |
121 None, | |
122 Space, | |
123 LineBreak, | |
124 Hyphen, | |
125 }; | |
126 | |
127 FX_BOOL IsHyphen(FX_WCHAR curChar); | |
128 bool IsControlChar(const PAGECHAR_INFO& charInfo); | |
129 void ProcessObject(); | |
130 void ProcessFormObject(CPDF_FormObject* pFormObj, | |
131 const CFX_Matrix& formMatrix); | |
132 void ProcessTextObject(PDFTEXT_Obj pObj); | |
133 void ProcessTextObject(CPDF_TextObject* pTextObj, | |
134 const CFX_Matrix& formMatrix, | |
135 const CPDF_PageObjectList* pObjList, | |
136 CPDF_PageObjectList::const_iterator ObjPos); | |
137 GenerateCharacter ProcessInsertObject(const CPDF_TextObject* pObj, | |
138 const CFX_Matrix& formMatrix); | |
139 FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info); | |
140 FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, | |
141 const CPDF_PageObjectList* pObjList, | |
142 CPDF_PageObjectList::const_iterator ObjPos); | |
143 FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1, | |
144 CPDF_TextObject* pTextObj2); | |
145 int GetCharWidth(uint32_t charCode, CPDF_Font* pFont) const; | |
146 void CloseTempLine(); | |
147 FPDFText_MarkedContent PreMarkedContent(PDFTEXT_Obj pObj); | |
148 void ProcessMarkedContent(PDFTEXT_Obj pObj); | |
149 void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const; | |
150 void FindPreviousTextObject(); | |
151 void AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info); | |
152 void AddCharInfoByRLDirection(FX_WCHAR wChar, PAGECHAR_INFO info); | |
153 TextOrientation GetTextObjectWritingMode( | |
154 const CPDF_TextObject* pTextObj) const; | |
155 TextOrientation FindTextlineFlowOrientation() const; | |
156 void AppendGeneratedCharacter(FX_WCHAR unicode, const CFX_Matrix& formMatrix); | |
157 | |
158 void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); | |
159 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, | |
160 const CPDF_Font* pFont, | |
161 int nItems) const; | |
162 | |
163 const CPDF_Page* const m_pPage; | |
164 std::vector<uint16_t> m_CharIndex; | |
165 std::deque<PAGECHAR_INFO> m_CharList; | |
166 std::deque<PAGECHAR_INFO> m_TempCharList; | |
167 CFX_WideTextBuf m_TextBuf; | |
168 CFX_WideTextBuf m_TempTextBuf; | |
169 const FPDFText_Direction m_parserflag; | |
170 CPDF_TextObject* m_pPreTextObj; | |
171 CFX_Matrix m_perMatrix; | |
172 bool m_bIsParsed; | |
173 CFX_Matrix m_DisplayMatrix; | |
174 CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments; | |
175 std::vector<CFX_FloatRect> m_SelRects; | |
176 CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj; | |
177 TextOrientation m_TextlineDir; | |
178 CFX_FloatRect m_CurlineRect; | |
179 }; | |
180 | |
181 #endif // CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ | |
OLD | NEW |