OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
9 | 9 |
// CPDF_TextParseOptions: option holder with a default constructor, two flag
// members and one int member (all public).
// Patch effect (left column = old, right column = new): the two flag members
// change type from FX_BOOL to bool; m_nCheckSameObject stays int.
// NOTE(review): CPDF_TextPage below takes and stores a differently spelled
// type, CPDFText_ParseOptions, which is not declared in the visible part of
// this header — confirm which of the two types callers actually use.
10 class CPDF_TextParseOptions | 10 class CPDF_TextParseOptions |
11 { | 11 { |
12 public: | 12 public: |
13 CPDF_TextParseOptions(); | 13 CPDF_TextParseOptions(); |
14 FX_BOOL» » » m_bCheckObjectOrder; | 14 bool» » » m_bCheckObjectOrder; |
15 FX_BOOL» » » m_bCheckDirection; | 15 bool» » » m_bCheckDirection; |
16 int m_nCheckSameObject; | 16 int m_nCheckSameObject; |
17 }; | 17 }; |
// Forward declarations. CPDF_TextPage, CPDF_LinkExtract and CPDF_TextPageFind
// are defined further down; CPDF_DocProgressiveSearch is not defined in the
// portion of the header shown here. Unchanged by this patch.
18 class CPDF_TextPage; | 18 class CPDF_TextPage; |
19 class CPDF_LinkExtract; | 19 class CPDF_LinkExtract; |
20 class CPDF_TextPageFind; | 20 class CPDF_TextPageFind; |
21 class CPDF_DocProgressiveSearch; | 21 class CPDF_DocProgressiveSearch; |
// FPDFTEXT_CHAR_* integer codes (-1, 0, 1, 2). Unchanged by this patch.
// Presumably these classify how a character entry was produced (error /
// normal / generated / no Unicode mapping) — inferred from the names only;
// confirm against the users of these macros.
22 #define FPDFTEXT_CHAR_ERROR -1 | 22 #define FPDFTEXT_CHAR_ERROR -1 |
23 #define FPDFTEXT_CHAR_NORMAL 0 | 23 #define FPDFTEXT_CHAR_NORMAL 0 |
24 #define FPDFTEXT_CHAR_GENERATED 1 | 24 #define FPDFTEXT_CHAR_GENERATED 1 |
25 #define FPDFTEXT_CHAR_UNUNICODE 2 | 25 #define FPDFTEXT_CHAR_UNUNICODE 2 |
(...skipping 23 matching lines...) Expand all Loading... |
49 CPDF_TextObject* m_pTextObj; | 49 CPDF_TextObject* m_pTextObj; |
50 CFX_AffineMatrix m_formMatrix; | 50 CFX_AffineMatrix m_formMatrix; |
51 } PDFTEXT_Obj; | 51 } PDFTEXT_Obj; |
// LINEOBJ: array of PDFTEXT_Obj values; used as the type of
// CPDF_TextPage::m_LineObj. Unchanged by this patch.
52 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; | 52 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; |
// CPDF_TextPage: implementation class derived from IPDF_TextPage.
// It can be constructed from a CPDF_Page or CPDF_PageObjects pointer plus an
// int flags value, or from a CPDF_Page pointer plus a CPDFText_ParseOptions
// value.
// Patch effect (left column = old, right column = new): every FX_BOOL return
// type, parameter and data member becomes bool, and the default argument of
// CountBoundedSegments changes from FALSE to false. No declaration is added,
// removed or renamed.
// Rows longer than the review tool's column width are hard-wrapped mid-token;
// the wrapped fragments belong to the row number that precedes them.
53 class CPDF_TextPage: public IPDF_TextPage | 53 class CPDF_TextPage: public IPDF_TextPage |
54 { | 54 { |
55 public: | 55 public: |
56 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); | 56 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); |
57 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); | 57 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); |
58 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); | 58 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); |
59 virtual FX_BOOL» » » » » ParseTextPage(); | 59 virtual bool» » » » » ParseTextPage(); |
60 virtual void» » » » » NormalizeObjects(FX_BOOL
bNormalize); | 60 virtual void» » » » » NormalizeObjects(bool bN
ormalize); |
// IsParsered() simply returns the m_IsParsered member.
61 virtual» FX_BOOL»» » » » IsParsered() const | 61 virtual» bool» » » » » IsParsered() const |
62 { | 62 { |
63 return m_IsParsered; | 63 return m_IsParsered; |
64 } | 64 } |
65 virtual ~CPDF_TextPage() {}; | 65 virtual ~CPDF_TextPage() {}; |
66 public: | 66 public: |
// Virtual query methods (rows 67-88). Apart from the two GetBaselineRotate
// overloads and CountBoundedSegments, their signatures are identical in both
// columns.
67 virtual int CharIndexFromTextIndex(int TextIndex)const ; | 67 virtual int CharIndexFromTextIndex(int TextIndex)const ; |
68 virtual int TextIndexFromCharIndex(int CharIndex)const; | 68 virtual int TextIndexFromCharIndex(int CharIndex)const; |
69 virtual int CountChars() const; | 69 virtual int CountChars() const; |
70 virtual void GetCharInfo(int index, F
PDF_CHAR_INFO & info) const; | 70 virtual void GetCharInfo(int index, F
PDF_CHAR_INFO & info) const; |
71 virtual void GetRectArray(int start,
int nCount, CFX_RectArray& rectArray) const; | 71 virtual void GetRectArray(int start,
int nCount, CFX_RectArray& rectArray) const; |
72 virtual int GetIndexAtPos(CPDF_Point
point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const; | 72 virtual int GetIndexAtPos(CPDF_Point
point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const; |
73 virtual int GetIndexAtPos(FX_FLOAT x
, FX_FLOAT y, FX_FLOAT xTorelance, | 73 virtual int GetIndexAtPos(FX_FLOAT x
, FX_FLOAT y, FX_FLOAT xTorelance, |
74 FX_FLOAT yTorelance) const; | 74 FX_FLOAT yTorelance) const; |
75 virtual CFX_WideString GetTextByRect(const CFX_FloatRec
t& rect) const; | 75 virtual CFX_WideString GetTextByRect(const CFX_FloatRec
t& rect) const; |
76 virtual void GetRectsArrayByRect(cons
t CFX_FloatRect& rect, CFX_RectArray& resRectArray) const; | 76 virtual void GetRectsArrayByRect(cons
t CFX_FloatRect& rect, CFX_RectArray& resRectArray) const; |
77 virtual int GetOrderByDirect
ion(int order, int direction) const; | 77 virtual int GetOrderByDirect
ion(int order, int direction) const; |
78 virtual CFX_WideString GetPageText(int start = 0, int n
Count = -1) const; | 78 virtual CFX_WideString GetPageText(int start = 0, int n
Count = -1) const; |
79 | 79 |
80 virtual int CountRects(int start, in
t nCount); | 80 virtual int CountRects(int start, in
t nCount); |
81 virtual void GetRect(int rectIndex, F
X_FLOAT& left, FX_FLOAT& top | 81 virtual void GetRect(int rectIndex, F
X_FLOAT& left, FX_FLOAT& top |
82 , FX_FLOAT& right, FX_FLOAT &bottom)
const; | 82 , FX_FLOAT& right, FX_FLOAT &bottom)
const; |
83 virtual FX_BOOL» » » » » GetBaselineRotate(int re
ctIndex, int& Rotate); | 83 virtual bool» » » » » GetBaselineRotate(int re
ctIndex, int& Rotate); |
84 virtual FX_BOOL» » » » » GetBaselineRotate(const
CFX_FloatRect& rect, int& Rotate); | 84 virtual bool» » » » » GetBaselineRotate(const
CFX_FloatRect& rect, int& Rotate); |
85 virtual int CountBoundedSegm
ents(FX_FLOAT left, FX_FLOAT top, | 85 virtual int CountBoundedSegm
ents(FX_FLOAT left, FX_FLOAT top, |
86 FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE); | 86 FX_FLOAT right, FX_FLOAT bottom, bool bContains = false); |
87 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; | 87 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; |
88 virtual int GetWordBreak(int index,
int direction) const; | 88 virtual int GetWordBreak(int index,
int direction) const; |
89 public: | 89 public: |
// GetCharList() returns the address of the m_charList member as a
// pointer-to-const.
90 const PAGECHAR_InfoArray* GetCharList() const | 90 const PAGECHAR_InfoArray* GetCharList() const |
91 { | 91 { |
92 return &m_charList; | 92 return &m_charList; |
93 } | 93 } |
94 static» FX_BOOL»» » » » IsRectIntersect(const CF
X_FloatRect& rect1, const CFX_FloatRect& rect2); | 94 static» bool» » » » » IsRectIntersect(const CF
X_FloatRect& rect1, const CFX_FloatRect& rect2); |
95 static» FX_BOOL»» » » » IsLetter(FX_WCHAR unicod
e); | 95 static» bool» » » » » IsLetter(FX_WCHAR unicod
e); |
96 private: | 96 private: |
// Private helpers (rows 97-123). Only the FX_BOOL-returning ones change
// between the two columns.
97 FX_BOOL» » » » » » » IsHyphen(FX_WCHA
R curChar); | 97 bool» » » » » » » IsHyphen(FX_WCHA
R curChar); |
98 FX_BOOL» » » » » » » IsControlChar(PA
GECHAR_INFO* pCharInfo); | 98 bool» » » » » » » IsControlChar(PA
GECHAR_INFO* pCharInfo); |
99 FX_BOOL» » » » » » » GetBaselineRotat
e(int start, int end, int& Rotate); | 99 bool» » » » » » » GetBaselineRotat
e(int start, int end, int& Rotate); |
100 void ProcessObject(); | 100 void ProcessObject(); |
101 void ProcessFormObjec
t(CPDF_FormObject* pFormObj, const CFX_AffineMatrix& formMatrix); | 101 void ProcessFormObjec
t(CPDF_FormObject* pFormObj, const CFX_AffineMatrix& formMatrix); |
102 void ProcessTextObjec
t(PDFTEXT_Obj pObj); | 102 void ProcessTextObjec
t(PDFTEXT_Obj pObj); |
103 void ProcessTextObjec
t(CPDF_TextObject* pTextObj, const CFX_AffineMatrix& formMatrix, FX_POSITIO
N ObjPos); | 103 void ProcessTextObjec
t(CPDF_TextObject* pTextObj, const CFX_AffineMatrix& formMatrix, FX_POSITIO
N ObjPos); |
104 int ProcessInsertObj
ect(const CPDF_TextObject* pObj, const CFX_AffineMatrix& formMatrix); | 104 int ProcessInsertObj
ect(const CPDF_TextObject* pObj, const CFX_AffineMatrix& formMatrix); |
105 FX_BOOL» » » » » » » GenerateCharInfo
(FX_WCHAR unicode, PAGECHAR_INFO& info); | 105 bool» » » » » » » GenerateCharInfo
(FX_WCHAR unicode, PAGECHAR_INFO& info); |
106 FX_BOOL» » » » » » » IsSameAsPreTextO
bject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos); | 106 bool» » » » » » » IsSameAsPreTextO
bject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos); |
107 FX_BOOL» » » » » » » IsSameTextObject
(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2); | 107 bool» » » » » » » IsSameTextObject
(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2); |
108 int GetCharWidth(FX_
DWORD charCode, CPDF_Font* pFont) const; | 108 int GetCharWidth(FX_
DWORD charCode, CPDF_Font* pFont) const; |
109 void CloseTempLine(); | 109 void CloseTempLine(); |
110 void OnPiece(IFX_Bidi
Char* pBidi, CFX_WideString& str); | 110 void OnPiece(IFX_Bidi
Char* pBidi, CFX_WideString& str); |
111 int32_t PreMarkedContent(PDFTEXT_Obj pObj); | 111 int32_t PreMarkedContent(PDFTEXT_Obj pObj); |
112 void ProcessMarkedContent(PDFTEXT_Obj pObj); | 112 void ProcessMarkedContent(PDFTEXT_Obj pObj); |
113 void CheckMarkedContentObject(int32_t& start, int32_t& nCount
) const; | 113 void CheckMarkedContentObject(int32_t& start, int32_t& nCount
) const; |
114 void FindPreviousTextObject(void); | 114 void FindPreviousTextObject(void); |
115 void AddCharInfoByLRDirection(CFX_WideString& str, int i); | 115 void AddCharInfoByLRDirection(CFX_WideString& str, int i); |
116 void AddCharInfoByRLDirection(CFX_WideString& str, int i); | 116 void AddCharInfoByRLDirection(CFX_WideString& str, int i); |
117 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); | 117 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); |
118 int32_t FindTextlineFlowDirection(); | 118 int32_t FindTextlineFlowDirection(); |
119 void SwapTempTextBuf(int32_t iCharListStartAppend, | 119 void SwapTempTextBuf(int32_t iCharListStartAppend, |
120 int32_t iBufStartAppend); | 120 int32_t iBufStartAppend); |
121 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, | 121 bool IsRightToLeft(const CPDF_TextObject* pTextObj, |
122 const CPDF_Font* pFont, | 122 const CPDF_Font* pFont, |
123 int nItems) const; | 123 int nItems) const; |
124 protected: | 124 protected: |
// Data members. m_charList/m_TextBuf and m_TempCharList/m_TempTextBuf are
// parallel pairs of the same two types (PAGECHAR_InfoArray, CFX_WideTextBuf).
125 CPDFText_ParseOptions m_ParseOptions; | 125 CPDFText_ParseOptions m_ParseOptions; |
126 CFX_WordArray m_CharIndex; | 126 CFX_WordArray m_CharIndex; |
127 const CPDF_PageObjects* m_pPage; | 127 const CPDF_PageObjects* m_pPage; |
128 PAGECHAR_InfoArray m_charList; | 128 PAGECHAR_InfoArray m_charList; |
129 CFX_WideTextBuf m_TextBuf; | 129 CFX_WideTextBuf m_TextBuf; |
130 PAGECHAR_InfoArray m_TempCharList; | 130 PAGECHAR_InfoArray m_TempCharList; |
131 CFX_WideTextBuf m_TempTextBuf; | 131 CFX_WideTextBuf m_TempTextBuf; |
132 int m_parserflag; | 132 int m_parserflag; |
133 CPDF_TextObject* m_pPreTextObj; | 133 CPDF_TextObject* m_pPreTextObj; |
134 CFX_AffineMatrix m_perMatrix; | 134 CFX_AffineMatrix m_perMatrix; |
135 FX_BOOL» » » » » » » m_IsParsered; | 135 bool» » » » » » » m_IsParsered; |
136 CFX_AffineMatrix m_DisplayMatrix; | 136 CFX_AffineMatrix m_DisplayMatrix; |
137 | 137 |
138 SEGMENT_Array m_Segment; | 138 SEGMENT_Array m_Segment; |
139 CFX_RectArray m_SelRects; | 139 CFX_RectArray m_SelRects; |
140 LINEOBJ m_LineObj; | 140 LINEOBJ m_LineObj; |
// NOTE(review): m_TextlineDir becomes bool here, while
// FindTextlineFlowDirection() above returns int32_t. If the implementation
// stores that result (or any value other than 0/1) in m_TextlineDir, the new
// type would collapse it to true/false — confirm against the .cpp before
// accepting this row.
141 FX_BOOL» » » » » » » m_TextlineDir; | 141 bool» » » » » » » m_TextlineDir; |
142 CFX_FloatRect m_CurlineRect; | 142 CFX_FloatRect m_CurlineRect; |
143 }; | 143 }; |
// CPDF_TextPageFind: implementation class derived from IPDF_TextPageFind,
// constructed from a const IPDF_TextPage pointer (kept in m_pTextPage).
// Patch effect (left column = old, right column = new): FindFirst, FindNext,
// FindPrev, IsMatchWholeWord and ExtractSubString now return bool, and the
// members m_bMatchCase, m_bMatchWholeWord and m_IsFind become bool. All other
// rows are identical in both columns.
144 class CPDF_TextPageFind: public IPDF_TextPageFind | 144 class CPDF_TextPageFind: public IPDF_TextPageFind |
145 { | 145 { |
146 public: | 146 public: |
147 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); | 147 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); |
148 virtual ~CPDF_TextPageFi
nd() {}; | 148 virtual ~CPDF_TextPageFi
nd() {}; |
149 public: | 149 public: |
// Search entry points; startPos defaults to 0 in FindFirst.
150 virtual» FX_BOOL»» » » » FindFirst(const CFX_Wide
String& findwhat, int flags, int startPos = 0); | 150 virtual» bool» » » » » FindFirst(const CFX_Wide
String& findwhat, int flags, int startPos = 0); |
151 virtual» FX_BOOL»» » » » FindNext(); | 151 virtual» bool» » » » » FindNext(); |
152 virtual» FX_BOOL»» » » » FindPrev(); | 152 virtual» bool» » » » » FindPrev(); |
153 | 153 |
154 virtual void GetRectArray(CFX_RectArr
ay& rects) const; | 154 virtual void GetRectArray(CFX_RectArr
ay& rects) const; |
155 virtual int GetCurOrder() const; | 155 virtual int GetCurOrder() const; |
156 virtual int GetMatchedCount()const; | 156 virtual int GetMatchedCount()const; |
157 protected: | 157 protected: |
158 void ExtractFindWhat(
const CFX_WideString& findwhat); | 158 void ExtractFindWhat(
const CFX_WideString& findwhat); |
159 FX_BOOL» » » » » » » IsMatchWholeWord
(const CFX_WideString& csPageText, int startPos, int endPos); | 159 bool» » » » » » » IsMatchWholeWord
(const CFX_WideString& csPageText, int startPos, int endPos); |
160 FX_BOOL» » » » » » » ExtractSubString
(CFX_WideString& rString, const FX_WCHAR* lpszFullString, | 160 bool» » » » » » » ExtractSubString
(CFX_WideString& rString, const FX_WCHAR* lpszFullString, |
161 int iSubString, FX_WCHAR chSep); | 161 int iSubString, FX_WCHAR chSep); |
162 CFX_WideString MakeReverse(const CFX_Wi
deString& str); | 162 CFX_WideString MakeReverse(const CFX_Wi
deString& str); |
163 int ReverseFind(cons
t CFX_WideString& csPageText, const CFX_WideString& csWord, int nStartPos, int&
WordLength); | 163 int ReverseFind(cons
t CFX_WideString& csPageText, const CFX_WideString& csWord, int nStartPos, int&
WordLength); |
164 int GetCharIndex(int
index) const; | 164 int GetCharIndex(int
index) const; |
165 private: | 165 private: |
// Search state. This class has its own m_CharIndex (CFX_WordArray), separate
// from the member of the same name and type in CPDF_TextPage.
166 CFX_WordArray m_CharIndex; | 166 CFX_WordArray m_CharIndex; |
167 const IPDF_TextPage* m_pTextPage; | 167 const IPDF_TextPage* m_pTextPage; |
168 CFX_WideString m_strText; | 168 CFX_WideString m_strText; |
169 CFX_WideString m_findWhat; | 169 CFX_WideString m_findWhat; |
170 int m_flags; | 170 int m_flags; |
171 CFX_WideStringArray m_csFindWhatArray; | 171 CFX_WideStringArray m_csFindWhatArray; |
172 int m_findNextStart; | 172 int m_findNextStart; |
173 int m_findPreStart; | 173 int m_findPreStart; |
174 FX_BOOL» » » » » » » m_bMatchCase; | 174 bool» » » » » » » m_bMatchCase; |
175 FX_BOOL» » » » » » » m_bMatchWholeWor
d; | 175 bool» » » » » » » m_bMatchWholeWor
d; |
176 int m_resStart; | 176 int m_resStart; |
177 int m_resEnd; | 177 int m_resEnd; |
178 CFX_RectArray m_resArray; | 178 CFX_RectArray m_resArray; |
179 FX_BOOL» » » » » » » m_IsFind; | 179 bool» » » » » » » m_IsFind; |
180 }; | 180 }; |
// CPDF_LinkExt: record with two int members and a URL string, stored by
// pointer in LINK_InfoArray. Unchanged by this patch (both columns match).
// Presumably m_Start/m_Count give the position and length of the link within
// the page text — inferred from the names; confirm against the code that
// fills these records.
// NOTE(review): the constructor body is empty and there are no member
// initializers, so m_Start and m_Count are left uninitialized until assigned.
181 class CPDF_LinkExt | 181 class CPDF_LinkExt |
182 { | 182 { |
183 public: | 183 public: |
184 CPDF_LinkExt() {}; | 184 CPDF_LinkExt() {}; |
185 int m_Start; | 185 int m_Start; |
186 int m_Count; | 186 int m_Count; |
187 CFX_WideString m_strUrl; | 187 CFX_WideString m_strUrl; |
188 virtual ~CPDF_LinkExt()
{}; | 188 virtual ~CPDF_LinkExt()
{}; |
189 }; | 189 }; |
// LINK_InfoArray: array of CPDF_LinkExt pointers; used as the type of
// CPDF_LinkExtract::m_LinkList. Unchanged by this patch.
190 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; | 190 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; |
// CPDF_LinkExtract: implementation class derived from IPDF_LinkExtract.
// Patch effect (left column = old, right column = new): ExtractLinks,
// IsExtract, CheckWebLink, CheckMailLink and AppendToLinkList now return
// bool, and m_IsParserd becomes bool. All other rows are identical.
// NOTE(review): ExtractLinks receives a const IPDF_TextPage*, but the stored
// m_pTextPage is a const CPDF_TextPage* — presumably the implementation
// downcasts; confirm in the .cpp.
191 class CPDF_LinkExtract: public IPDF_LinkExtract | 191 class CPDF_LinkExtract: public IPDF_LinkExtract |
192 { | 192 { |
193 public: | 193 public: |
194 CPDF_LinkExtract(); | 194 CPDF_LinkExtract(); |
195 virtual ~CPDF_LinkExtrac
t(); | 195 virtual ~CPDF_LinkExtrac
t(); |
196 virtual FX_BOOL» » » » » ExtractLinks(const IPDF_
TextPage* pTextPage); | 196 virtual bool» » » » » ExtractLinks(const IPDF_
TextPage* pTextPage); |
// IsExtract() simply returns the m_IsParserd member.
197 virtual» FX_BOOL»» » » » IsExtract() const | 197 virtual» bool» » » » » IsExtract() const |
198 { | 198 { |
199 return m_IsParserd; | 199 return m_IsParserd; |
200 } | 200 } |
201 public: | 201 public: |
202 virtual int CountLinks() const; | 202 virtual int CountLinks() const; |
203 virtual CFX_WideString GetURL(int index) const; | 203 virtual CFX_WideString GetURL(int index) const; |
204 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; | 204 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; |
205 virtual void GetRects(int index, CFX_
RectArray& rects)const; | 205 virtual void GetRects(int index, CFX_
RectArray& rects)const; |
206 protected: | 206 protected: |
207 void parserLink(); | 207 void parserLink(); |
208 void DeleteLinkList()
; | 208 void DeleteLinkList()
; |
209 FX_BOOL» » » » » » » CheckWebLink(CFX
_WideString& strBeCheck); | 209 bool» » » » » » » CheckWebLink(CFX
_WideString& strBeCheck); |
210 FX_BOOL» » » » » » » CheckMailLink(CF
X_WideString& str); | 210 bool» » » » » » » CheckMailLink(CF
X_WideString& str); |
211 FX_BOOL» » » » » » » AppendToLinkList
(int start, int count, const CFX_WideString& strUrl); | 211 bool» » » » » » » AppendToLinkList
(int start, int count, const CFX_WideString& strUrl); |
212 private: | 212 private: |
// m_LinkList holds CPDF_LinkExt pointers (see the LINK_InfoArray typedef).
213 LINK_InfoArray m_LinkList; | 213 LINK_InfoArray m_LinkList; |
214 const CPDF_TextPage* m_pTextPage; | 214 const CPDF_TextPage* m_pTextPage; |
215 CFX_WideString m_strPageText; | 215 CFX_WideString m_strPageText; |
216 FX_BOOL» » » » » » » m_IsParserd; | 216 bool» » » » » » » m_IsParserd; |
217 }; | 217 }; |
// Free-function declarations; unchanged by this patch (both columns match).
// FX_Unicode_GetNormalization takes a character and a destination pointer and
// returns an FX_STRSIZE — presumably the number of characters written to
// pDst; confirm against the definition.
// NormalizeString takes its string by non-const reference and returns void;
// NormalizeCompositeChar takes a character plus a destination string by
// non-const reference.
218 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); | 218 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); |
219 void NormalizeString(CFX_WideString& str); | 219 void NormalizeString(CFX_WideString& str); |
220 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); | 220 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); |
221 | 221 |
222 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 222 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
OLD | NEW |