OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
9 | 9 |
10 class CPDF_TextPage; | 10 class CPDF_TextPage; |
(...skipping 30 matching lines...) Expand all Loading... |
41 CPDF_TextObject* m_pTextObj; | 41 CPDF_TextObject* m_pTextObj; |
42 CFX_AffineMatrix m_formMatrix; | 42 CFX_AffineMatrix m_formMatrix; |
43 } PDFTEXT_Obj; | 43 } PDFTEXT_Obj; |
44 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; | 44 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; |
45 class CPDF_TextPage: public IPDF_TextPage | 45 class CPDF_TextPage: public IPDF_TextPage |
46 { | 46 { |
47 public: | 47 public: |
48 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); | 48 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); |
49 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); | 49 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); |
50 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); | 50 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); |
51 virtual FX_BOOL» » » » » ParseTextPage(); | 51 virtual bool» » » » » ParseTextPage(); |
52 virtual void» » » » » NormalizeObjects(FX_BOOL
bNormalize); | 52 virtual void» » » » » NormalizeObjects(bool bN
ormalize); |
53 virtual» FX_BOOL»» » » » IsParsered() const | 53 virtual» bool» » » » » IsParsered() const |
54 { | 54 { |
55 return m_IsParsered; | 55 return m_IsParsered; |
56 } | 56 } |
57 virtual ~CPDF_TextPage() {}; | 57 virtual ~CPDF_TextPage() {}; |
58 public: | 58 public: |
59 virtual int CharIndexFromTextIndex(int TextIndex)const ; | 59 virtual int CharIndexFromTextIndex(int TextIndex)const ; |
60 virtual int TextIndexFromCharIndex(int CharIndex)const; | 60 virtual int TextIndexFromCharIndex(int CharIndex)const; |
61 virtual int CountChars() const; | 61 virtual int CountChars() const; |
62 virtual void GetCharInfo(int index, F
PDF_CHAR_INFO & info) const; | 62 virtual void GetCharInfo(int index, F
PDF_CHAR_INFO & info) const; |
63 virtual void GetRectArray(int start,
int nCount, CFX_RectArray& rectArray) const; | 63 virtual void GetRectArray(int start,
int nCount, CFX_RectArray& rectArray) const; |
64 virtual int GetIndexAtPos(CPDF_Point
point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const; | 64 virtual int GetIndexAtPos(CPDF_Point
point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const; |
65 virtual int GetIndexAtPos(FX_FLOAT x
, FX_FLOAT y, FX_FLOAT xTorelance, | 65 virtual int GetIndexAtPos(FX_FLOAT x
, FX_FLOAT y, FX_FLOAT xTorelance, |
66 FX_FLOAT yTorelance) const; | 66 FX_FLOAT yTorelance) const; |
67 virtual CFX_WideString GetTextByRect(const CFX_FloatRec
t& rect) const; | 67 virtual CFX_WideString GetTextByRect(const CFX_FloatRec
t& rect) const; |
68 virtual void GetRectsArrayByRect(cons
t CFX_FloatRect& rect, CFX_RectArray& resRectArray) const; | 68 virtual void GetRectsArrayByRect(cons
t CFX_FloatRect& rect, CFX_RectArray& resRectArray) const; |
69 virtual CFX_WideString GetPageText(int start = 0, int n
Count = -1) const; | 69 virtual CFX_WideString GetPageText(int start = 0, int n
Count = -1) const; |
70 | 70 |
71 virtual int CountRects(int start, in
t nCount); | 71 virtual int CountRects(int start, in
t nCount); |
72 virtual void GetRect(int rectIndex, F
X_FLOAT& left, FX_FLOAT& top | 72 virtual void GetRect(int rectIndex, F
X_FLOAT& left, FX_FLOAT& top |
73 , FX_FLOAT& right, FX_FLOAT &bottom)
const; | 73 , FX_FLOAT& right, FX_FLOAT &bottom)
const; |
74 virtual FX_BOOL» » » » » GetBaselineRotate(int re
ctIndex, int& Rotate); | 74 virtual bool» » » » » GetBaselineRotate(int re
ctIndex, int& Rotate); |
75 virtual FX_BOOL» » » » » GetBaselineRotate(const
CFX_FloatRect& rect, int& Rotate); | 75 virtual bool» » » » » GetBaselineRotate(const
CFX_FloatRect& rect, int& Rotate); |
76 virtual int CountBoundedSegm
ents(FX_FLOAT left, FX_FLOAT top, | 76 virtual int CountBoundedSegm
ents(FX_FLOAT left, FX_FLOAT top, |
77 FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE); | 77 FX_FLOAT right, FX_FLOAT bottom, bool bContains = false); |
78 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; | 78 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; |
79 virtual int GetWordBreak(int index,
int direction) const; | 79 virtual int GetWordBreak(int index,
int direction) const; |
80 public: | 80 public: |
81 const PAGECHAR_InfoArray* GetCharList() const | 81 const PAGECHAR_InfoArray* GetCharList() const |
82 { | 82 { |
83 return &m_charList; | 83 return &m_charList; |
84 } | 84 } |
85 static» FX_BOOL»» » » » IsRectIntersect(const CF
X_FloatRect& rect1, const CFX_FloatRect& rect2); | 85 static» bool» » » » » IsRectIntersect(const CF
X_FloatRect& rect1, const CFX_FloatRect& rect2); |
86 static» FX_BOOL»» » » » IsLetter(FX_WCHAR unicod
e); | 86 static» bool» » » » » IsLetter(FX_WCHAR unicod
e); |
87 private: | 87 private: |
88 FX_BOOL» » » » » » » IsHyphen(FX_WCHA
R curChar); | 88 bool» » » » » » » IsHyphen(FX_WCHA
R curChar); |
89 bool IsControlChar(co
nst PAGECHAR_INFO& charInfo); | 89 bool IsControlChar(co
nst PAGECHAR_INFO& charInfo); |
90 FX_BOOL» » » » » » » GetBaselineRotat
e(int start, int end, int& Rotate); | 90 bool» » » » » » » GetBaselineRotat
e(int start, int end, int& Rotate); |
91 void ProcessObject(); | 91 void ProcessObject(); |
92 void ProcessFormObjec
t(CPDF_FormObject* pFormObj, const CFX_AffineMatrix& formMatrix); | 92 void ProcessFormObjec
t(CPDF_FormObject* pFormObj, const CFX_AffineMatrix& formMatrix); |
93 void ProcessTextObjec
t(PDFTEXT_Obj pObj); | 93 void ProcessTextObjec
t(PDFTEXT_Obj pObj); |
94 void ProcessTextObjec
t(CPDF_TextObject* pTextObj, const CFX_AffineMatrix& formMatrix, FX_POSITIO
N ObjPos); | 94 void ProcessTextObjec
t(CPDF_TextObject* pTextObj, const CFX_AffineMatrix& formMatrix, FX_POSITIO
N ObjPos); |
95 int ProcessInsertObj
ect(const CPDF_TextObject* pObj, const CFX_AffineMatrix& formMatrix); | 95 int ProcessInsertObj
ect(const CPDF_TextObject* pObj, const CFX_AffineMatrix& formMatrix); |
96 FX_BOOL» » » » » » » GenerateCharInfo
(FX_WCHAR unicode, PAGECHAR_INFO& info); | 96 bool» » » » » » » GenerateCharInfo
(FX_WCHAR unicode, PAGECHAR_INFO& info); |
97 FX_BOOL» » » » » » » IsSameAsPreTextO
bject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos); | 97 bool» » » » » » » IsSameAsPreTextO
bject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos); |
98 FX_BOOL» » » » » » » IsSameTextObject
(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2); | 98 bool» » » » » » » IsSameTextObject
(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2); |
99 int GetCharWidth(FX_
DWORD charCode, CPDF_Font* pFont) const; | 99 int GetCharWidth(FX_
DWORD charCode, CPDF_Font* pFont) const; |
100 void CloseTempLine(); | 100 void CloseTempLine(); |
101 void OnPiece(IFX_Bidi
Char* pBidi, CFX_WideString& str); | 101 void OnPiece(IFX_Bidi
Char* pBidi, CFX_WideString& str); |
102 int32_t PreMarkedContent(PDFTEXT_Obj pObj); | 102 int32_t PreMarkedContent(PDFTEXT_Obj pObj); |
103 void ProcessMarkedContent(PDFTEXT_Obj pObj); | 103 void ProcessMarkedContent(PDFTEXT_Obj pObj); |
104 void CheckMarkedContentObject(int32_t& start, int32_t& nCount
) const; | 104 void CheckMarkedContentObject(int32_t& start, int32_t& nCount
) const; |
105 void FindPreviousTextObject(void); | 105 void FindPreviousTextObject(void); |
106 void AddCharInfoByLRDirection(CFX_WideString& str, int i); | 106 void AddCharInfoByLRDirection(CFX_WideString& str, int i); |
107 void AddCharInfoByRLDirection(CFX_WideString& str, int i); | 107 void AddCharInfoByRLDirection(CFX_WideString& str, int i); |
108 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); | 108 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); |
109 int32_t FindTextlineFlowDirection(); | 109 int32_t FindTextlineFlowDirection(); |
110 void SwapTempTextBuf(int32_t iCharListStartAppend, | 110 void SwapTempTextBuf(int32_t iCharListStartAppend, |
111 int32_t iBufStartAppend); | 111 int32_t iBufStartAppend); |
112 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, | 112 bool IsRightToLeft(const CPDF_TextObject* pTextObj, |
113 const CPDF_Font* pFont, | 113 const CPDF_Font* pFont, |
114 int nItems) const; | 114 int nItems) const; |
115 protected: | 115 protected: |
116 CPDFText_ParseOptions m_ParseOptions; | 116 CPDFText_ParseOptions m_ParseOptions; |
117 CFX_WordArray m_CharIndex; | 117 CFX_WordArray m_CharIndex; |
118 const CPDF_PageObjects* m_pPage; | 118 const CPDF_PageObjects* m_pPage; |
119 PAGECHAR_InfoArray m_charList; | 119 PAGECHAR_InfoArray m_charList; |
120 CFX_WideTextBuf m_TextBuf; | 120 CFX_WideTextBuf m_TextBuf; |
121 PAGECHAR_InfoArray m_TempCharList; | 121 PAGECHAR_InfoArray m_TempCharList; |
122 CFX_WideTextBuf m_TempTextBuf; | 122 CFX_WideTextBuf m_TempTextBuf; |
123 int m_parserflag; | 123 int m_parserflag; |
124 CPDF_TextObject* m_pPreTextObj; | 124 CPDF_TextObject* m_pPreTextObj; |
125 CFX_AffineMatrix m_perMatrix; | 125 CFX_AffineMatrix m_perMatrix; |
126 FX_BOOL» » » » » » » m_IsParsered; | 126 bool» » » » » » » m_IsParsered; |
127 CFX_AffineMatrix m_DisplayMatrix; | 127 CFX_AffineMatrix m_DisplayMatrix; |
128 | 128 |
129 SEGMENT_Array m_Segment; | 129 SEGMENT_Array m_Segment; |
130 CFX_RectArray m_SelRects; | 130 CFX_RectArray m_SelRects; |
131 LINEOBJ m_LineObj; | 131 LINEOBJ m_LineObj; |
132 FX_BOOL» » » » » » » m_TextlineDir; | 132 bool» » » » » » » m_TextlineDir; |
133 CFX_FloatRect m_CurlineRect; | 133 CFX_FloatRect m_CurlineRect; |
134 }; | 134 }; |
135 class CPDF_TextPageFind: public IPDF_TextPageFind | 135 class CPDF_TextPageFind: public IPDF_TextPageFind |
136 { | 136 { |
137 public: | 137 public: |
138 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); | 138 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); |
139 virtual ~CPDF_TextPageFi
nd() {}; | 139 virtual ~CPDF_TextPageFi
nd() {}; |
140 public: | 140 public: |
141 virtual» FX_BOOL»» » » » FindFirst(const CFX_Wide
String& findwhat, int flags, int startPos = 0); | 141 virtual» bool» » » » » FindFirst(const CFX_Wide
String& findwhat, int flags, int startPos = 0); |
142 virtual» FX_BOOL»» » » » FindNext(); | 142 virtual» bool» » » » » FindNext(); |
143 virtual» FX_BOOL»» » » » FindPrev(); | 143 virtual» bool» » » » » FindPrev(); |
144 | 144 |
145 virtual void GetRectArray(CFX_RectArr
ay& rects) const; | 145 virtual void GetRectArray(CFX_RectArr
ay& rects) const; |
146 virtual int GetCurOrder() const; | 146 virtual int GetCurOrder() const; |
147 virtual int GetMatchedCount()const; | 147 virtual int GetMatchedCount()const; |
148 protected: | 148 protected: |
149 void ExtractFindWhat(
const CFX_WideString& findwhat); | 149 void ExtractFindWhat(
const CFX_WideString& findwhat); |
150 FX_BOOL» » » » » » » IsMatchWholeWord
(const CFX_WideString& csPageText, int startPos, int endPos); | 150 bool» » » » » » » IsMatchWholeWord
(const CFX_WideString& csPageText, int startPos, int endPos); |
151 FX_BOOL» » » » » » » ExtractSubString
(CFX_WideString& rString, const FX_WCHAR* lpszFullString, | 151 bool» » » » » » » ExtractSubString
(CFX_WideString& rString, const FX_WCHAR* lpszFullString, |
152 int iSubString, FX_WCHAR chSep); | 152 int iSubString, FX_WCHAR chSep); |
153 CFX_WideString MakeReverse(const CFX_Wi
deString& str); | 153 CFX_WideString MakeReverse(const CFX_Wi
deString& str); |
154 int ReverseFind(cons
t CFX_WideString& csPageText, const CFX_WideString& csWord, int nStartPos, int&
WordLength); | 154 int ReverseFind(cons
t CFX_WideString& csPageText, const CFX_WideString& csWord, int nStartPos, int&
WordLength); |
155 int GetCharIndex(int
index) const; | 155 int GetCharIndex(int
index) const; |
156 private: | 156 private: |
157 CFX_WordArray m_CharIndex; | 157 CFX_WordArray m_CharIndex; |
158 const IPDF_TextPage* m_pTextPage; | 158 const IPDF_TextPage* m_pTextPage; |
159 CFX_WideString m_strText; | 159 CFX_WideString m_strText; |
160 CFX_WideString m_findWhat; | 160 CFX_WideString m_findWhat; |
161 int m_flags; | 161 int m_flags; |
162 CFX_WideStringArray m_csFindWhatArray; | 162 CFX_WideStringArray m_csFindWhatArray; |
163 int m_findNextStart; | 163 int m_findNextStart; |
164 int m_findPreStart; | 164 int m_findPreStart; |
165 FX_BOOL» » » » » » » m_bMatchCase; | 165 bool» » » » » » » m_bMatchCase; |
166 FX_BOOL» » » » » » » m_bMatchWholeWor
d; | 166 bool» » » » » » » m_bMatchWholeWor
d; |
167 int m_resStart; | 167 int m_resStart; |
168 int m_resEnd; | 168 int m_resEnd; |
169 CFX_RectArray m_resArray; | 169 CFX_RectArray m_resArray; |
170 FX_BOOL» » » » » » » m_IsFind; | 170 bool» » » » » » » m_IsFind; |
171 }; | 171 }; |
172 class CPDF_LinkExt | 172 class CPDF_LinkExt |
173 { | 173 { |
174 public: | 174 public: |
175 CPDF_LinkExt() {}; | 175 CPDF_LinkExt() {}; |
176 int m_Start; | 176 int m_Start; |
177 int m_Count; | 177 int m_Count; |
178 CFX_WideString m_strUrl; | 178 CFX_WideString m_strUrl; |
179 virtual ~CPDF_LinkExt()
{}; | 179 virtual ~CPDF_LinkExt()
{}; |
180 }; | 180 }; |
181 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; | 181 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; |
182 class CPDF_LinkExtract: public IPDF_LinkExtract | 182 class CPDF_LinkExtract: public IPDF_LinkExtract |
183 { | 183 { |
184 public: | 184 public: |
185 CPDF_LinkExtract(); | 185 CPDF_LinkExtract(); |
186 virtual ~CPDF_LinkExtrac
t(); | 186 virtual ~CPDF_LinkExtrac
t(); |
187 virtual FX_BOOL» » » » » ExtractLinks(const IPDF_
TextPage* pTextPage); | 187 virtual bool» » » » » ExtractLinks(const IPDF_
TextPage* pTextPage); |
188 virtual» FX_BOOL»» » » » IsExtract() const | 188 virtual» bool» » » » » IsExtract() const |
189 { | 189 { |
190 return m_IsParserd; | 190 return m_IsParserd; |
191 } | 191 } |
192 public: | 192 public: |
193 virtual int CountLinks() const; | 193 virtual int CountLinks() const; |
194 virtual CFX_WideString GetURL(int index) const; | 194 virtual CFX_WideString GetURL(int index) const; |
195 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; | 195 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; |
196 virtual void GetRects(int index, CFX_
RectArray& rects)const; | 196 virtual void GetRects(int index, CFX_
RectArray& rects)const; |
197 protected: | 197 protected: |
198 void parserLink(); | 198 void parserLink(); |
199 void DeleteLinkList()
; | 199 void DeleteLinkList()
; |
200 FX_BOOL» » » » » » » CheckWebLink(CFX
_WideString& strBeCheck); | 200 bool» » » » » » » CheckWebLink(CFX
_WideString& strBeCheck); |
201 FX_BOOL» » » » » » » CheckMailLink(CF
X_WideString& str); | 201 bool» » » » » » » CheckMailLink(CF
X_WideString& str); |
202 FX_BOOL» » » » » » » AppendToLinkList
(int start, int count, const CFX_WideString& strUrl); | 202 bool» » » » » » » AppendToLinkList
(int start, int count, const CFX_WideString& strUrl); |
203 private: | 203 private: |
204 LINK_InfoArray m_LinkList; | 204 LINK_InfoArray m_LinkList; |
205 const CPDF_TextPage* m_pTextPage; | 205 const CPDF_TextPage* m_pTextPage; |
206 CFX_WideString m_strPageText; | 206 CFX_WideString m_strPageText; |
207 FX_BOOL» » » » » » » m_IsParserd; | 207 bool» » » » » » » m_IsParserd; |
208 }; | 208 }; |
209 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); | 209 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); |
210 void NormalizeString(CFX_WideString& str); | 210 void NormalizeString(CFX_WideString& str); |
211 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); | 211 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); |
212 | 212 |
213 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 213 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
OLD | NEW |