| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| 9 | 9 |
| 10 class CPDF_TextParseOptions | 10 class CPDF_TextParseOptions |
| 11 { | 11 { |
| 12 public: | 12 public: |
| 13 CPDF_TextParseOptions(); | 13 CPDF_TextParseOptions(); |
| 14 FX_BOOL» » » m_bCheckObjectOrder; | 14 bool» » » m_bCheckObjectOrder; |
| 15 FX_BOOL» » » m_bCheckDirection; | 15 bool» » » m_bCheckDirection; |
| 16 int m_nCheckSameObject; | 16 int m_nCheckSameObject; |
| 17 }; | 17 }; |
| 18 class CPDF_TextPage; | 18 class CPDF_TextPage; |
| 19 class CPDF_LinkExtract; | 19 class CPDF_LinkExtract; |
| 20 class CPDF_TextPageFind; | 20 class CPDF_TextPageFind; |
| 21 class CPDF_DocProgressiveSearch; | 21 class CPDF_DocProgressiveSearch; |
| 22 #define FPDFTEXT_CHAR_ERROR -1 | 22 #define FPDFTEXT_CHAR_ERROR -1 |
| 23 #define FPDFTEXT_CHAR_NORMAL 0 | 23 #define FPDFTEXT_CHAR_NORMAL 0 |
| 24 #define FPDFTEXT_CHAR_GENERATED 1 | 24 #define FPDFTEXT_CHAR_GENERATED 1 |
| 25 #define FPDFTEXT_CHAR_UNUNICODE 2 | 25 #define FPDFTEXT_CHAR_UNUNICODE 2 |
| (...skipping 23 matching lines...) Expand all Loading... |
| 49 CPDF_TextObject* m_pTextObj; | 49 CPDF_TextObject* m_pTextObj; |
| 50 CFX_AffineMatrix m_formMatrix; | 50 CFX_AffineMatrix m_formMatrix; |
| 51 } PDFTEXT_Obj; | 51 } PDFTEXT_Obj; |
| 52 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; | 52 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; |
| 53 class CPDF_TextPage: public IPDF_TextPage | 53 class CPDF_TextPage: public IPDF_TextPage |
| 54 { | 54 { |
| 55 public: | 55 public: |
| 56 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); | 56 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); |
| 57 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); | 57 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); |
| 58 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); | 58 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); |
| 59 virtual FX_BOOL» » » » » ParseTextPage(); | 59 virtual bool» » » » » ParseTextPage(); |
| 60 virtual void» » » » » NormalizeObjects(FX_BOOL
bNormalize); | 60 virtual void» » » » » NormalizeObjects(bool bN
ormalize); |
| 61 virtual» FX_BOOL»» » » » IsParsered() const | 61 virtual» bool» » » » » IsParsered() const |
| 62 { | 62 { |
| 63 return m_IsParsered; | 63 return m_IsParsered; |
| 64 } | 64 } |
| 65 virtual ~CPDF_TextPage() {}; | 65 virtual ~CPDF_TextPage() {}; |
| 66 public: | 66 public: |
| 67 virtual int CharIndexFromTextIndex(int TextIndex)const ; | 67 virtual int CharIndexFromTextIndex(int TextIndex)const ; |
| 68 virtual int TextIndexFromCharIndex(int CharIndex)const; | 68 virtual int TextIndexFromCharIndex(int CharIndex)const; |
| 69 virtual int CountChars() const; | 69 virtual int CountChars() const; |
| 70 virtual void GetCharInfo(int index, F
PDF_CHAR_INFO & info) const; | 70 virtual void GetCharInfo(int index, F
PDF_CHAR_INFO & info) const; |
| 71 virtual void GetRectArray(int start,
int nCount, CFX_RectArray& rectArray) const; | 71 virtual void GetRectArray(int start,
int nCount, CFX_RectArray& rectArray) const; |
| 72 virtual int GetIndexAtPos(CPDF_Point
point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const; | 72 virtual int GetIndexAtPos(CPDF_Point
point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const; |
| 73 virtual int GetIndexAtPos(FX_FLOAT x
, FX_FLOAT y, FX_FLOAT xTorelance, | 73 virtual int GetIndexAtPos(FX_FLOAT x
, FX_FLOAT y, FX_FLOAT xTorelance, |
| 74 FX_FLOAT yTorelance) const; | 74 FX_FLOAT yTorelance) const; |
| 75 virtual CFX_WideString GetTextByRect(const CFX_FloatRec
t& rect) const; | 75 virtual CFX_WideString GetTextByRect(const CFX_FloatRec
t& rect) const; |
| 76 virtual void GetRectsArrayByRect(cons
t CFX_FloatRect& rect, CFX_RectArray& resRectArray) const; | 76 virtual void GetRectsArrayByRect(cons
t CFX_FloatRect& rect, CFX_RectArray& resRectArray) const; |
| 77 virtual int GetOrderByDirect
ion(int order, int direction) const; | 77 virtual int GetOrderByDirect
ion(int order, int direction) const; |
| 78 virtual CFX_WideString GetPageText(int start = 0, int n
Count = -1) const; | 78 virtual CFX_WideString GetPageText(int start = 0, int n
Count = -1) const; |
| 79 | 79 |
| 80 virtual int CountRects(int start, in
t nCount); | 80 virtual int CountRects(int start, in
t nCount); |
| 81 virtual void GetRect(int rectIndex, F
X_FLOAT& left, FX_FLOAT& top | 81 virtual void GetRect(int rectIndex, F
X_FLOAT& left, FX_FLOAT& top |
| 82 , FX_FLOAT& right, FX_FLOAT &bottom)
const; | 82 , FX_FLOAT& right, FX_FLOAT &bottom)
const; |
| 83 virtual FX_BOOL» » » » » GetBaselineRotate(int re
ctIndex, int& Rotate); | 83 virtual bool» » » » » GetBaselineRotate(int re
ctIndex, int& Rotate); |
| 84 virtual FX_BOOL» » » » » GetBaselineRotate(const
CFX_FloatRect& rect, int& Rotate); | 84 virtual bool» » » » » GetBaselineRotate(const
CFX_FloatRect& rect, int& Rotate); |
| 85 virtual int CountBoundedSegm
ents(FX_FLOAT left, FX_FLOAT top, | 85 virtual int CountBoundedSegm
ents(FX_FLOAT left, FX_FLOAT top, |
| 86 FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE); | 86 FX_FLOAT right, FX_FLOAT bottom, bool bContains = false); |
| 87 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; | 87 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; |
| 88 virtual int GetWordBreak(int index,
int direction) const; | 88 virtual int GetWordBreak(int index,
int direction) const; |
| 89 public: | 89 public: |
| 90 const PAGECHAR_InfoArray* GetCharList() const | 90 const PAGECHAR_InfoArray* GetCharList() const |
| 91 { | 91 { |
| 92 return &m_charList; | 92 return &m_charList; |
| 93 } | 93 } |
| 94 static» FX_BOOL»» » » » IsRectIntersect(const CF
X_FloatRect& rect1, const CFX_FloatRect& rect2); | 94 static» bool» » » » » IsRectIntersect(const CF
X_FloatRect& rect1, const CFX_FloatRect& rect2); |
| 95 static» FX_BOOL»» » » » IsLetter(FX_WCHAR unicod
e); | 95 static» bool» » » » » IsLetter(FX_WCHAR unicod
e); |
| 96 private: | 96 private: |
| 97 FX_BOOL» » » » » » » IsHyphen(FX_WCHA
R curChar); | 97 bool» » » » » » » IsHyphen(FX_WCHA
R curChar); |
| 98 FX_BOOL» » » » » » » IsControlChar(PA
GECHAR_INFO* pCharInfo); | 98 bool» » » » » » » IsControlChar(PA
GECHAR_INFO* pCharInfo); |
| 99 FX_BOOL» » » » » » » GetBaselineRotat
e(int start, int end, int& Rotate); | 99 bool» » » » » » » GetBaselineRotat
e(int start, int end, int& Rotate); |
| 100 void ProcessObject(); | 100 void ProcessObject(); |
| 101 void ProcessFormObjec
t(CPDF_FormObject* pFormObj, const CFX_AffineMatrix& formMatrix); | 101 void ProcessFormObjec
t(CPDF_FormObject* pFormObj, const CFX_AffineMatrix& formMatrix); |
| 102 void ProcessTextObjec
t(PDFTEXT_Obj pObj); | 102 void ProcessTextObjec
t(PDFTEXT_Obj pObj); |
| 103 void ProcessTextObjec
t(CPDF_TextObject* pTextObj, const CFX_AffineMatrix& formMatrix, FX_POSITIO
N ObjPos); | 103 void ProcessTextObjec
t(CPDF_TextObject* pTextObj, const CFX_AffineMatrix& formMatrix, FX_POSITIO
N ObjPos); |
| 104 int ProcessInsertObj
ect(const CPDF_TextObject* pObj, const CFX_AffineMatrix& formMatrix); | 104 int ProcessInsertObj
ect(const CPDF_TextObject* pObj, const CFX_AffineMatrix& formMatrix); |
| 105 FX_BOOL» » » » » » » GenerateCharInfo
(FX_WCHAR unicode, PAGECHAR_INFO& info); | 105 bool» » » » » » » GenerateCharInfo
(FX_WCHAR unicode, PAGECHAR_INFO& info); |
| 106 FX_BOOL» » » » » » » IsSameAsPreTextO
bject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos); | 106 bool» » » » » » » IsSameAsPreTextO
bject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos); |
| 107 FX_BOOL» » » » » » » IsSameTextObject
(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2); | 107 bool» » » » » » » IsSameTextObject
(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2); |
| 108 int GetCharWidth(FX_
DWORD charCode, CPDF_Font* pFont) const; | 108 int GetCharWidth(FX_
DWORD charCode, CPDF_Font* pFont) const; |
| 109 void CloseTempLine(); | 109 void CloseTempLine(); |
| 110 void OnPiece(IFX_Bidi
Char* pBidi, CFX_WideString& str); | 110 void OnPiece(IFX_Bidi
Char* pBidi, CFX_WideString& str); |
| 111 int32_t PreMarkedContent(PDFTEXT_Obj pObj); | 111 int32_t PreMarkedContent(PDFTEXT_Obj pObj); |
| 112 void ProcessMarkedContent(PDFTEXT_Obj pObj); | 112 void ProcessMarkedContent(PDFTEXT_Obj pObj); |
| 113 void CheckMarkedContentObject(int32_t& start, int32_t& nCount
) const; | 113 void CheckMarkedContentObject(int32_t& start, int32_t& nCount
) const; |
| 114 void FindPreviousTextObject(void); | 114 void FindPreviousTextObject(void); |
| 115 void AddCharInfoByLRDirection(CFX_WideString& str, int i); | 115 void AddCharInfoByLRDirection(CFX_WideString& str, int i); |
| 116 void AddCharInfoByRLDirection(CFX_WideString& str, int i); | 116 void AddCharInfoByRLDirection(CFX_WideString& str, int i); |
| 117 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); | 117 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); |
| 118 int32_t FindTextlineFlowDirection(); | 118 int32_t FindTextlineFlowDirection(); |
| 119 void SwapTempTextBuf(int32_t iCharListStartAppend, | 119 void SwapTempTextBuf(int32_t iCharListStartAppend, |
| 120 int32_t iBufStartAppend); | 120 int32_t iBufStartAppend); |
| 121 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, | 121 bool IsRightToLeft(const CPDF_TextObject* pTextObj, |
| 122 const CPDF_Font* pFont, | 122 const CPDF_Font* pFont, |
| 123 int nItems) const; | 123 int nItems) const; |
| 124 protected: | 124 protected: |
| 125 CPDFText_ParseOptions m_ParseOptions; | 125 CPDFText_ParseOptions m_ParseOptions; |
| 126 CFX_WordArray m_CharIndex; | 126 CFX_WordArray m_CharIndex; |
| 127 const CPDF_PageObjects* m_pPage; | 127 const CPDF_PageObjects* m_pPage; |
| 128 PAGECHAR_InfoArray m_charList; | 128 PAGECHAR_InfoArray m_charList; |
| 129 CFX_WideTextBuf m_TextBuf; | 129 CFX_WideTextBuf m_TextBuf; |
| 130 PAGECHAR_InfoArray m_TempCharList; | 130 PAGECHAR_InfoArray m_TempCharList; |
| 131 CFX_WideTextBuf m_TempTextBuf; | 131 CFX_WideTextBuf m_TempTextBuf; |
| 132 int m_parserflag; | 132 int m_parserflag; |
| 133 CPDF_TextObject* m_pPreTextObj; | 133 CPDF_TextObject* m_pPreTextObj; |
| 134 CFX_AffineMatrix m_perMatrix; | 134 CFX_AffineMatrix m_perMatrix; |
| 135 FX_BOOL» » » » » » » m_IsParsered; | 135 bool» » » » » » » m_IsParsered; |
| 136 CFX_AffineMatrix m_DisplayMatrix; | 136 CFX_AffineMatrix m_DisplayMatrix; |
| 137 | 137 |
| 138 SEGMENT_Array m_Segment; | 138 SEGMENT_Array m_Segment; |
| 139 CFX_RectArray m_SelRects; | 139 CFX_RectArray m_SelRects; |
| 140 LINEOBJ m_LineObj; | 140 LINEOBJ m_LineObj; |
| 141 FX_BOOL» » » » » » » m_TextlineDir; | 141 bool» » » » » » » m_TextlineDir; |
| 142 CFX_FloatRect m_CurlineRect; | 142 CFX_FloatRect m_CurlineRect; |
| 143 }; | 143 }; |
| 144 class CPDF_TextPageFind: public IPDF_TextPageFind | 144 class CPDF_TextPageFind: public IPDF_TextPageFind |
| 145 { | 145 { |
| 146 public: | 146 public: |
| 147 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); | 147 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); |
| 148 virtual ~CPDF_TextPageFi
nd() {}; | 148 virtual ~CPDF_TextPageFi
nd() {}; |
| 149 public: | 149 public: |
| 150 virtual» FX_BOOL»» » » » FindFirst(const CFX_Wide
String& findwhat, int flags, int startPos = 0); | 150 virtual» bool» » » » » FindFirst(const CFX_Wide
String& findwhat, int flags, int startPos = 0); |
| 151 virtual» FX_BOOL»» » » » FindNext(); | 151 virtual» bool» » » » » FindNext(); |
| 152 virtual» FX_BOOL»» » » » FindPrev(); | 152 virtual» bool» » » » » FindPrev(); |
| 153 | 153 |
| 154 virtual void GetRectArray(CFX_RectArr
ay& rects) const; | 154 virtual void GetRectArray(CFX_RectArr
ay& rects) const; |
| 155 virtual int GetCurOrder() const; | 155 virtual int GetCurOrder() const; |
| 156 virtual int GetMatchedCount()const; | 156 virtual int GetMatchedCount()const; |
| 157 protected: | 157 protected: |
| 158 void ExtractFindWhat(
const CFX_WideString& findwhat); | 158 void ExtractFindWhat(
const CFX_WideString& findwhat); |
| 159 FX_BOOL» » » » » » » IsMatchWholeWord
(const CFX_WideString& csPageText, int startPos, int endPos); | 159 bool» » » » » » » IsMatchWholeWord
(const CFX_WideString& csPageText, int startPos, int endPos); |
| 160 FX_BOOL» » » » » » » ExtractSubString
(CFX_WideString& rString, const FX_WCHAR* lpszFullString, | 160 bool» » » » » » » ExtractSubString
(CFX_WideString& rString, const FX_WCHAR* lpszFullString, |
| 161 int iSubString, FX_WCHAR chSep); | 161 int iSubString, FX_WCHAR chSep); |
| 162 CFX_WideString MakeReverse(const CFX_Wi
deString& str); | 162 CFX_WideString MakeReverse(const CFX_Wi
deString& str); |
| 163 int ReverseFind(cons
t CFX_WideString& csPageText, const CFX_WideString& csWord, int nStartPos, int&
WordLength); | 163 int ReverseFind(cons
t CFX_WideString& csPageText, const CFX_WideString& csWord, int nStartPos, int&
WordLength); |
| 164 int GetCharIndex(int
index) const; | 164 int GetCharIndex(int
index) const; |
| 165 private: | 165 private: |
| 166 CFX_WordArray m_CharIndex; | 166 CFX_WordArray m_CharIndex; |
| 167 const IPDF_TextPage* m_pTextPage; | 167 const IPDF_TextPage* m_pTextPage; |
| 168 CFX_WideString m_strText; | 168 CFX_WideString m_strText; |
| 169 CFX_WideString m_findWhat; | 169 CFX_WideString m_findWhat; |
| 170 int m_flags; | 170 int m_flags; |
| 171 CFX_WideStringArray m_csFindWhatArray; | 171 CFX_WideStringArray m_csFindWhatArray; |
| 172 int m_findNextStart; | 172 int m_findNextStart; |
| 173 int m_findPreStart; | 173 int m_findPreStart; |
| 174 FX_BOOL» » » » » » » m_bMatchCase; | 174 bool» » » » » » » m_bMatchCase; |
| 175 FX_BOOL» » » » » » » m_bMatchWholeWor
d; | 175 bool» » » » » » » m_bMatchWholeWor
d; |
| 176 int m_resStart; | 176 int m_resStart; |
| 177 int m_resEnd; | 177 int m_resEnd; |
| 178 CFX_RectArray m_resArray; | 178 CFX_RectArray m_resArray; |
| 179 FX_BOOL» » » » » » » m_IsFind; | 179 bool» » » » » » » m_IsFind; |
| 180 }; | 180 }; |
| 181 class CPDF_LinkExt | 181 class CPDF_LinkExt |
| 182 { | 182 { |
| 183 public: | 183 public: |
| 184 CPDF_LinkExt() {}; | 184 CPDF_LinkExt() {}; |
| 185 int m_Start; | 185 int m_Start; |
| 186 int m_Count; | 186 int m_Count; |
| 187 CFX_WideString m_strUrl; | 187 CFX_WideString m_strUrl; |
| 188 virtual ~CPDF_LinkExt()
{}; | 188 virtual ~CPDF_LinkExt()
{}; |
| 189 }; | 189 }; |
| 190 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; | 190 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; |
| 191 class CPDF_LinkExtract: public IPDF_LinkExtract | 191 class CPDF_LinkExtract: public IPDF_LinkExtract |
| 192 { | 192 { |
| 193 public: | 193 public: |
| 194 CPDF_LinkExtract(); | 194 CPDF_LinkExtract(); |
| 195 virtual ~CPDF_LinkExtrac
t(); | 195 virtual ~CPDF_LinkExtrac
t(); |
| 196 virtual FX_BOOL» » » » » ExtractLinks(const IPDF_
TextPage* pTextPage); | 196 virtual bool» » » » » ExtractLinks(const IPDF_
TextPage* pTextPage); |
| 197 virtual» FX_BOOL»» » » » IsExtract() const | 197 virtual» bool» » » » » IsExtract() const |
| 198 { | 198 { |
| 199 return m_IsParserd; | 199 return m_IsParserd; |
| 200 } | 200 } |
| 201 public: | 201 public: |
| 202 virtual int CountLinks() const; | 202 virtual int CountLinks() const; |
| 203 virtual CFX_WideString GetURL(int index) const; | 203 virtual CFX_WideString GetURL(int index) const; |
| 204 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; | 204 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; |
| 205 virtual void GetRects(int index, CFX_
RectArray& rects)const; | 205 virtual void GetRects(int index, CFX_
RectArray& rects)const; |
| 206 protected: | 206 protected: |
| 207 void parserLink(); | 207 void parserLink(); |
| 208 void DeleteLinkList()
; | 208 void DeleteLinkList()
; |
| 209 FX_BOOL» » » » » » » CheckWebLink(CFX
_WideString& strBeCheck); | 209 bool» » » » » » » CheckWebLink(CFX
_WideString& strBeCheck); |
| 210 FX_BOOL» » » » » » » CheckMailLink(CF
X_WideString& str); | 210 bool» » » » » » » CheckMailLink(CF
X_WideString& str); |
| 211 FX_BOOL» » » » » » » AppendToLinkList
(int start, int count, const CFX_WideString& strUrl); | 211 bool» » » » » » » AppendToLinkList
(int start, int count, const CFX_WideString& strUrl); |
| 212 private: | 212 private: |
| 213 LINK_InfoArray m_LinkList; | 213 LINK_InfoArray m_LinkList; |
| 214 const CPDF_TextPage* m_pTextPage; | 214 const CPDF_TextPage* m_pTextPage; |
| 215 CFX_WideString m_strPageText; | 215 CFX_WideString m_strPageText; |
| 216 FX_BOOL» » » » » » » m_IsParserd; | 216 bool» » » » » » » m_IsParserd; |
| 217 }; | 217 }; |
| 218 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); | 218 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); |
| 219 void NormalizeString(CFX_WideString& str); | 219 void NormalizeString(CFX_WideString& str); |
| 220 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); | 220 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); |
| 221 | 221 |
| 222 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 222 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| OLD | NEW |