| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| 9 | 9 |
| 10 class CPDF_TextPage; | 10 class CPDF_TextPage; |
| (...skipping 30 matching lines...) Expand all Loading... |
| 41 CPDF_TextObject* m_pTextObj; | 41 CPDF_TextObject* m_pTextObj; |
| 42 CFX_AffineMatrix m_formMatrix; | 42 CFX_AffineMatrix m_formMatrix; |
| 43 } PDFTEXT_Obj; | 43 } PDFTEXT_Obj; |
| 44 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; | 44 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; |
| 45 class CPDF_TextPage: public IPDF_TextPage | 45 class CPDF_TextPage: public IPDF_TextPage |
| 46 { | 46 { |
| 47 public: | 47 public: |
| 48 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); | 48 CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); |
| 49 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); | 49 CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); |
| 50 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); | 50 CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); |
| 51 virtual FX_BOOL» » » » » ParseTextPage(); | 51 virtual bool» » » » » ParseTextPage(); |
| 52 virtual void» » » » » NormalizeObjects(FX_BOOL
bNormalize); | 52 virtual void» » » » » NormalizeObjects(bool bN
ormalize); |
| 53 virtual» FX_BOOL»» » » » IsParsered() const | 53 virtual» bool» » » » » IsParsered() const |
| 54 { | 54 { |
| 55 return m_IsParsered; | 55 return m_IsParsered; |
| 56 } | 56 } |
| 57 virtual ~CPDF_TextPage() {}; | 57 virtual ~CPDF_TextPage() {}; |
| 58 public: | 58 public: |
| 59 virtual int CharIndexFromTextIndex(int TextIndex)const ; | 59 virtual int CharIndexFromTextIndex(int TextIndex)const ; |
| 60 virtual int TextIndexFromCharIndex(int CharIndex)const; | 60 virtual int TextIndexFromCharIndex(int CharIndex)const; |
| 61 virtual int CountChars() const; | 61 virtual int CountChars() const; |
| 62 virtual void GetCharInfo(int index, F
PDF_CHAR_INFO & info) const; | 62 virtual void GetCharInfo(int index, F
PDF_CHAR_INFO & info) const; |
| 63 virtual void GetRectArray(int start,
int nCount, CFX_RectArray& rectArray) const; | 63 virtual void GetRectArray(int start,
int nCount, CFX_RectArray& rectArray) const; |
| 64 virtual int GetIndexAtPos(CPDF_Point
point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const; | 64 virtual int GetIndexAtPos(CPDF_Point
point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const; |
| 65 virtual int GetIndexAtPos(FX_FLOAT x
, FX_FLOAT y, FX_FLOAT xTorelance, | 65 virtual int GetIndexAtPos(FX_FLOAT x
, FX_FLOAT y, FX_FLOAT xTorelance, |
| 66 FX_FLOAT yTorelance) const; | 66 FX_FLOAT yTorelance) const; |
| 67 virtual CFX_WideString GetTextByRect(const CFX_FloatRec
t& rect) const; | 67 virtual CFX_WideString GetTextByRect(const CFX_FloatRec
t& rect) const; |
| 68 virtual void GetRectsArrayByRect(cons
t CFX_FloatRect& rect, CFX_RectArray& resRectArray) const; | 68 virtual void GetRectsArrayByRect(cons
t CFX_FloatRect& rect, CFX_RectArray& resRectArray) const; |
| 69 virtual CFX_WideString GetPageText(int start = 0, int n
Count = -1) const; | 69 virtual CFX_WideString GetPageText(int start = 0, int n
Count = -1) const; |
| 70 | 70 |
| 71 virtual int CountRects(int start, in
t nCount); | 71 virtual int CountRects(int start, in
t nCount); |
| 72 virtual void GetRect(int rectIndex, F
X_FLOAT& left, FX_FLOAT& top | 72 virtual void GetRect(int rectIndex, F
X_FLOAT& left, FX_FLOAT& top |
| 73 , FX_FLOAT& right, FX_FLOAT &bottom)
const; | 73 , FX_FLOAT& right, FX_FLOAT &bottom)
const; |
| 74 virtual FX_BOOL» » » » » GetBaselineRotate(int re
ctIndex, int& Rotate); | 74 virtual bool» » » » » GetBaselineRotate(int re
ctIndex, int& Rotate); |
| 75 virtual FX_BOOL» » » » » GetBaselineRotate(const
CFX_FloatRect& rect, int& Rotate); | 75 virtual bool» » » » » GetBaselineRotate(const
CFX_FloatRect& rect, int& Rotate); |
| 76 virtual int CountBoundedSegm
ents(FX_FLOAT left, FX_FLOAT top, | 76 virtual int CountBoundedSegm
ents(FX_FLOAT left, FX_FLOAT top, |
| 77 FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE); | 77 FX_FLOAT right, FX_FLOAT bottom, bool bContains = false); |
| 78 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; | 78 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; |
| 79 virtual int GetWordBreak(int index,
int direction) const; | 79 virtual int GetWordBreak(int index,
int direction) const; |
| 80 public: | 80 public: |
| 81 const PAGECHAR_InfoArray* GetCharList() const | 81 const PAGECHAR_InfoArray* GetCharList() const |
| 82 { | 82 { |
| 83 return &m_charList; | 83 return &m_charList; |
| 84 } | 84 } |
| 85 static» FX_BOOL»» » » » IsRectIntersect(const CF
X_FloatRect& rect1, const CFX_FloatRect& rect2); | 85 static» bool» » » » » IsRectIntersect(const CF
X_FloatRect& rect1, const CFX_FloatRect& rect2); |
| 86 static» FX_BOOL»» » » » IsLetter(FX_WCHAR unicod
e); | 86 static» bool» » » » » IsLetter(FX_WCHAR unicod
e); |
| 87 private: | 87 private: |
| 88 FX_BOOL» » » » » » » IsHyphen(FX_WCHA
R curChar); | 88 bool» » » » » » » IsHyphen(FX_WCHA
R curChar); |
| 89 bool IsControlChar(co
nst PAGECHAR_INFO& charInfo); | 89 bool IsControlChar(co
nst PAGECHAR_INFO& charInfo); |
| 90 FX_BOOL» » » » » » » GetBaselineRotat
e(int start, int end, int& Rotate); | 90 bool» » » » » » » GetBaselineRotat
e(int start, int end, int& Rotate); |
| 91 void ProcessObject(); | 91 void ProcessObject(); |
| 92 void ProcessFormObjec
t(CPDF_FormObject* pFormObj, const CFX_AffineMatrix& formMatrix); | 92 void ProcessFormObjec
t(CPDF_FormObject* pFormObj, const CFX_AffineMatrix& formMatrix); |
| 93 void ProcessTextObjec
t(PDFTEXT_Obj pObj); | 93 void ProcessTextObjec
t(PDFTEXT_Obj pObj); |
| 94 void ProcessTextObjec
t(CPDF_TextObject* pTextObj, const CFX_AffineMatrix& formMatrix, FX_POSITIO
N ObjPos); | 94 void ProcessTextObjec
t(CPDF_TextObject* pTextObj, const CFX_AffineMatrix& formMatrix, FX_POSITIO
N ObjPos); |
| 95 int ProcessInsertObj
ect(const CPDF_TextObject* pObj, const CFX_AffineMatrix& formMatrix); | 95 int ProcessInsertObj
ect(const CPDF_TextObject* pObj, const CFX_AffineMatrix& formMatrix); |
| 96 FX_BOOL» » » » » » » GenerateCharInfo
(FX_WCHAR unicode, PAGECHAR_INFO& info); | 96 bool» » » » » » » GenerateCharInfo
(FX_WCHAR unicode, PAGECHAR_INFO& info); |
| 97 FX_BOOL» » » » » » » IsSameAsPreTextO
bject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos); | 97 bool» » » » » » » IsSameAsPreTextO
bject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos); |
| 98 FX_BOOL» » » » » » » IsSameTextObject
(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2); | 98 bool» » » » » » » IsSameTextObject
(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2); |
| 99 int GetCharWidth(FX_
DWORD charCode, CPDF_Font* pFont) const; | 99 int GetCharWidth(FX_
DWORD charCode, CPDF_Font* pFont) const; |
| 100 void CloseTempLine(); | 100 void CloseTempLine(); |
| 101 void OnPiece(IFX_Bidi
Char* pBidi, CFX_WideString& str); | 101 void OnPiece(IFX_Bidi
Char* pBidi, CFX_WideString& str); |
| 102 int32_t PreMarkedContent(PDFTEXT_Obj pObj); | 102 int32_t PreMarkedContent(PDFTEXT_Obj pObj); |
| 103 void ProcessMarkedContent(PDFTEXT_Obj pObj); | 103 void ProcessMarkedContent(PDFTEXT_Obj pObj); |
| 104 void CheckMarkedContentObject(int32_t& start, int32_t& nCount
) const; | 104 void CheckMarkedContentObject(int32_t& start, int32_t& nCount
) const; |
| 105 void FindPreviousTextObject(void); | 105 void FindPreviousTextObject(void); |
| 106 void AddCharInfoByLRDirection(CFX_WideString& str, int i); | 106 void AddCharInfoByLRDirection(CFX_WideString& str, int i); |
| 107 void AddCharInfoByRLDirection(CFX_WideString& str, int i); | 107 void AddCharInfoByRLDirection(CFX_WideString& str, int i); |
| 108 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); | 108 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); |
| 109 int32_t FindTextlineFlowDirection(); | 109 int32_t FindTextlineFlowDirection(); |
| 110 void SwapTempTextBuf(int32_t iCharListStartAppend, | 110 void SwapTempTextBuf(int32_t iCharListStartAppend, |
| 111 int32_t iBufStartAppend); | 111 int32_t iBufStartAppend); |
| 112 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, | 112 bool IsRightToLeft(const CPDF_TextObject* pTextObj, |
| 113 const CPDF_Font* pFont, | 113 const CPDF_Font* pFont, |
| 114 int nItems) const; | 114 int nItems) const; |
| 115 protected: | 115 protected: |
| 116 CPDFText_ParseOptions m_ParseOptions; | 116 CPDFText_ParseOptions m_ParseOptions; |
| 117 CFX_WordArray m_CharIndex; | 117 CFX_WordArray m_CharIndex; |
| 118 const CPDF_PageObjects* m_pPage; | 118 const CPDF_PageObjects* m_pPage; |
| 119 PAGECHAR_InfoArray m_charList; | 119 PAGECHAR_InfoArray m_charList; |
| 120 CFX_WideTextBuf m_TextBuf; | 120 CFX_WideTextBuf m_TextBuf; |
| 121 PAGECHAR_InfoArray m_TempCharList; | 121 PAGECHAR_InfoArray m_TempCharList; |
| 122 CFX_WideTextBuf m_TempTextBuf; | 122 CFX_WideTextBuf m_TempTextBuf; |
| 123 int m_parserflag; | 123 int m_parserflag; |
| 124 CPDF_TextObject* m_pPreTextObj; | 124 CPDF_TextObject* m_pPreTextObj; |
| 125 CFX_AffineMatrix m_perMatrix; | 125 CFX_AffineMatrix m_perMatrix; |
| 126 FX_BOOL» » » » » » » m_IsParsered; | 126 bool» » » » » » » m_IsParsered; |
| 127 CFX_AffineMatrix m_DisplayMatrix; | 127 CFX_AffineMatrix m_DisplayMatrix; |
| 128 | 128 |
| 129 SEGMENT_Array m_Segment; | 129 SEGMENT_Array m_Segment; |
| 130 CFX_RectArray m_SelRects; | 130 CFX_RectArray m_SelRects; |
| 131 LINEOBJ m_LineObj; | 131 LINEOBJ m_LineObj; |
| 132 FX_BOOL» » » » » » » m_TextlineDir; | 132 bool» » » » » » » m_TextlineDir; |
| 133 CFX_FloatRect m_CurlineRect; | 133 CFX_FloatRect m_CurlineRect; |
| 134 }; | 134 }; |
| 135 class CPDF_TextPageFind: public IPDF_TextPageFind | 135 class CPDF_TextPageFind: public IPDF_TextPageFind |
| 136 { | 136 { |
| 137 public: | 137 public: |
| 138 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); | 138 CPDF_TextPageFind(const IPDF_TextPage* pTextPage); |
| 139 virtual ~CPDF_TextPageFi
nd() {}; | 139 virtual ~CPDF_TextPageFi
nd() {}; |
| 140 public: | 140 public: |
| 141 virtual» FX_BOOL»» » » » FindFirst(const CFX_Wide
String& findwhat, int flags, int startPos = 0); | 141 virtual» bool» » » » » FindFirst(const CFX_Wide
String& findwhat, int flags, int startPos = 0); |
| 142 virtual» FX_BOOL»» » » » FindNext(); | 142 virtual» bool» » » » » FindNext(); |
| 143 virtual» FX_BOOL»» » » » FindPrev(); | 143 virtual» bool» » » » » FindPrev(); |
| 144 | 144 |
| 145 virtual void GetRectArray(CFX_RectArr
ay& rects) const; | 145 virtual void GetRectArray(CFX_RectArr
ay& rects) const; |
| 146 virtual int GetCurOrder() const; | 146 virtual int GetCurOrder() const; |
| 147 virtual int GetMatchedCount()const; | 147 virtual int GetMatchedCount()const; |
| 148 protected: | 148 protected: |
| 149 void ExtractFindWhat(
const CFX_WideString& findwhat); | 149 void ExtractFindWhat(
const CFX_WideString& findwhat); |
| 150 FX_BOOL» » » » » » » IsMatchWholeWord
(const CFX_WideString& csPageText, int startPos, int endPos); | 150 bool» » » » » » » IsMatchWholeWord
(const CFX_WideString& csPageText, int startPos, int endPos); |
| 151 FX_BOOL» » » » » » » ExtractSubString
(CFX_WideString& rString, const FX_WCHAR* lpszFullString, | 151 bool» » » » » » » ExtractSubString
(CFX_WideString& rString, const FX_WCHAR* lpszFullString, |
| 152 int iSubString, FX_WCHAR chSep); | 152 int iSubString, FX_WCHAR chSep); |
| 153 CFX_WideString MakeReverse(const CFX_Wi
deString& str); | 153 CFX_WideString MakeReverse(const CFX_Wi
deString& str); |
| 154 int ReverseFind(cons
t CFX_WideString& csPageText, const CFX_WideString& csWord, int nStartPos, int&
WordLength); | 154 int ReverseFind(cons
t CFX_WideString& csPageText, const CFX_WideString& csWord, int nStartPos, int&
WordLength); |
| 155 int GetCharIndex(int
index) const; | 155 int GetCharIndex(int
index) const; |
| 156 private: | 156 private: |
| 157 CFX_WordArray m_CharIndex; | 157 CFX_WordArray m_CharIndex; |
| 158 const IPDF_TextPage* m_pTextPage; | 158 const IPDF_TextPage* m_pTextPage; |
| 159 CFX_WideString m_strText; | 159 CFX_WideString m_strText; |
| 160 CFX_WideString m_findWhat; | 160 CFX_WideString m_findWhat; |
| 161 int m_flags; | 161 int m_flags; |
| 162 CFX_WideStringArray m_csFindWhatArray; | 162 CFX_WideStringArray m_csFindWhatArray; |
| 163 int m_findNextStart; | 163 int m_findNextStart; |
| 164 int m_findPreStart; | 164 int m_findPreStart; |
| 165 FX_BOOL» » » » » » » m_bMatchCase; | 165 bool» » » » » » » m_bMatchCase; |
| 166 FX_BOOL» » » » » » » m_bMatchWholeWor
d; | 166 bool» » » » » » » m_bMatchWholeWor
d; |
| 167 int m_resStart; | 167 int m_resStart; |
| 168 int m_resEnd; | 168 int m_resEnd; |
| 169 CFX_RectArray m_resArray; | 169 CFX_RectArray m_resArray; |
| 170 FX_BOOL» » » » » » » m_IsFind; | 170 bool» » » » » » » m_IsFind; |
| 171 }; | 171 }; |
| 172 class CPDF_LinkExt | 172 class CPDF_LinkExt |
| 173 { | 173 { |
| 174 public: | 174 public: |
| 175 CPDF_LinkExt() {}; | 175 CPDF_LinkExt() {}; |
| 176 int m_Start; | 176 int m_Start; |
| 177 int m_Count; | 177 int m_Count; |
| 178 CFX_WideString m_strUrl; | 178 CFX_WideString m_strUrl; |
| 179 virtual ~CPDF_LinkExt()
{}; | 179 virtual ~CPDF_LinkExt()
{}; |
| 180 }; | 180 }; |
| 181 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; | 181 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; |
| 182 class CPDF_LinkExtract: public IPDF_LinkExtract | 182 class CPDF_LinkExtract: public IPDF_LinkExtract |
| 183 { | 183 { |
| 184 public: | 184 public: |
| 185 CPDF_LinkExtract(); | 185 CPDF_LinkExtract(); |
| 186 virtual ~CPDF_LinkExtrac
t(); | 186 virtual ~CPDF_LinkExtrac
t(); |
| 187 virtual FX_BOOL» » » » » ExtractLinks(const IPDF_
TextPage* pTextPage); | 187 virtual bool» » » » » ExtractLinks(const IPDF_
TextPage* pTextPage); |
| 188 virtual» FX_BOOL»» » » » IsExtract() const | 188 virtual» bool» » » » » IsExtract() const |
| 189 { | 189 { |
| 190 return m_IsParserd; | 190 return m_IsParserd; |
| 191 } | 191 } |
| 192 public: | 192 public: |
| 193 virtual int CountLinks() const; | 193 virtual int CountLinks() const; |
| 194 virtual CFX_WideString GetURL(int index) const; | 194 virtual CFX_WideString GetURL(int index) const; |
| 195 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; | 195 virtual void GetBoundedSegment(int in
dex, int& start, int& count) const; |
| 196 virtual void GetRects(int index, CFX_
RectArray& rects)const; | 196 virtual void GetRects(int index, CFX_
RectArray& rects)const; |
| 197 protected: | 197 protected: |
| 198 void parserLink(); | 198 void parserLink(); |
| 199 void DeleteLinkList()
; | 199 void DeleteLinkList()
; |
| 200 FX_BOOL» » » » » » » CheckWebLink(CFX
_WideString& strBeCheck); | 200 bool» » » » » » » CheckWebLink(CFX
_WideString& strBeCheck); |
| 201 FX_BOOL» » » » » » » CheckMailLink(CF
X_WideString& str); | 201 bool» » » » » » » CheckMailLink(CF
X_WideString& str); |
| 202 FX_BOOL» » » » » » » AppendToLinkList
(int start, int count, const CFX_WideString& strUrl); | 202 bool» » » » » » » AppendToLinkList
(int start, int count, const CFX_WideString& strUrl); |
| 203 private: | 203 private: |
| 204 LINK_InfoArray m_LinkList; | 204 LINK_InfoArray m_LinkList; |
| 205 const CPDF_TextPage* m_pTextPage; | 205 const CPDF_TextPage* m_pTextPage; |
| 206 CFX_WideString m_strPageText; | 206 CFX_WideString m_strPageText; |
| 207 FX_BOOL» » » » » » » m_IsParserd; | 207 bool» » » » » » » m_IsParserd; |
| 208 }; | 208 }; |
| 209 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); | 209 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); |
| 210 void NormalizeString(CFX_WideString& str); | 210 void NormalizeString(CFX_WideString& str); |
| 211 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); | 211 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); |
| 212 | 212 |
| 213 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 213 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| OLD | NEW |