Index: core/src/fpdftext/text_int.h |
diff --git a/core/src/fpdftext/text_int.h b/core/src/fpdftext/text_int.h |
index 39dc721eee44354f7b5634b9317c03a9ce6a2a37..d670134622715b4c494c0c4fcd13d88c4daf7b47 100644 |
--- a/core/src/fpdftext/text_int.h |
+++ b/core/src/fpdftext/text_int.h |
@@ -1,213 +1,234 @@ |
// Copyright 2014 PDFium Authors. All rights reserved. |
// Use of this source code is governed by a BSD-style license that can be |
// found in the LICENSE file. |
- |
+ |
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
#ifndef _PDF_TEXT_INT_H_ |
#define _PDF_TEXT_INT_H_ |
-class CPDF_TextParseOptions : public CFX_Object |
-{ |
-public: |
- CPDF_TextParseOptions(); |
- FX_BOOL m_bCheckObjectOrder; |
- FX_BOOL m_bCheckDirection; |
- int m_nCheckSameObject; |
+class CPDF_TextParseOptions : public CFX_Object { |
+ public: |
+ CPDF_TextParseOptions(); |
+ FX_BOOL m_bCheckObjectOrder; |
+ FX_BOOL m_bCheckDirection; |
+ int m_nCheckSameObject; |
}; |
class CPDF_TextPage; |
class CPDF_LinkExtract; |
class CPDF_TextPageFind; |
class CPDF_DocProgressiveSearch; |
-#define FPDFTEXT_CHAR_ERROR -1 |
-#define FPDFTEXT_CHAR_NORMAL 0 |
-#define FPDFTEXT_CHAR_GENERATED 1 |
-#define FPDFTEXT_CHAR_UNUNICODE 2 |
-#define FPDFTEXT_CHAR_HYPHEN 3 |
-#define FPDFTEXT_CHAR_PIECE 4 |
-#define FPDFTEXT_MC_PASS 0 |
-#define FPDFTEXT_MC_DONE 1 |
-#define FPDFTEXT_MC_DELAY 2 |
-typedef struct _PAGECHAR_INFO: public CFX_Object { |
- int m_CharCode; |
- FX_WCHAR m_Unicode; |
- FX_FLOAT m_OriginX; |
- FX_FLOAT m_OriginY; |
- FX_INT32 m_Flag; |
- CFX_FloatRect m_CharBox; |
- CPDF_TextObject* m_pTextObj; |
- CFX_AffineMatrix m_Matrix; |
- int m_Index; |
+#define FPDFTEXT_CHAR_ERROR -1 |
+#define FPDFTEXT_CHAR_NORMAL 0 |
+#define FPDFTEXT_CHAR_GENERATED 1 |
+#define FPDFTEXT_CHAR_UNUNICODE 2 |
+#define FPDFTEXT_CHAR_HYPHEN 3 |
+#define FPDFTEXT_CHAR_PIECE 4 |
+#define FPDFTEXT_MC_PASS 0 |
+#define FPDFTEXT_MC_DONE 1 |
+#define FPDFTEXT_MC_DELAY 2 |
+typedef struct _PAGECHAR_INFO : public CFX_Object { |
+ int m_CharCode; |
+ FX_WCHAR m_Unicode; |
+ FX_FLOAT m_OriginX; |
+ FX_FLOAT m_OriginY; |
+ FX_INT32 m_Flag; |
+ CFX_FloatRect m_CharBox; |
+ CPDF_TextObject* m_pTextObj; |
+ CFX_AffineMatrix m_Matrix; |
+ int m_Index; |
} PAGECHAR_INFO; |
-typedef CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray; |
+typedef CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray; |
typedef struct { |
- int m_Start; |
- int m_nCount; |
+ int m_Start; |
+ int m_nCount; |
} FPDF_SEGMENT; |
typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; |
typedef struct { |
- CPDF_TextObject* m_pTextObj; |
- CFX_AffineMatrix m_formMatrix; |
+ CPDF_TextObject* m_pTextObj; |
+ CFX_AffineMatrix m_formMatrix; |
} PDFTEXT_Obj; |
typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; |
-class CPDF_TextPage: public IPDF_TextPage |
-{ |
-public: |
- CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); |
- CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); |
- CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); |
- virtual FX_BOOL ParseTextPage(); |
- virtual void NormalizeObjects(FX_BOOL bNormalize); |
- virtual FX_BOOL IsParsered() const |
- { |
- return m_IsParsered; |
- } |
- virtual ~CPDF_TextPage() {}; |
-public: |
- virtual int CharIndexFromTextIndex(int TextIndex)const ; |
- virtual int TextIndexFromCharIndex(int CharIndex)const; |
- virtual int CountChars() const; |
- virtual void GetCharInfo(int index, FPDF_CHAR_INFO & info) const; |
- virtual void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const; |
- virtual int GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const; |
- virtual int GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance, |
- FX_FLOAT yTorelance) const; |
- virtual CFX_WideString GetTextByRect(CFX_FloatRect rect) const; |
- virtual void GetRectsArrayByRect(CFX_FloatRect rect, CFX_RectArray& resRectArray) const; |
- virtual int GetOrderByDirection(int order, int direction) const; |
- virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const; |
+class CPDF_TextPage : public IPDF_TextPage { |
+ public: |
+ CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); |
+ CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); |
+ CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); |
+ virtual FX_BOOL ParseTextPage(); |
+ virtual void NormalizeObjects(FX_BOOL bNormalize); |
+ virtual FX_BOOL IsParsered() const { return m_IsParsered; } |
+ virtual ~CPDF_TextPage(){}; |
+ |
+ public: |
+ virtual int CharIndexFromTextIndex(int TextIndex) const; |
+ virtual int TextIndexFromCharIndex(int CharIndex) const; |
+ virtual int CountChars() const; |
+ virtual void GetCharInfo(int index, FPDF_CHAR_INFO& info) const; |
+ virtual void GetRectArray(int start, |
+ int nCount, |
+ CFX_RectArray& rectArray) const; |
+ virtual int GetIndexAtPos(CPDF_Point point, |
+ FX_FLOAT xTorelance, |
+ FX_FLOAT yTorelance) const; |
+ virtual int GetIndexAtPos(FX_FLOAT x, |
+ FX_FLOAT y, |
+ FX_FLOAT xTorelance, |
+ FX_FLOAT yTorelance) const; |
+ virtual CFX_WideString GetTextByRect(CFX_FloatRect rect) const; |
+ virtual void GetRectsArrayByRect(CFX_FloatRect rect, |
+ CFX_RectArray& resRectArray) const; |
+ virtual int GetOrderByDirection(int order, int direction) const; |
+ virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const; |
+ |
+ virtual int CountRects(int start, int nCount); |
+ virtual void GetRect(int rectIndex, |
+ FX_FLOAT& left, |
+ FX_FLOAT& top, |
+ FX_FLOAT& right, |
+ FX_FLOAT& bottom) const; |
+ virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate); |
+ virtual FX_BOOL GetBaselineRotate(CFX_FloatRect rect, int& Rotate); |
+ virtual int CountBoundedSegments(FX_FLOAT left, |
+ FX_FLOAT top, |
+ FX_FLOAT right, |
+ FX_FLOAT bottom, |
+ FX_BOOL bContains = FALSE); |
+ virtual void GetBoundedSegment(int index, int& start, int& count) const; |
+ virtual int GetWordBreak(int index, int direction) const; |
- virtual int CountRects(int start, int nCount); |
- virtual void GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top |
- , FX_FLOAT& right, FX_FLOAT &bottom) const; |
- virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate); |
- virtual FX_BOOL GetBaselineRotate(CFX_FloatRect rect, int& Rotate); |
- virtual int CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, |
- FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE); |
- virtual void GetBoundedSegment(int index, int& start, int& count) const; |
- virtual int GetWordBreak(int index, int direction) const; |
-public: |
- const PAGECHAR_InfoArray* GetCharList() const |
- { |
- return &m_charList; |
- } |
- static FX_BOOL IsRectIntersect(CFX_FloatRect rect1, CFX_FloatRect rect2); |
- static FX_BOOL IsLetter(FX_WCHAR unicode); |
-private: |
- FX_BOOL IsHyphen(FX_WCHAR curChar); |
- FX_BOOL IsControlChar(PAGECHAR_INFO* pCharInfo); |
- FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); |
- void ProcessObject(); |
- void ProcessFormObject(CPDF_FormObject* pFormObj, CFX_AffineMatrix formMatrix); |
- void ProcessTextObject(PDFTEXT_Obj pObj); |
- void ProcessTextObject(CPDF_TextObject* pTextObj, CFX_AffineMatrix formMatrix, FX_POSITION ObjPos); |
- int ProcessInsertObject(const CPDF_TextObject* pObj, CFX_AffineMatrix formMatrix); |
- FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info); |
- FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos); |
- FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2); |
- int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const; |
- void CloseTempLine(); |
- void OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str); |
- FX_INT32 PreMarkedContent(PDFTEXT_Obj pObj); |
- void ProcessMarkedContent(PDFTEXT_Obj pObj); |
- void CheckMarkedContentObject(FX_INT32& start, FX_INT32& nCount) const; |
- void FindPreviousTextObject(void); |
- void AddCharInfoByLRDirection(CFX_WideString& str, int i); |
- void AddCharInfoByRLDirection(CFX_WideString& str, int i); |
- FX_INT32 GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); |
- FX_INT32 FindTextlineFlowDirection(); |
-protected: |
- CPDFText_ParseOptions m_ParseOptions; |
- CFX_WordArray m_CharIndex; |
- const CPDF_PageObjects* m_pPage; |
- PAGECHAR_InfoArray m_charList; |
- CFX_WideTextBuf m_TextBuf; |
- PAGECHAR_InfoArray m_TempCharList; |
- CFX_WideTextBuf m_TempTextBuf; |
- int m_parserflag; |
- CPDF_TextObject* m_pPreTextObj; |
- CFX_AffineMatrix m_perMatrix; |
- FX_BOOL m_IsParsered; |
- CFX_AffineMatrix m_DisplayMatrix; |
+ public: |
+ const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; } |
+ static FX_BOOL IsRectIntersect(CFX_FloatRect rect1, CFX_FloatRect rect2); |
+ static FX_BOOL IsLetter(FX_WCHAR unicode); |
- SEGMENT_Array m_Segment; |
- CFX_RectArray m_SelRects; |
- LINEOBJ m_LineObj; |
- FX_BOOL m_TextlineDir; |
- CFX_FloatRect m_CurlineRect; |
+ private: |
+ FX_BOOL IsHyphen(FX_WCHAR curChar); |
+ FX_BOOL IsControlChar(PAGECHAR_INFO* pCharInfo); |
+ FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); |
+ void ProcessObject(); |
+ void ProcessFormObject(CPDF_FormObject* pFormObj, |
+ CFX_AffineMatrix formMatrix); |
+ void ProcessTextObject(PDFTEXT_Obj pObj); |
+ void ProcessTextObject(CPDF_TextObject* pTextObj, |
+ CFX_AffineMatrix formMatrix, |
+ FX_POSITION ObjPos); |
+ int ProcessInsertObject(const CPDF_TextObject* pObj, |
+ CFX_AffineMatrix formMatrix); |
+ FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info); |
+ FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos); |
+ FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1, |
+ CPDF_TextObject* pTextObj2); |
+ int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const; |
+ void CloseTempLine(); |
+ void OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str); |
+ FX_INT32 PreMarkedContent(PDFTEXT_Obj pObj); |
+ void ProcessMarkedContent(PDFTEXT_Obj pObj); |
+ void CheckMarkedContentObject(FX_INT32& start, FX_INT32& nCount) const; |
+ void FindPreviousTextObject(void); |
+ void AddCharInfoByLRDirection(CFX_WideString& str, int i); |
+ void AddCharInfoByRLDirection(CFX_WideString& str, int i); |
+ FX_INT32 GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); |
+ FX_INT32 FindTextlineFlowDirection(); |
+ |
+ protected: |
+ CPDFText_ParseOptions m_ParseOptions; |
+ CFX_WordArray m_CharIndex; |
+ const CPDF_PageObjects* m_pPage; |
+ PAGECHAR_InfoArray m_charList; |
+ CFX_WideTextBuf m_TextBuf; |
+ PAGECHAR_InfoArray m_TempCharList; |
+ CFX_WideTextBuf m_TempTextBuf; |
+ int m_parserflag; |
+ CPDF_TextObject* m_pPreTextObj; |
+ CFX_AffineMatrix m_perMatrix; |
+ FX_BOOL m_IsParsered; |
+ CFX_AffineMatrix m_DisplayMatrix; |
+ |
+ SEGMENT_Array m_Segment; |
+ CFX_RectArray m_SelRects; |
+ LINEOBJ m_LineObj; |
+ FX_BOOL m_TextlineDir; |
+ CFX_FloatRect m_CurlineRect; |
}; |
-class CPDF_TextPageFind: public IPDF_TextPageFind |
-{ |
-public: |
- CPDF_TextPageFind(const IPDF_TextPage* pTextPage); |
- virtual ~CPDF_TextPageFind() {}; |
-public: |
- virtual FX_BOOL FindFirst(CFX_WideString findwhat, int flags, int startPos = 0); |
- virtual FX_BOOL FindNext(); |
- virtual FX_BOOL FindPrev(); |
+class CPDF_TextPageFind : public IPDF_TextPageFind { |
+ public: |
+ CPDF_TextPageFind(const IPDF_TextPage* pTextPage); |
+ virtual ~CPDF_TextPageFind(){}; |
+ |
+ public: |
+ virtual FX_BOOL FindFirst(CFX_WideString findwhat, |
+ int flags, |
+ int startPos = 0); |
+ virtual FX_BOOL FindNext(); |
+ virtual FX_BOOL FindPrev(); |
- virtual void GetRectArray(CFX_RectArray& rects) const; |
- virtual int GetCurOrder() const; |
- virtual int GetMatchedCount()const; |
-protected: |
- void ExtractFindWhat(CFX_WideString findwhat); |
- FX_BOOL IsMatchWholeWord(CFX_WideString csPageText, int startPos, int endPos); |
- FX_BOOL ExtractSubString(CFX_WideString& rString, FX_LPCWSTR lpszFullString, |
- int iSubString, FX_WCHAR chSep); |
- CFX_WideString MakeReverse(const CFX_WideString str); |
- int ReverseFind(CFX_WideString csPageText, CFX_WideString csWord, int nStartPos, int& WordLength); |
- int GetCharIndex(int index) const; |
-private: |
- CFX_WordArray m_CharIndex; |
- const IPDF_TextPage* m_pTextPage; |
- CFX_WideString m_strText; |
- CFX_WideString m_findWhat; |
- int m_flags; |
- CFX_WideStringArray m_csFindWhatArray; |
- int m_findNextStart; |
- int m_findPreStart; |
- FX_BOOL m_bMatchCase; |
- FX_BOOL m_bMatchWholeWord; |
- int m_resStart; |
- int m_resEnd; |
- CFX_RectArray m_resArray; |
- FX_BOOL m_IsFind; |
+ virtual void GetRectArray(CFX_RectArray& rects) const; |
+ virtual int GetCurOrder() const; |
+ virtual int GetMatchedCount() const; |
+ |
+ protected: |
+ void ExtractFindWhat(CFX_WideString findwhat); |
+ FX_BOOL IsMatchWholeWord(CFX_WideString csPageText, int startPos, int endPos); |
+ FX_BOOL ExtractSubString(CFX_WideString& rString, |
+ FX_LPCWSTR lpszFullString, |
+ int iSubString, |
+ FX_WCHAR chSep); |
+ CFX_WideString MakeReverse(const CFX_WideString str); |
+ int ReverseFind(CFX_WideString csPageText, |
+ CFX_WideString csWord, |
+ int nStartPos, |
+ int& WordLength); |
+ int GetCharIndex(int index) const; |
+ |
+ private: |
+ CFX_WordArray m_CharIndex; |
+ const IPDF_TextPage* m_pTextPage; |
+ CFX_WideString m_strText; |
+ CFX_WideString m_findWhat; |
+ int m_flags; |
+ CFX_WideStringArray m_csFindWhatArray; |
+ int m_findNextStart; |
+ int m_findPreStart; |
+ FX_BOOL m_bMatchCase; |
+ FX_BOOL m_bMatchWholeWord; |
+ int m_resStart; |
+ int m_resEnd; |
+ CFX_RectArray m_resArray; |
+ FX_BOOL m_IsFind; |
}; |
-class CPDF_LinkExt: public CFX_Object |
-{ |
-public: |
- CPDF_LinkExt() {}; |
- int m_Start; |
- int m_Count; |
- CFX_WideString m_strUrl; |
- virtual ~CPDF_LinkExt() {}; |
+class CPDF_LinkExt : public CFX_Object { |
+ public: |
+ CPDF_LinkExt(){}; |
+ int m_Start; |
+ int m_Count; |
+ CFX_WideString m_strUrl; |
+ virtual ~CPDF_LinkExt(){}; |
}; |
typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; |
-class CPDF_LinkExtract: public IPDF_LinkExtract |
-{ |
-public: |
- CPDF_LinkExtract(); |
- virtual ~CPDF_LinkExtract(); |
- virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage); |
- virtual FX_BOOL IsExtract() const |
- { |
- return m_IsParserd; |
- } |
-public: |
- virtual int CountLinks() const; |
- virtual CFX_WideString GetURL(int index) const; |
- virtual void GetBoundedSegment(int index, int& start, int& count) const; |
- virtual void GetRects(int index, CFX_RectArray& rects)const; |
-protected: |
- void parserLink(); |
- void DeleteLinkList(); |
- FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); |
- FX_BOOL CheckMailLink(CFX_WideString& str); |
- FX_BOOL AppendToLinkList(int start, int count, CFX_WideString strUrl); |
-private: |
- LINK_InfoArray m_LinkList; |
- const CPDF_TextPage* m_pTextPage; |
- CFX_WideString m_strPageText; |
- FX_BOOL m_IsParserd; |
+class CPDF_LinkExtract : public IPDF_LinkExtract { |
+ public: |
+ CPDF_LinkExtract(); |
+ virtual ~CPDF_LinkExtract(); |
+ virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage); |
+ virtual FX_BOOL IsExtract() const { return m_IsParserd; } |
+ |
+ public: |
+ virtual int CountLinks() const; |
+ virtual CFX_WideString GetURL(int index) const; |
+ virtual void GetBoundedSegment(int index, int& start, int& count) const; |
+ virtual void GetRects(int index, CFX_RectArray& rects) const; |
+ |
+ protected: |
+ void parserLink(); |
+ void DeleteLinkList(); |
+ FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); |
+ FX_BOOL CheckMailLink(CFX_WideString& str); |
+ FX_BOOL AppendToLinkList(int start, int count, CFX_WideString strUrl); |
+ |
+ private: |
+ LINK_InfoArray m_LinkList; |
+ const CPDF_TextPage* m_pTextPage; |
+ CFX_WideString m_strPageText; |
+ FX_BOOL m_IsParserd; |
}; |
FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_LPWSTR pDst); |
void NormalizeString(CFX_WideString& str); |