Index: core/fpdftext/include/cpdf_textpage.h |
diff --git a/core/fpdftext/include/cpdf_textpage.h b/core/fpdftext/include/cpdf_textpage.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..19e8791b5a42f938eac3785fbcbd5f098a4e19cc |
--- /dev/null |
+++ b/core/fpdftext/include/cpdf_textpage.h |
@@ -0,0 +1,157 @@ |
+// Copyright 2016 PDFium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
+ |
+#ifndef CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ |
+#define CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ |
+ |
+#include <deque> |
+#include <vector> |
+ |
+#include "core/fpdfapi/fpdf_page/cpdf_pageobjectlist.h" |
+#include "core/fxcrt/include/fx_basic.h" |
+#include "core/fxcrt/include/fx_coordinates.h" |
+#include "core/fxcrt/include/fx_string.h" |
+ |
+class CFX_BidiChar; |
+class CPDF_Font; |
+class CPDF_FormObject; |
+class CPDF_Page; |
+class CPDF_TextObject; |
+ |
+struct FPDF_CHAR_INFO { |
+ FX_WCHAR m_Unicode; |
+ FX_WCHAR m_Charcode; |
+ int32_t m_Flag; |
+ FX_FLOAT m_FontSize; |
+ FX_FLOAT m_OriginX; |
+ FX_FLOAT m_OriginY; |
+ CFX_FloatRect m_CharBox; |
+ CPDF_TextObject* m_pTextObj; |
+ CFX_Matrix m_Matrix; |
+}; |
+ |
+struct FPDF_SEGMENT { |
+ int m_Start; |
+ int m_nCount; |
+}; |
+ |
+struct PAGECHAR_INFO { |
+ int m_CharCode; |
+ FX_WCHAR m_Unicode; |
+ FX_FLOAT m_OriginX; |
+ FX_FLOAT m_OriginY; |
+ int32_t m_Flag; |
+ CFX_FloatRect m_CharBox; |
+ CPDF_TextObject* m_pTextObj; |
+ CFX_Matrix m_Matrix; |
+ int m_Index; |
+}; |
+ |
+struct PDFTEXT_Obj { |
+ CPDF_TextObject* m_pTextObj; |
+ CFX_Matrix m_formMatrix; |
+}; |
+ |
+class CPDF_TextPage { |
+ public: |
+ CPDF_TextPage(const CPDF_Page* pPage, int flags); |
+ ~CPDF_TextPage() {} |
+ |
+ // IPDF_TextPage: |
+ void ParseTextPage(); |
+ bool IsParsed() const { return m_bIsParsed; } |
+ int CharIndexFromTextIndex(int TextIndex) const; |
+ int TextIndexFromCharIndex(int CharIndex) const; |
+ int CountChars() const; |
+ void GetCharInfo(int index, FPDF_CHAR_INFO* info) const; |
+ void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const; |
+ int GetIndexAtPos(CFX_FloatPoint point, |
+ FX_FLOAT xTolerance, |
+ FX_FLOAT yTolerance) const; |
+ int GetIndexAtPos(FX_FLOAT x, |
+ FX_FLOAT y, |
+ FX_FLOAT xTolerance, |
+ FX_FLOAT yTolerance) const; |
+ CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const; |
+ void GetRectsArrayByRect(const CFX_FloatRect& rect, |
+ CFX_RectArray& resRectArray) const; |
+ CFX_WideString GetPageText(int start = 0, int nCount = -1) const; |
+ int CountRects(int start, int nCount); |
+ void GetRect(int rectIndex, |
+ FX_FLOAT& left, |
+ FX_FLOAT& top, |
+ FX_FLOAT& right, |
+ FX_FLOAT& bottom) const; |
+ FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate); |
+ FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate); |
+ int CountBoundedSegments(FX_FLOAT left, |
+ FX_FLOAT top, |
+ FX_FLOAT right, |
+ FX_FLOAT bottom, |
+ FX_BOOL bContains = FALSE); |
+ void GetBoundedSegment(int index, int& start, int& count) const; |
+ int GetWordBreak(int index, int direction) const; |
+ |
+ static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, |
+ const CFX_FloatRect& rect2); |
+ static FX_BOOL IsLetter(FX_WCHAR unicode); |
+ |
+ private: |
+ FX_BOOL IsHyphen(FX_WCHAR curChar); |
+ bool IsControlChar(const PAGECHAR_INFO& charInfo); |
+ FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); |
+ void ProcessObject(); |
+ void ProcessFormObject(CPDF_FormObject* pFormObj, |
+ const CFX_Matrix& formMatrix); |
+ void ProcessTextObject(PDFTEXT_Obj pObj); |
+ void ProcessTextObject(CPDF_TextObject* pTextObj, |
+ const CFX_Matrix& formMatrix, |
+ const CPDF_PageObjectList* pObjList, |
+ CPDF_PageObjectList::const_iterator ObjPos); |
+ int ProcessInsertObject(const CPDF_TextObject* pObj, |
+ const CFX_Matrix& formMatrix); |
+ FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info); |
+ FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, |
+ const CPDF_PageObjectList* pObjList, |
+ CPDF_PageObjectList::const_iterator ObjPos); |
+ FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1, |
+ CPDF_TextObject* pTextObj2); |
+ int GetCharWidth(uint32_t charCode, CPDF_Font* pFont) const; |
+ void CloseTempLine(); |
+ void OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str); |
+ int32_t PreMarkedContent(PDFTEXT_Obj pObj); |
+ void ProcessMarkedContent(PDFTEXT_Obj pObj); |
+ void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const; |
+ void FindPreviousTextObject(void); |
+ void AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info); |
+ void AddCharInfoByRLDirection(FX_WCHAR wChar, PAGECHAR_INFO info); |
+ int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); |
+ int32_t FindTextlineFlowDirection(); |
+ |
+ void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); |
+ FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, |
+ const CPDF_Font* pFont, |
+ int nItems) const; |
+ |
+ const CPDF_Page* const m_pPage; |
+ std::vector<uint16_t> m_CharIndex; |
+ std::deque<PAGECHAR_INFO> m_CharList; |
+ std::deque<PAGECHAR_INFO> m_TempCharList; |
+ CFX_WideTextBuf m_TextBuf; |
+ CFX_WideTextBuf m_TempTextBuf; |
+ const int m_parserflag; |
+ CPDF_TextObject* m_pPreTextObj; |
+ CFX_Matrix m_perMatrix; |
+ bool m_bIsParsed; |
+ CFX_Matrix m_DisplayMatrix; |
+ CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments; |
+ CFX_RectArray m_SelRects; |
+ CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj; |
+ int32_t m_TextlineDir; |
+ CFX_FloatRect m_CurlineRect; |
+}; |
+ |
+#endif // CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ |