OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 |
| 7 #ifndef CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ |
| 8 #define CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ |
| 9 |
| 10 #include <deque> |
| 11 #include <vector> |
| 12 |
| 13 #include "core/fpdfapi/fpdf_page/cpdf_pageobjectlist.h" |
| 14 #include "core/fxcrt/include/fx_basic.h" |
| 15 #include "core/fxcrt/include/fx_coordinates.h" |
| 16 #include "core/fxcrt/include/fx_string.h" |
| 17 |
| 18 class CFX_BidiChar; |
| 19 class CPDF_Font; |
| 20 class CPDF_FormObject; |
| 21 class CPDF_Page; |
| 22 class CPDF_TextObject; |
| 23 |
// Plain data record describing one character. CPDF_TextPage::GetCharInfo()
// takes a non-const pointer to one of these (presumably as its output
// argument - confirm against the implementation).
// NOTE(review): the per-field notes below are inferred from the member names
// and types only; this header does not show how the fields are populated.
| 24 struct FPDF_CHAR_INFO { |
// Both code fields are FX_WCHAR here, whereas PAGECHAR_INFO::m_CharCode is an
// int (and is spelled with a capital 'C').
| 25 FX_WCHAR m_Unicode; |
| 26 FX_WCHAR m_Charcode; |
| 27 int32_t m_Flag; |
| 28 FX_FLOAT m_FontSize; |
| 29 FX_FLOAT m_OriginX; |
| 30 FX_FLOAT m_OriginY; |
| 31 CFX_FloatRect m_CharBox; |
// Raw pointer; this struct declares no destructor, so it never deletes it.
| 32 CPDF_TextObject* m_pTextObj; |
| 33 CFX_Matrix m_Matrix; |
| 34 }; |
| 35 |
// A (start, count) pair of ints. CPDF_TextPage stores an array of these in its
// m_Segments member.
// NOTE(review): presumably m_Start is a character index and m_nCount a number
// of characters, matching GetBoundedSegment(index, start, count) - confirm.
| 36 struct FPDF_SEGMENT { |
| 37 int m_Start; |
| 38 int m_nCount; |
| 39 }; |
| 40 |
// Per-character record used inside CPDF_TextPage: it is the element type of
// the m_CharList and m_TempCharList deques and the argument type of
// GenerateCharInfo(), IsControlChar() and the AddCharInfoBy*Direction()
// helpers.
// NOTE(review): field meanings are inferred from names only; confirm against
// the implementation file.
| 41 struct PAGECHAR_INFO { |
// Declared as int here, unlike FPDF_CHAR_INFO::m_Charcode which is FX_WCHAR.
| 42 int m_CharCode; |
| 43 FX_WCHAR m_Unicode; |
| 44 FX_FLOAT m_OriginX; |
| 45 FX_FLOAT m_OriginY; |
| 46 int32_t m_Flag; |
| 47 CFX_FloatRect m_CharBox; |
// Raw pointer; this struct declares no destructor, so it never deletes it.
| 48 CPDF_TextObject* m_pTextObj; |
| 49 CFX_Matrix m_Matrix; |
| 50 int m_Index; |
| 51 }; |
| 52 |
// Pairs a text-object pointer with a matrix. CPDF_TextPage keeps an array of
// these in m_LineObj and passes them by value to ProcessTextObject(),
// PreMarkedContent() and ProcessMarkedContent().
| 53 struct PDFTEXT_Obj { |
// Raw pointer; this struct declares no destructor, so it never deletes it.
| 54 CPDF_TextObject* m_pTextObj; |
// NOTE(review): presumably the matrix of the enclosing form object, as in the
// formMatrix parameters of the CPDF_TextPage::Process*() methods - confirm.
| 55 CFX_Matrix m_formMatrix; |
| 56 }; |
| 57 |
// Declares CPDF_TextPage. An instance is built from a const CPDF_Page* plus an
// int flags value and exposes ParseTextPage()/IsParsed() together with a set
// of index-, rectangle- and text-based query methods.
// Only declarations are visible in this header; any note below that goes
// beyond what the signatures show is marked as an assumption to confirm.
| 58 class CPDF_TextPage { |
| 59 public: |
// NOTE(review): presumably pPage and flags initialize the const members
// m_pPage and m_parserflag - confirm in the constructor definition.
| 60 CPDF_TextPage(const CPDF_Page* pPage, int flags); |
// Empty inline destructor: the raw pointer members (m_pPage, m_pPreTextObj)
// are not deleted by this class.
| 61 ~CPDF_TextPage() {} |
| 62 |
// NOTE(review): the label on the next line names IPDF_TextPage, but this
// class declares no base class here; the label looks like a leftover.
| 63 // IPDF_TextPage: |
| 64 void ParseTextPage(); |
// Returns the m_bIsParsed flag.
| 65 bool IsParsed() const { return m_bIsParsed; } |
// Conversions between two index spaces ("text" index and "char" index).
// NOTE(review): the difference between the two is not visible here.
| 66 int CharIndexFromTextIndex(int TextIndex) const; |
| 67 int TextIndexFromCharIndex(int CharIndex) const; |
| 68 int CountChars() const; |
// |info| is a non-const pointer, presumably written by the call; whether a
// null pointer or an out-of-range index is tolerated is not visible here.
| 69 void GetCharInfo(int index, FPDF_CHAR_INFO* info) const; |
// |rectArray| is a non-const reference (output or in-out parameter).
| 70 void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const; |
// Two overloads: the position is passed either as a CFX_FloatPoint or as
// separate x / y values; both take an x and a y tolerance.
| 71 int GetIndexAtPos(CFX_FloatPoint point, |
| 72 FX_FLOAT xTolerance, |
| 73 FX_FLOAT yTolerance) const; |
| 74 int GetIndexAtPos(FX_FLOAT x, |
| 75 FX_FLOAT y, |
| 76 FX_FLOAT xTolerance, |
| 77 FX_FLOAT yTolerance) const; |
| 78 CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const; |
| 79 void GetRectsArrayByRect(const CFX_FloatRect& rect, |
| 80 CFX_RectArray& resRectArray) const; |
// Defaults: start = 0, nCount = -1.
// NOTE(review): presumably nCount == -1 means "through the end" - confirm.
| 81 CFX_WideString GetPageText(int start = 0, int nCount = -1) const; |
// Non-const, unlike GetRect() below.
// NOTE(review): presumably this refreshes m_SelRects, which GetRect() then
// reads by rectIndex - confirm.
| 82 int CountRects(int start, int nCount); |
| 83 void GetRect(int rectIndex, |
| 84 FX_FLOAT& left, |
| 85 FX_FLOAT& top, |
| 86 FX_FLOAT& right, |
| 87 FX_FLOAT& bottom) const; |
// Public overloads take a rect index or a rectangle; a third, private
// overload takes a start/end pair. |Rotate| is a non-const reference.
| 88 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate); |
| 89 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate); |
// Non-const, unlike GetBoundedSegment() below; bContains defaults to FALSE.
// NOTE(review): presumably this fills m_Segments, which GetBoundedSegment()
// then reads by index - confirm.
| 90 int CountBoundedSegments(FX_FLOAT left, |
| 91 FX_FLOAT top, |
| 92 FX_FLOAT right, |
| 93 FX_FLOAT bottom, |
| 94 FX_BOOL bContains = FALSE); |
| 95 void GetBoundedSegment(int index, int& start, int& count) const; |
| 96 int GetWordBreak(int index, int direction) const; |
| 97 |
// Static helpers: callable without a CPDF_TextPage instance.
| 98 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, |
| 99 const CFX_FloatRect& rect2); |
| 100 static FX_BOOL IsLetter(FX_WCHAR unicode); |
| 101 |
| 102 private: |
// Internal helpers. This section mixes FX_BOOL and bool return types.
| 103 FX_BOOL IsHyphen(FX_WCHAR curChar); |
| 104 bool IsControlChar(const PAGECHAR_INFO& charInfo); |
| 105 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); |
| 106 void ProcessObject(); |
| 107 void ProcessFormObject(CPDF_FormObject* pFormObj, |
| 108 const CFX_Matrix& formMatrix); |
// Two overloads: one takes a PDFTEXT_Obj by value, the other takes the text
// object, a matrix, the owning object list and an iterator into that list.
| 109 void ProcessTextObject(PDFTEXT_Obj pObj); |
| 110 void ProcessTextObject(CPDF_TextObject* pTextObj, |
| 111 const CFX_Matrix& formMatrix, |
| 112 const CPDF_PageObjectList* pObjList, |
| 113 CPDF_PageObjectList::const_iterator ObjPos); |
// NOTE(review): the meaning of the int return value is not visible here.
| 114 int ProcessInsertObject(const CPDF_TextObject* pObj, |
| 115 const CFX_Matrix& formMatrix); |
| 116 FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info); |
| 117 FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, |
| 118 const CPDF_PageObjectList* pObjList, |
| 119 CPDF_PageObjectList::const_iterator ObjPos); |
| 120 FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1, |
| 121 CPDF_TextObject* pTextObj2); |
| 122 int GetCharWidth(uint32_t charCode, CPDF_Font* pFont) const; |
| 123 void CloseTempLine(); |
| 124 void OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str); |
| 125 int32_t PreMarkedContent(PDFTEXT_Obj pObj); |
| 126 void ProcessMarkedContent(PDFTEXT_Obj pObj); |
// Both arguments are non-const references although the method is const, so
// any adjustment is made to the caller's values rather than to this object.
| 127 void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const; |
| 128 void FindPreviousTextObject(void); |
// Both take the PAGECHAR_INFO by value, i.e. they work on a copy.
| 129 void AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info); |
| 130 void AddCharInfoByRLDirection(FX_WCHAR wChar, PAGECHAR_INFO info); |
| 131 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); |
| 132 int32_t FindTextlineFlowDirection(); |
| 133 |
| 134 void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); |
| 135 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, |
| 136 const CPDF_Font* pFont, |
| 137 int nItems) const; |
| 138 |
// Const pointer to a const page: it cannot be reseated after construction,
// and the page cannot be modified through it.
| 139 const CPDF_Page* const m_pPage; |
// NOTE(review): elements are 16-bit; if these hold character indices, values
// above 65535 would not fit - confirm against the implementation.
| 140 std::vector<uint16_t> m_CharIndex; |
// Two parallel pairs: a "final" char list / text buffer and a "temp" pair.
// NOTE(review): presumably the temp pair holds the line being built until
// CloseTempLine() moves it over - confirm.
| 141 std::deque<PAGECHAR_INFO> m_CharList; |
| 142 std::deque<PAGECHAR_INFO> m_TempCharList; |
| 143 CFX_WideTextBuf m_TextBuf; |
| 144 CFX_WideTextBuf m_TempTextBuf; |
// Const: cannot change after construction.
| 145 const int m_parserflag; |
// Raw pointer; not deleted by the (empty) destructor.
| 146 CPDF_TextObject* m_pPreTextObj; |
| 147 CFX_Matrix m_perMatrix; |
// Value returned by IsParsed().
| 148 bool m_bIsParsed; |
| 149 CFX_Matrix m_DisplayMatrix; |
| 150 CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments; |
| 151 CFX_RectArray m_SelRects; |
| 152 CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj; |
| 153 int32_t m_TextlineDir; |
| 154 CFX_FloatRect m_CurlineRect; |
| 155 }; |
| 156 |
| 157 #endif // CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ |
OLD | NEW |