| OLD | NEW |
| (Empty) | |
| 1 // Copyright 2016 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 |
| 7 #ifndef CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ |
| 8 #define CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ |
| 9 |
#include <deque>
#include <vector>

#include "core/fpdfapi/fpdf_page/cpdf_pageobjectlist.h"
#include "core/fxcrt/include/fx_basic.h"
#include "core/fxcrt/include/fx_coordinates.h"
#include "core/fxcrt/include/fx_string.h"
| 14 |
| 15 class CFX_BidiChar; |
| 16 class CPDF_Font; |
| 17 class CPDF_FormObject; |
| 18 class CPDF_Page; |
| 19 class CPDF_TextObject; |
| 20 |
| 21 struct FPDF_CHAR_INFO { |
| 22 FX_WCHAR m_Unicode; |
| 23 FX_WCHAR m_Charcode; |
| 24 int32_t m_Flag; |
| 25 FX_FLOAT m_FontSize; |
| 26 FX_FLOAT m_OriginX; |
| 27 FX_FLOAT m_OriginY; |
| 28 CFX_FloatRect m_CharBox; |
| 29 CPDF_TextObject* m_pTextObj; |
| 30 CFX_Matrix m_Matrix; |
| 31 }; |
| 32 |
// A (start, count) pair of ints.
//
// CPDF_TextPage keeps an array of these in |m_Segments|, and
// CPDF_TextPage::GetBoundedSegment() hands back a start and a count by index.
// NOTE(review): presumably |m_Start| is a character index and |m_nCount| a
// number of characters; confirm against the implementation.
struct FPDF_SEGMENT {
  int m_Start;
  int m_nCount;
};
| 37 |
| 38 struct PAGECHAR_INFO { |
| 39 int m_CharCode; |
| 40 FX_WCHAR m_Unicode; |
| 41 FX_FLOAT m_OriginX; |
| 42 FX_FLOAT m_OriginY; |
| 43 int32_t m_Flag; |
| 44 CFX_FloatRect m_CharBox; |
| 45 CPDF_TextObject* m_pTextObj; |
| 46 CFX_Matrix m_Matrix; |
| 47 int m_Index; |
| 48 }; |
| 49 |
| 50 struct PDFTEXT_Obj { |
| 51 CPDF_TextObject* m_pTextObj; |
| 52 CFX_Matrix m_formMatrix; |
| 53 }; |
| 54 |
| 55 class CPDF_TextPage { |
| 56 public: |
| 57 CPDF_TextPage(const CPDF_Page* pPage, int flags); |
| 58 ~CPDF_TextPage() {} |
| 59 |
| 60 // IPDF_TextPage: |
| 61 void ParseTextPage(); |
| 62 bool IsParsed() const { return m_bIsParsed; } |
| 63 int CharIndexFromTextIndex(int TextIndex) const; |
| 64 int TextIndexFromCharIndex(int CharIndex) const; |
| 65 int CountChars() const; |
| 66 void GetCharInfo(int index, FPDF_CHAR_INFO* info) const; |
| 67 void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const; |
| 68 int GetIndexAtPos(CFX_FloatPoint point, |
| 69 FX_FLOAT xTolerance, |
| 70 FX_FLOAT yTolerance) const; |
| 71 int GetIndexAtPos(FX_FLOAT x, |
| 72 FX_FLOAT y, |
| 73 FX_FLOAT xTolerance, |
| 74 FX_FLOAT yTolerance) const; |
| 75 CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const; |
| 76 void GetRectsArrayByRect(const CFX_FloatRect& rect, |
| 77 CFX_RectArray& resRectArray) const; |
| 78 CFX_WideString GetPageText(int start = 0, int nCount = -1) const; |
| 79 int CountRects(int start, int nCount); |
| 80 void GetRect(int rectIndex, |
| 81 FX_FLOAT& left, |
| 82 FX_FLOAT& top, |
| 83 FX_FLOAT& right, |
| 84 FX_FLOAT& bottom) const; |
| 85 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate); |
| 86 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate); |
| 87 int CountBoundedSegments(FX_FLOAT left, |
| 88 FX_FLOAT top, |
| 89 FX_FLOAT right, |
| 90 FX_FLOAT bottom, |
| 91 FX_BOOL bContains = FALSE); |
| 92 void GetBoundedSegment(int index, int& start, int& count) const; |
| 93 int GetWordBreak(int index, int direction) const; |
| 94 |
| 95 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, |
| 96 const CFX_FloatRect& rect2); |
| 97 static FX_BOOL IsLetter(FX_WCHAR unicode); |
| 98 |
| 99 private: |
| 100 FX_BOOL IsHyphen(FX_WCHAR curChar); |
| 101 bool IsControlChar(const PAGECHAR_INFO& charInfo); |
| 102 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); |
| 103 void ProcessObject(); |
| 104 void ProcessFormObject(CPDF_FormObject* pFormObj, |
| 105 const CFX_Matrix& formMatrix); |
| 106 void ProcessTextObject(PDFTEXT_Obj pObj); |
| 107 void ProcessTextObject(CPDF_TextObject* pTextObj, |
| 108 const CFX_Matrix& formMatrix, |
| 109 const CPDF_PageObjectList* pObjList, |
| 110 CPDF_PageObjectList::const_iterator ObjPos); |
| 111 int ProcessInsertObject(const CPDF_TextObject* pObj, |
| 112 const CFX_Matrix& formMatrix); |
| 113 FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info); |
| 114 FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, |
| 115 const CPDF_PageObjectList* pObjList, |
| 116 CPDF_PageObjectList::const_iterator ObjPos); |
| 117 FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1, |
| 118 CPDF_TextObject* pTextObj2); |
| 119 int GetCharWidth(uint32_t charCode, CPDF_Font* pFont) const; |
| 120 void CloseTempLine(); |
| 121 void OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str); |
| 122 int32_t PreMarkedContent(PDFTEXT_Obj pObj); |
| 123 void ProcessMarkedContent(PDFTEXT_Obj pObj); |
| 124 void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const; |
| 125 void FindPreviousTextObject(void); |
| 126 void AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info); |
| 127 void AddCharInfoByRLDirection(FX_WCHAR wChar, PAGECHAR_INFO info); |
| 128 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); |
| 129 int32_t FindTextlineFlowDirection(); |
| 130 |
| 131 void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); |
| 132 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, |
| 133 const CPDF_Font* pFont, |
| 134 int nItems) const; |
| 135 |
| 136 const CPDF_Page* const m_pPage; |
| 137 std::vector<uint16_t> m_CharIndex; |
| 138 std::deque<PAGECHAR_INFO> m_CharList; |
| 139 std::deque<PAGECHAR_INFO> m_TempCharList; |
| 140 CFX_WideTextBuf m_TextBuf; |
| 141 CFX_WideTextBuf m_TempTextBuf; |
| 142 const int m_parserflag; |
| 143 CPDF_TextObject* m_pPreTextObj; |
| 144 CFX_Matrix m_perMatrix; |
| 145 bool m_bIsParsed; |
| 146 CFX_Matrix m_DisplayMatrix; |
| 147 CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments; |
| 148 CFX_RectArray m_SelRects; |
| 149 CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj; |
| 150 int32_t m_TextlineDir; |
| 151 CFX_FloatRect m_CurlineRect; |
| 152 }; |
| 153 |
| 154 #endif // CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ |
| OLD | NEW |