| OLD | NEW |
| (Empty) |
| 1 // Copyright 2016 PDFium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
| 6 | |
| 7 #ifndef CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ | |
| 8 #define CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ | |
| 9 | |
| 10 #include <deque> | |
| 11 #include <vector> | |
| 12 | |
| 13 #include "core/fpdfapi/fpdf_page/cpdf_pageobjectlist.h" | |
| 14 #include "core/fxcrt/include/fx_basic.h" | |
| 15 #include "core/fxcrt/include/fx_coordinates.h" | |
| 16 #include "core/fxcrt/include/fx_string.h" | |
| 17 | |
| 18 class CPDF_Font; | |
| 19 class CPDF_FormObject; | |
| 20 class CPDF_Page; | |
| 21 class CPDF_TextObject; | |
| 22 | |
// Text-search option bits. Each value is a distinct single bit, so the
// options can be combined with bitwise OR.
#define FPDFTEXT_MATCHCASE      0x00000001
#define FPDFTEXT_MATCHWHOLEWORD 0x00000002
#define FPDFTEXT_CONSECUTIVE    0x00000004

// Character classification codes.
// NOTE(review): presumably the values stored in the m_Flag members of
// FPDF_CHAR_INFO / PAGECHAR_INFO -- confirm against the implementation.
#define FPDFTEXT_CHAR_ERROR     -1
#define FPDFTEXT_CHAR_NORMAL    0
#define FPDFTEXT_CHAR_GENERATED 1
#define FPDFTEXT_CHAR_UNUNICODE 2
#define FPDFTEXT_CHAR_HYPHEN    3
#define FPDFTEXT_CHAR_PIECE     4

// Wide-character constants.
#define TEXT_SPACE_CHAR    L' '
#define TEXT_LINEFEED_CHAR L'\n'
#define TEXT_RETURN_CHAR   L'\r'

// Wide-string constants.
#define TEXT_EMPTY           L""
#define TEXT_SPACE           L" "
#define TEXT_RETURN_LINEFEED L"\r\n"
#define TEXT_LINEFEED        L"\n"

// Gap ratio threshold (a double literal); its use lives in the implementation
// file, not in this header.
#define TEXT_CHARRATIO_GAPDELTA 0.070
| 42 | |
// Return type of CPDF_TextPage::PreMarkedContent().
// NOTE(review): the exact meaning of each state is defined by the
// implementation file and is not visible from this header.
enum class FPDFText_MarkedContent {
  Pass = 0,
  Done,
  Delay,
};
| 44 | |
// Direction flag accepted by the CPDF_TextPage constructor.
// NOTE(review): presumably selects the reading direction used while parsing
// -- confirm against the implementation.
enum class FPDFText_Direction {
  Left = -1,
  Right = 1,
};
| 46 | |
| 47 struct FPDF_CHAR_INFO { | |
| 48 FX_WCHAR m_Unicode; | |
| 49 FX_WCHAR m_Charcode; | |
| 50 int32_t m_Flag; | |
| 51 FX_FLOAT m_FontSize; | |
| 52 FX_FLOAT m_OriginX; | |
| 53 FX_FLOAT m_OriginY; | |
| 54 CFX_FloatRect m_CharBox; | |
| 55 CPDF_TextObject* m_pTextObj; | |
| 56 CFX_Matrix m_Matrix; | |
| 57 }; | |
| 58 | |
// A (start, count) pair; element type of CPDF_TextPage::m_Segments.
struct FPDF_SEGMENT {
  int m_Start;   // First index of the segment.
  int m_nCount;  // Number of items in the segment.
};
| 63 | |
| 64 struct PAGECHAR_INFO { | |
| 65 int m_CharCode; | |
| 66 FX_WCHAR m_Unicode; | |
| 67 FX_FLOAT m_OriginX; | |
| 68 FX_FLOAT m_OriginY; | |
| 69 int32_t m_Flag; | |
| 70 CFX_FloatRect m_CharBox; | |
| 71 CPDF_TextObject* m_pTextObj; | |
| 72 CFX_Matrix m_Matrix; | |
| 73 int m_Index; | |
| 74 }; | |
| 75 | |
| 76 struct PDFTEXT_Obj { | |
| 77 CPDF_TextObject* m_pTextObj; | |
| 78 CFX_Matrix m_formMatrix; | |
| 79 }; | |
| 80 | |
| 81 class CPDF_TextPage { | |
| 82 public: | |
| 83 CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags); | |
| 84 ~CPDF_TextPage(); | |
| 85 | |
| 86 // IPDF_TextPage: | |
| 87 void ParseTextPage(); | |
| 88 bool IsParsed() const { return m_bIsParsed; } | |
| 89 int CharIndexFromTextIndex(int TextIndex) const; | |
| 90 int TextIndexFromCharIndex(int CharIndex) const; | |
| 91 int CountChars() const; | |
| 92 void GetCharInfo(int index, FPDF_CHAR_INFO* info) const; | |
| 93 std::vector<CFX_FloatRect> GetRectArray(int start, int nCount) const; | |
| 94 int GetIndexAtPos(CFX_FloatPoint point, | |
| 95 FX_FLOAT xTolerance, | |
| 96 FX_FLOAT yTolerance) const; | |
| 97 int GetIndexAtPos(FX_FLOAT x, | |
| 98 FX_FLOAT y, | |
| 99 FX_FLOAT xTolerance, | |
| 100 FX_FLOAT yTolerance) const; | |
| 101 CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const; | |
| 102 CFX_WideString GetPageText(int start = 0, int nCount = -1) const; | |
| 103 int CountRects(int start, int nCount); | |
| 104 void GetRect(int rectIndex, | |
| 105 FX_FLOAT& left, | |
| 106 FX_FLOAT& top, | |
| 107 FX_FLOAT& right, | |
| 108 FX_FLOAT& bottom) const; | |
| 109 | |
| 110 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, | |
| 111 const CFX_FloatRect& rect2); | |
| 112 | |
| 113 private: | |
| 114 enum class TextOrientation { | |
| 115 Unknown, | |
| 116 Horizontal, | |
| 117 Vertical, | |
| 118 }; | |
| 119 | |
| 120 enum class GenerateCharacter { | |
| 121 None, | |
| 122 Space, | |
| 123 LineBreak, | |
| 124 Hyphen, | |
| 125 }; | |
| 126 | |
| 127 FX_BOOL IsHyphen(FX_WCHAR curChar); | |
| 128 bool IsControlChar(const PAGECHAR_INFO& charInfo); | |
| 129 void ProcessObject(); | |
| 130 void ProcessFormObject(CPDF_FormObject* pFormObj, | |
| 131 const CFX_Matrix& formMatrix); | |
| 132 void ProcessTextObject(PDFTEXT_Obj pObj); | |
| 133 void ProcessTextObject(CPDF_TextObject* pTextObj, | |
| 134 const CFX_Matrix& formMatrix, | |
| 135 const CPDF_PageObjectList* pObjList, | |
| 136 CPDF_PageObjectList::const_iterator ObjPos); | |
| 137 GenerateCharacter ProcessInsertObject(const CPDF_TextObject* pObj, | |
| 138 const CFX_Matrix& formMatrix); | |
| 139 FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info); | |
| 140 FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, | |
| 141 const CPDF_PageObjectList* pObjList, | |
| 142 CPDF_PageObjectList::const_iterator ObjPos); | |
| 143 FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1, | |
| 144 CPDF_TextObject* pTextObj2); | |
| 145 int GetCharWidth(uint32_t charCode, CPDF_Font* pFont) const; | |
| 146 void CloseTempLine(); | |
| 147 FPDFText_MarkedContent PreMarkedContent(PDFTEXT_Obj pObj); | |
| 148 void ProcessMarkedContent(PDFTEXT_Obj pObj); | |
| 149 void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const; | |
| 150 void FindPreviousTextObject(); | |
| 151 void AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info); | |
| 152 void AddCharInfoByRLDirection(FX_WCHAR wChar, PAGECHAR_INFO info); | |
| 153 TextOrientation GetTextObjectWritingMode( | |
| 154 const CPDF_TextObject* pTextObj) const; | |
| 155 TextOrientation FindTextlineFlowOrientation() const; | |
| 156 void AppendGeneratedCharacter(FX_WCHAR unicode, const CFX_Matrix& formMatrix); | |
| 157 | |
| 158 void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); | |
| 159 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, | |
| 160 const CPDF_Font* pFont, | |
| 161 int nItems) const; | |
| 162 | |
| 163 const CPDF_Page* const m_pPage; | |
| 164 std::vector<uint16_t> m_CharIndex; | |
| 165 std::deque<PAGECHAR_INFO> m_CharList; | |
| 166 std::deque<PAGECHAR_INFO> m_TempCharList; | |
| 167 CFX_WideTextBuf m_TextBuf; | |
| 168 CFX_WideTextBuf m_TempTextBuf; | |
| 169 const FPDFText_Direction m_parserflag; | |
| 170 CPDF_TextObject* m_pPreTextObj; | |
| 171 CFX_Matrix m_perMatrix; | |
| 172 bool m_bIsParsed; | |
| 173 CFX_Matrix m_DisplayMatrix; | |
| 174 CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments; | |
| 175 std::vector<CFX_FloatRect> m_SelRects; | |
| 176 CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj; | |
| 177 TextOrientation m_TextlineDir; | |
| 178 CFX_FloatRect m_CurlineRect; | |
| 179 }; | |
| 180 | |
| 181 #endif // CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_ | |
| OLD | NEW |