| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| 9 | 9 |
| 10 #include <deque> |
| 11 |
| 10 #include "core/include/fpdftext/fpdf_text.h" | 12 #include "core/include/fpdftext/fpdf_text.h" |
| 11 #include "core/include/fxcrt/fx_basic.h" | 13 #include "core/include/fxcrt/fx_basic.h" |
| 12 | 14 |
| 13 class CFX_BidiChar; | 15 class CFX_BidiChar; |
| 14 class CPDF_DocProgressiveSearch; | 16 class CPDF_DocProgressiveSearch; |
| 15 class CPDF_FormObject; | 17 class CPDF_FormObject; |
| 16 class CPDF_LinkExtract; | 18 class CPDF_LinkExtract; |
| 17 class CPDF_TextPageFind; | 19 class CPDF_TextPageFind; |
| 18 | 20 |
| 19 #define FPDFTEXT_CHAR_ERROR -1 | 21 #define FPDFTEXT_CHAR_ERROR -1 |
| 20 #define FPDFTEXT_CHAR_NORMAL 0 | 22 #define FPDFTEXT_CHAR_NORMAL 0 |
| 21 #define FPDFTEXT_CHAR_GENERATED 1 | 23 #define FPDFTEXT_CHAR_GENERATED 1 |
| 22 #define FPDFTEXT_CHAR_UNUNICODE 2 | 24 #define FPDFTEXT_CHAR_UNUNICODE 2 |
| 23 #define FPDFTEXT_CHAR_HYPHEN 3 | 25 #define FPDFTEXT_CHAR_HYPHEN 3 |
| 24 #define FPDFTEXT_CHAR_PIECE 4 | 26 #define FPDFTEXT_CHAR_PIECE 4 |
| 25 #define FPDFTEXT_MC_PASS 0 | 27 #define FPDFTEXT_MC_PASS 0 |
| 26 #define FPDFTEXT_MC_DONE 1 | 28 #define FPDFTEXT_MC_DONE 1 |
| 27 #define FPDFTEXT_MC_DELAY 2 | 29 #define FPDFTEXT_MC_DELAY 2 |
| 28 | 30 |
| 29 typedef struct _PAGECHAR_INFO { | 31 struct PAGECHAR_INFO { |
| 30 int m_CharCode; | 32 int m_CharCode; |
| 31 FX_WCHAR m_Unicode; | 33 FX_WCHAR m_Unicode; |
| 32 FX_FLOAT m_OriginX; | 34 FX_FLOAT m_OriginX; |
| 33 FX_FLOAT m_OriginY; | 35 FX_FLOAT m_OriginY; |
| 34 int32_t m_Flag; | 36 int32_t m_Flag; |
| 35 CFX_FloatRect m_CharBox; | 37 CFX_FloatRect m_CharBox; |
| 36 CPDF_TextObject* m_pTextObj; | 38 CPDF_TextObject* m_pTextObj; |
| 37 CFX_Matrix m_Matrix; | 39 CFX_Matrix m_Matrix; |
| 38 int m_Index; | 40 int m_Index; |
| 39 } PAGECHAR_INFO; | 41 }; |
| 40 typedef CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray; | 42 |
| 41 typedef struct { | 43 struct FPDF_SEGMENT { |
| 42 int m_Start; | 44 int m_Start; |
| 43 int m_nCount; | 45 int m_nCount; |
| 44 } FPDF_SEGMENT; | 46 }; |
| 45 typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; | 47 |
| 46 typedef struct { | 48 struct PDFTEXT_Obj { |
| 47 CPDF_TextObject* m_pTextObj; | 49 CPDF_TextObject* m_pTextObj; |
| 48 CFX_Matrix m_formMatrix; | 50 CFX_Matrix m_formMatrix; |
| 49 } PDFTEXT_Obj; | 51 }; |
| 50 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; | |
| 51 | 52 |
| 52 class CPDF_TextPage : public IPDF_TextPage { | 53 class CPDF_TextPage : public IPDF_TextPage { |
| 53 public: | 54 public: |
| 54 CPDF_TextPage(const CPDF_Page* pPage, int flags); | 55 CPDF_TextPage(const CPDF_Page* pPage, int flags); |
| 55 ~CPDF_TextPage() override {} | 56 ~CPDF_TextPage() override {} |
| 56 | 57 |
| 57 // IPDF_TextPage | 58 // IPDF_TextPage |
| 58 FX_BOOL ParseTextPage() override; | 59 FX_BOOL ParseTextPage() override; |
| 59 void NormalizeObjects(FX_BOOL bNormalize) override; | 60 void NormalizeObjects(FX_BOOL bNormalize) override; |
| 60 bool IsParsed() const override { return m_bIsParsed; } | 61 bool IsParsed() const override { return m_bIsParsed; } |
| (...skipping 24 matching lines...) Expand all Loading... |
| 85 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override; | 86 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override; |
| 86 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override; | 87 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override; |
| 87 int CountBoundedSegments(FX_FLOAT left, | 88 int CountBoundedSegments(FX_FLOAT left, |
| 88 FX_FLOAT top, | 89 FX_FLOAT top, |
| 89 FX_FLOAT right, | 90 FX_FLOAT right, |
| 90 FX_FLOAT bottom, | 91 FX_FLOAT bottom, |
| 91 FX_BOOL bContains = FALSE) override; | 92 FX_BOOL bContains = FALSE) override; |
| 92 void GetBoundedSegment(int index, int& start, int& count) const override; | 93 void GetBoundedSegment(int index, int& start, int& count) const override; |
| 93 int GetWordBreak(int index, int direction) const override; | 94 int GetWordBreak(int index, int direction) const override; |
| 94 | 95 |
| 95 const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; } | |
| 96 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, | 96 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, |
| 97 const CFX_FloatRect& rect2); | 97 const CFX_FloatRect& rect2); |
| 98 static FX_BOOL IsLetter(FX_WCHAR unicode); | 98 static FX_BOOL IsLetter(FX_WCHAR unicode); |
| 99 | 99 |
| 100 private: | 100 private: |
| 101 FX_BOOL IsHyphen(FX_WCHAR curChar); | 101 FX_BOOL IsHyphen(FX_WCHAR curChar); |
| 102 bool IsControlChar(const PAGECHAR_INFO& charInfo); | 102 bool IsControlChar(const PAGECHAR_INFO& charInfo); |
| 103 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); | 103 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); |
| 104 void ProcessObject(); | 104 void ProcessObject(); |
| 105 void ProcessFormObject(CPDF_FormObject* pFormObj, | 105 void ProcessFormObject(CPDF_FormObject* pFormObj, |
| (...skipping 20 matching lines...) Expand all Loading... |
| 126 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); | 126 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); |
| 127 int32_t FindTextlineFlowDirection(); | 127 int32_t FindTextlineFlowDirection(); |
| 128 void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); | 128 void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); |
| 129 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, | 129 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, |
| 130 const CPDF_Font* pFont, | 130 const CPDF_Font* pFont, |
| 131 int nItems) const; | 131 int nItems) const; |
| 132 | 132 |
| 133 CPDFText_ParseOptions m_ParseOptions; | 133 CPDFText_ParseOptions m_ParseOptions; |
| 134 CFX_WordArray m_CharIndex; | 134 CFX_WordArray m_CharIndex; |
| 135 const CPDF_PageObjectList* const m_pPage; | 135 const CPDF_PageObjectList* const m_pPage; |
| 136 PAGECHAR_InfoArray m_charList; | 136 std::deque<PAGECHAR_INFO> m_CharList; |
| 137 std::deque<PAGECHAR_INFO> m_TempCharList; |
| 137 CFX_WideTextBuf m_TextBuf; | 138 CFX_WideTextBuf m_TextBuf; |
| 138 PAGECHAR_InfoArray m_TempCharList; | |
| 139 CFX_WideTextBuf m_TempTextBuf; | 139 CFX_WideTextBuf m_TempTextBuf; |
| 140 const int m_parserflag; | 140 const int m_parserflag; |
| 141 CPDF_TextObject* m_pPreTextObj; | 141 CPDF_TextObject* m_pPreTextObj; |
| 142 CFX_Matrix m_perMatrix; | 142 CFX_Matrix m_perMatrix; |
| 143 bool m_bIsParsed; | 143 bool m_bIsParsed; |
| 144 CFX_Matrix m_DisplayMatrix; | 144 CFX_Matrix m_DisplayMatrix; |
| 145 SEGMENT_Array m_Segment; | 145 CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments; |
| 146 CFX_RectArray m_SelRects; | 146 CFX_RectArray m_SelRects; |
| 147 LINEOBJ m_LineObj; | 147 CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj; |
| 148 int32_t m_TextlineDir; | 148 int32_t m_TextlineDir; |
| 149 CFX_FloatRect m_CurlineRect; | 149 CFX_FloatRect m_CurlineRect; |
| 150 }; | 150 }; |
| 151 | 151 |
| 152 class CPDF_TextPageFind : public IPDF_TextPageFind { | 152 class CPDF_TextPageFind : public IPDF_TextPageFind { |
| 153 public: | 153 public: |
| 154 explicit CPDF_TextPageFind(const IPDF_TextPage* pTextPage); | 154 explicit CPDF_TextPageFind(const IPDF_TextPage* pTextPage); |
| 155 ~CPDF_TextPageFind() override {} | 155 ~CPDF_TextPageFind() override {} |
| 156 | 156 |
| 157 // IPDF_TextPageFind | 157 // IPDF_TextPageFind |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 238 | 238 |
| 239 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); | 239 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); |
| 240 void NormalizeString(CFX_WideString& str); | 240 void NormalizeString(CFX_WideString& str); |
| 241 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); | 241 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); |
| 242 void GetTextStream_Unicode(CFX_WideTextBuf& buffer, | 242 void GetTextStream_Unicode(CFX_WideTextBuf& buffer, |
| 243 CPDF_PageObjectList* pPage, | 243 CPDF_PageObjectList* pPage, |
| 244 FX_BOOL bUseLF, | 244 FX_BOOL bUseLF, |
| 245 CFX_PtrArray* pObjArray); | 245 CFX_PtrArray* pObjArray); |
| 246 | 246 |
| 247 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 247 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
| OLD | NEW |