OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
9 | 9 |
| 10 #include <deque> |
| 11 |
10 #include "core/include/fpdftext/fpdf_text.h" | 12 #include "core/include/fpdftext/fpdf_text.h" |
11 #include "core/include/fxcrt/fx_basic.h" | 13 #include "core/include/fxcrt/fx_basic.h" |
12 | 14 |
13 class CFX_BidiChar; | 15 class CFX_BidiChar; |
14 class CPDF_DocProgressiveSearch; | 16 class CPDF_DocProgressiveSearch; |
15 class CPDF_FormObject; | 17 class CPDF_FormObject; |
16 class CPDF_LinkExtract; | 18 class CPDF_LinkExtract; |
17 class CPDF_TextPageFind; | 19 class CPDF_TextPageFind; |
18 | 20 |
// Integer constants in two families. They are identical on the OLD and NEW
// sides of this diff; only the line numbers shift by two.
//   FPDFTEXT_CHAR_*: ERROR = -1, NORMAL = 0, GENERATED = 1, UNUNICODE = 2,
//                    HYPHEN = 3, PIECE = 4
//   FPDFTEXT_MC_*:   PASS = 0, DONE = 1, DELAY = 2
// NOTE(review): presumably the CHAR_* values are what PAGECHAR_INFO::m_Flag
// holds; the use sites are not visible in this header - confirm.
19 #define FPDFTEXT_CHAR_ERROR -1 | 21 #define FPDFTEXT_CHAR_ERROR -1 |
20 #define FPDFTEXT_CHAR_NORMAL 0 | 22 #define FPDFTEXT_CHAR_NORMAL 0 |
21 #define FPDFTEXT_CHAR_GENERATED 1 | 23 #define FPDFTEXT_CHAR_GENERATED 1 |
22 #define FPDFTEXT_CHAR_UNUNICODE 2 | 24 #define FPDFTEXT_CHAR_UNUNICODE 2 |
23 #define FPDFTEXT_CHAR_HYPHEN 3 | 25 #define FPDFTEXT_CHAR_HYPHEN 3 |
24 #define FPDFTEXT_CHAR_PIECE 4 | 26 #define FPDFTEXT_CHAR_PIECE 4 |
25 #define FPDFTEXT_MC_PASS 0 | 27 #define FPDFTEXT_MC_PASS 0 |
26 #define FPDFTEXT_MC_DONE 1 | 28 #define FPDFTEXT_MC_DONE 1 |
27 #define FPDFTEXT_MC_DELAY 2 | 29 #define FPDFTEXT_MC_DELAY 2 |
28 | 30 |
// PAGECHAR_INFO: plain aggregate of per-character fields. As the member names
// indicate, it holds a character code, a Unicode value, an origin (x, y), a
// flag, a float rectangle, a pointer to a CPDF_TextObject, a matrix and an
// index.
// Diff: NEW declares it directly as `struct PAGECHAR_INFO { ... };` instead of
// OLD's C-style `typedef struct _PAGECHAR_INFO { ... } PAGECHAR_INFO;`.
// The member list itself is unchanged.
29 typedef struct _PAGECHAR_INFO { | 31 struct PAGECHAR_INFO { |
30 int m_CharCode; | 32 int m_CharCode; |
31 FX_WCHAR m_Unicode; | 33 FX_WCHAR m_Unicode; |
32 FX_FLOAT m_OriginX; | 34 FX_FLOAT m_OriginX; |
33 FX_FLOAT m_OriginY; | 35 FX_FLOAT m_OriginY; |
// NOTE(review): m_Flag presumably takes one of the FPDFTEXT_CHAR_* values
// defined earlier in this header - confirm against the .cpp.
34 int32_t m_Flag; | 36 int32_t m_Flag; |
35 CFX_FloatRect m_CharBox; | 37 CFX_FloatRect m_CharBox; |
// Raw pointer; who owns the pointed-to text object is not shown in this header.
36 CPDF_TextObject* m_pTextObj; | 38 CPDF_TextObject* m_pTextObj; |
37 CFX_Matrix m_Matrix; | 39 CFX_Matrix m_Matrix; |
38 int m_Index; | 40 int m_Index; |
39 } PAGECHAR_INFO; | 41 }; |
// OLD only: the PAGECHAR_InfoArray alias (CFX_SegmentedArray<PAGECHAR_INFO>)
// is removed. In NEW, CPDF_TextPage stores these records in
// std::deque<PAGECHAR_INFO> members instead.
40 typedef CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray; | 42 |
// FPDF_SEGMENT: two ints, m_Start and m_nCount.
// Diff: NEW names the struct directly instead of typedef'ing an anonymous
// struct; the members are unchanged.
41 typedef struct { | 43 struct FPDF_SEGMENT { |
42 int m_Start; | 44 int m_Start; |
43 int m_nCount; | 45 int m_nCount; |
44 } FPDF_SEGMENT; | 46 }; |
// OLD only: the SEGMENT_Array alias is removed. NEW spells out
// CFX_ArrayTemplate<FPDF_SEGMENT> at its single use (CPDF_TextPage::m_Segments).
45 typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; | 47 |
// PDFTEXT_Obj: pairs a CPDF_TextObject pointer with a CFX_Matrix
// (m_formMatrix).
// Diff: NEW names the struct directly instead of typedef'ing an anonymous
// struct; the members are unchanged.
46 typedef struct { | 48 struct PDFTEXT_Obj { |
// Raw pointer; ownership is not shown in this header.
47 CPDF_TextObject* m_pTextObj; | 49 CPDF_TextObject* m_pTextObj; |
48 CFX_Matrix m_formMatrix; | 50 CFX_Matrix m_formMatrix; |
49 } PDFTEXT_Obj; | 51 }; |
// OLD only: the LINEOBJ alias is removed. NEW spells out
// CFX_ArrayTemplate<PDFTEXT_Obj> at its single use (CPDF_TextPage::m_LineObj).
50 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; | |
51 | 52 |
// CPDF_TextPage: concrete implementation of the IPDF_TextPage interface; the
// methods grouped under "// IPDF_TextPage" are all marked `override`.
//
// Changes visible in this diff (OLD -> NEW):
//   * The public accessor GetCharList() is removed.
//   * m_charList (PAGECHAR_InfoArray) becomes
//     std::deque<PAGECHAR_INFO> m_CharList (type and capitalisation change).
//   * m_TempCharList becomes std::deque<PAGECHAR_INFO> and is now declared
//     immediately after m_CharList, ahead of m_TextBuf.
//   * m_Segment (SEGMENT_Array) becomes
//     CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments.
//   * m_LineObj keeps its name; its type is spelled out as
//     CFX_ArrayTemplate<PDFTEXT_Obj> instead of the LINEOBJ alias.
// Two runs of unchanged lines are elided by the diff viewer; nothing is
// documented for them because their contents are not visible here.
52 class CPDF_TextPage : public IPDF_TextPage { | 53 class CPDF_TextPage : public IPDF_TextPage { |
53 public: | 54 public: |
54 CPDF_TextPage(const CPDF_Page* pPage, int flags); | 55 CPDF_TextPage(const CPDF_Page* pPage, int flags); |
55 ~CPDF_TextPage() override {} | 56 ~CPDF_TextPage() override {} |
56 | 57 |
57 // IPDF_TextPage | 58 // IPDF_TextPage |
58 FX_BOOL ParseTextPage() override; | 59 FX_BOOL ParseTextPage() override; |
59 void NormalizeObjects(FX_BOOL bNormalize) override; | 60 void NormalizeObjects(FX_BOOL bNormalize) override; |
// Inline accessor: returns the m_bIsParsed member.
60 bool IsParsed() const override { return m_bIsParsed; } | 61 bool IsParsed() const override { return m_bIsParsed; } |
// (24 unchanged lines elided by the diff viewer.)
(...skipping 24 matching lines...) Expand all Loading... |
85 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override; | 86 FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override; |
86 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override; | 87 FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override; |
87 int CountBoundedSegments(FX_FLOAT left, | 88 int CountBoundedSegments(FX_FLOAT left, |
88 FX_FLOAT top, | 89 FX_FLOAT top, |
89 FX_FLOAT right, | 90 FX_FLOAT right, |
90 FX_FLOAT bottom, | 91 FX_FLOAT bottom, |
91 FX_BOOL bContains = FALSE) override; | 92 FX_BOOL bContains = FALSE) override; |
92 void GetBoundedSegment(int index, int& start, int& count) const override; | 93 void GetBoundedSegment(int index, int& start, int& count) const override; |
93 int GetWordBreak(int index, int direction) const override; | 94 int GetWordBreak(int index, int direction) const override; |
94 | 95 |
// OLD only: GetCharList() returned a pointer to the internal m_charList.
// NEW removes it, so the character list is no longer reachable through this
// accessor.
95 const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; } | |
96 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, | 96 static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, |
97 const CFX_FloatRect& rect2); | 97 const CFX_FloatRect& rect2); |
98 static FX_BOOL IsLetter(FX_WCHAR unicode); | 98 static FX_BOOL IsLetter(FX_WCHAR unicode); |
99 | 99 |
100 private: | 100 private: |
101 FX_BOOL IsHyphen(FX_WCHAR curChar); | 101 FX_BOOL IsHyphen(FX_WCHAR curChar); |
102 bool IsControlChar(const PAGECHAR_INFO& charInfo); | 102 bool IsControlChar(const PAGECHAR_INFO& charInfo); |
// Private three-argument overload, distinct from the two public
// GetBaselineRotate() overrides above.
103 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); | 103 FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); |
104 void ProcessObject(); | 104 void ProcessObject(); |
105 void ProcessFormObject(CPDF_FormObject* pFormObj, | 105 void ProcessFormObject(CPDF_FormObject* pFormObj, |
// (20 unchanged lines elided by the diff viewer, including the rest of the
// ProcessFormObject declaration.)
(...skipping 20 matching lines...) Expand all Loading... |
126 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); | 126 int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); |
127 int32_t FindTextlineFlowDirection(); | 127 int32_t FindTextlineFlowDirection(); |
128 void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); | 128 void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); |
129 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, | 129 FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, |
130 const CPDF_Font* pFont, | 130 const CPDF_Font* pFont, |
131 int nItems) const; | 131 int nItems) const; |
132 | 132 |
133 CPDFText_ParseOptions m_ParseOptions; | 133 CPDFText_ParseOptions m_ParseOptions; |
134 CFX_WordArray m_CharIndex; | 134 CFX_WordArray m_CharIndex; |
// Const pointer to const: neither the pointer nor the pointee can be changed
// through this member after construction.
135 const CPDF_PageObjectList* const m_pPage; | 135 const CPDF_PageObjectList* const m_pPage; |
// NEW stores character records in std::deque (needs the <deque> include that
// this change adds at the top of the file).
// NOTE(review): m_TempCharList moves ahead of m_TextBuf, which changes the
// member declaration order. C++ initialises members in declaration order, so
// the constructor's initialiser list (not visible here) should be checked for
// ordering warnings or order dependencies.
136 PAGECHAR_InfoArray m_charList; | 136 std::deque<PAGECHAR_INFO> m_CharList; |
| 137 std::deque<PAGECHAR_INFO> m_TempCharList; |
137 CFX_WideTextBuf m_TextBuf; | 138 CFX_WideTextBuf m_TextBuf; |
138 PAGECHAR_InfoArray m_TempCharList; | |
139 CFX_WideTextBuf m_TempTextBuf; | 139 CFX_WideTextBuf m_TempTextBuf; |
140 const int m_parserflag; | 140 const int m_parserflag; |
141 CPDF_TextObject* m_pPreTextObj; | 141 CPDF_TextObject* m_pPreTextObj; |
142 CFX_Matrix m_perMatrix; | 142 CFX_Matrix m_perMatrix; |
143 bool m_bIsParsed; | 143 bool m_bIsParsed; |
144 CFX_Matrix m_DisplayMatrix; | 144 CFX_Matrix m_DisplayMatrix; |
// Renamed m_Segment -> m_Segments; the element type FPDF_SEGMENT is unchanged.
145 SEGMENT_Array m_Segment; | 145 CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments; |
146 CFX_RectArray m_SelRects; | 146 CFX_RectArray m_SelRects; |
147 LINEOBJ m_LineObj; | 147 CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj; |
148 int32_t m_TextlineDir; | 148 int32_t m_TextlineDir; |
149 CFX_FloatRect m_CurlineRect; | 149 CFX_FloatRect m_CurlineRect; |
150 }; | 150 }; |
151 | 151 |
152 class CPDF_TextPageFind : public IPDF_TextPageFind { | 152 class CPDF_TextPageFind : public IPDF_TextPageFind { |
153 public: | 153 public: |
154 explicit CPDF_TextPageFind(const IPDF_TextPage* pTextPage); | 154 explicit CPDF_TextPageFind(const IPDF_TextPage* pTextPage); |
155 ~CPDF_TextPageFind() override {} | 155 ~CPDF_TextPageFind() override {} |
156 | 156 |
157 // IPDF_TextPageFind | 157 // IPDF_TextPageFind |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
238 | 238 |
// Free-function declarations, identical on the OLD and NEW sides of this
// diff. Their definitions are not part of this view.
// NOTE(review): FX_Unicode_GetNormalization writes through a caller-supplied
// FX_WCHAR* (pDst) and returns an FX_STRSIZE; the required buffer size and
// whether pDst may be null are not visible here - check the definition.
239 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); | 239 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); |
// Takes the string by non-const reference.
240 void NormalizeString(CFX_WideString& str); | 240 void NormalizeString(CFX_WideString& str); |
// Takes the destination string (sDest) by non-const reference.
241 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); | 241 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); |
// Takes the output buffer by non-const reference; pPage and pObjArray are raw
// pointers whose nullability is not shown here.
242 void GetTextStream_Unicode(CFX_WideTextBuf& buffer, | 242 void GetTextStream_Unicode(CFX_WideTextBuf& buffer, |
243 CPDF_PageObjectList* pPage, | 243 CPDF_PageObjectList* pPage, |
244 FX_BOOL bUseLF, | 244 FX_BOOL bUseLF, |
245 CFX_PtrArray* pObjArray); | 245 CFX_PtrArray* pObjArray); |
246 | 246 |
247 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ | 247 #endif // CORE_SRC_FPDFTEXT_TEXT_INT_H_ |
OLD | NEW |