OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #ifndef _FPDF_TEXT_H_ | 7 #ifndef _FPDF_TEXT_H_ |
8 #define _FPDF_TEXT_H_ | 8 #define _FPDF_TEXT_H_ |
9 #ifndef _FPDF_PARSER_ | 9 #ifndef _FPDF_PARSER_ |
10 #include "../fpdfapi/fpdf_parser.h" | 10 #include "../fpdfapi/fpdf_parser.h" |
11 #endif | 11 #endif |
12 #ifndef _FPDF_PAGEOBJ_H_ | 12 #ifndef _FPDF_PAGEOBJ_H_ |
13 #include "../fpdfapi/fpdf_pageobj.h" | 13 #include "../fpdfapi/fpdf_pageobj.h" |
14 #endif | 14 #endif |
15 #ifndef _FPDF_PAGE_ | 15 #ifndef _FPDF_PAGE_ |
16 #include "../fpdfapi/fpdf_page.h" | 16 #include "../fpdfapi/fpdf_page.h" |
17 #endif | 17 #endif |
18 class CPDF_PageObjects; | 18 class CPDF_PageObjects; |
19 #define PDF2TXT_AUTO_ROTATE» » 1 | 19 #define PDF2TXT_AUTO_ROTATE 1 |
20 #define PDF2TXT_AUTO_WIDTH» » 2 | 20 #define PDF2TXT_AUTO_WIDTH 2 |
21 #define PDF2TXT_KEEP_COLUMN» » 4 | 21 #define PDF2TXT_KEEP_COLUMN 4 |
22 #define PDF2TXT_USE_OCR»» » 8 | 22 #define PDF2TXT_USE_OCR 8 |
23 #define PDF2TXT_INCLUDE_INVISIBLE» 16 | 23 #define PDF2TXT_INCLUDE_INVISIBLE 16 |
24 void PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dicti
onary* pPage, | 24 void PDF_GetPageText(CFX_ByteStringArray& lines, |
25 int iMinWidth, FX_DWORD flags); | 25 CPDF_Document* pDoc, |
26 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CP
DF_Dictionary* pPage, | 26 CPDF_Dictionary* pPage, |
27 int iMinWidth, FX_DWORD flags); | 27 int iMinWidth, |
28 void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPD
F_Dictionary* pPage, | 28 FX_DWORD flags); |
| 29 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, |
| 30 CPDF_Document* pDoc, |
| 31 CPDF_Dictionary* pPage, |
| 32 int iMinWidth, |
| 33 FX_DWORD flags); |
| 34 void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, |
| 35 CPDF_Document* pDoc, |
| 36 CPDF_Dictionary* pPage, |
29 FX_DWORD flags); | 37 FX_DWORD flags); |
30 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary
* pPage); | 38 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, |
| 39 CPDF_Dictionary* pPage); |
31 class IPDF_TextPage; | 40 class IPDF_TextPage; |
32 class IPDF_LinkExtract; | 41 class IPDF_LinkExtract; |
33 class IPDF_TextPageFind; | 42 class IPDF_TextPageFind; |
34 #define CHAR_ERROR» » » -1 | 43 #define CHAR_ERROR -1 |
35 #define CHAR_NORMAL» » » 0 | 44 #define CHAR_NORMAL 0 |
36 #define CHAR_GENERATED» » 1 | 45 #define CHAR_GENERATED 1 |
37 #define CHAR_UNUNICODE» » 2 | 46 #define CHAR_UNUNICODE 2 |
38 typedef struct { | 47 typedef struct { |
39 FX_WCHAR» » » m_Unicode; | 48 FX_WCHAR m_Unicode; |
40 FX_WCHAR» » » m_Charcode; | 49 FX_WCHAR m_Charcode; |
41 FX_INT32» » » m_Flag; | 50 FX_INT32 m_Flag; |
42 FX_FLOAT» » » m_FontSize; | 51 FX_FLOAT m_FontSize; |
43 FX_FLOAT» » » m_OriginX; | 52 FX_FLOAT m_OriginX; |
44 FX_FLOAT» » » m_OriginY; | 53 FX_FLOAT m_OriginY; |
45 CFX_FloatRect» » m_CharBox; | 54 CFX_FloatRect m_CharBox; |
46 CPDF_TextObject*» m_pTextObj; | 55 CPDF_TextObject* m_pTextObj; |
47 CFX_AffineMatrix» m_Matrix; | 56 CFX_AffineMatrix m_Matrix; |
48 } FPDF_CHAR_INFO; | 57 } FPDF_CHAR_INFO; |
49 typedef»CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray; | 58 typedef CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray; |
50 #define FPDFTEXT_LRTB» 0 | 59 #define FPDFTEXT_LRTB 0 |
51 #define FPDFTEXT_RLTB» 1 | 60 #define FPDFTEXT_RLTB 1 |
52 #define FPDFTEXT_TBRL» 2 | 61 #define FPDFTEXT_TBRL 2 |
53 #define FPDFTEXT_LEFT» » » -1 | 62 #define FPDFTEXT_LEFT -1 |
54 #define FPDFTEXT_RIGHT» » » 1 | 63 #define FPDFTEXT_RIGHT 1 |
55 #define FPDFTEXT_UP» » » » -2 | 64 #define FPDFTEXT_UP -2 |
56 #define FPDFTEXT_DOWN» » » 2 | 65 #define FPDFTEXT_DOWN 2 |
57 class IPDF_ReflowedPage; | 66 class IPDF_ReflowedPage; |
58 #define FPDFTEXT_WRITINGMODE_UNKNOW» 0 | 67 #define FPDFTEXT_WRITINGMODE_UNKNOW 0 |
59 #define FPDFTEXT_WRITINGMODE_LRTB» 1 | 68 #define FPDFTEXT_WRITINGMODE_LRTB 1 |
60 #define FPDFTEXT_WRITINGMODE_RLTB» 2 | 69 #define FPDFTEXT_WRITINGMODE_RLTB 2 |
61 #define FPDFTEXT_WRITINGMODE_TBRL» 3 | 70 #define FPDFTEXT_WRITINGMODE_TBRL 3 |
62 class CPDFText_ParseOptions : public CFX_Object | 71 class CPDFText_ParseOptions : public CFX_Object { |
63 { | 72 public: |
64 public: | 73 CPDFText_ParseOptions(); |
| 74 FX_BOOL m_bGetCharCodeOnly; |
| 75 FX_BOOL m_bNormalizeObjs; |
| 76 FX_BOOL m_bOutputHyphen; |
| 77 }; |
| 78 class IPDF_TextPage : public CFX_Object { |
| 79 public: |
| 80 virtual ~IPDF_TextPage() {} |
| 81 static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage, |
| 82 CPDFText_ParseOptions ParserOptions); |
| 83 static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage, int flags = 0); |
| 84 static IPDF_TextPage* CreateTextPage(const CPDF_PageObjects* pObjs, |
| 85 int flags = 0); |
| 86 static IPDF_TextPage* CreateReflowTextPage(IPDF_ReflowedPage* pRefPage); |
65 | 87 |
66 CPDFText_ParseOptions(); | 88 virtual void NormalizeObjects(FX_BOOL bNormalize) = 0; |
67 FX_BOOL» » » m_bGetCharCodeOnly; | 89 |
68 FX_BOOL» » » m_bNormalizeObjs; | 90 virtual FX_BOOL ParseTextPage() = 0; |
69 FX_BOOL» » » m_bOutputHyphen; | 91 |
| 92 virtual FX_BOOL IsParsered() const = 0; |
| 93 |
| 94 public: |
| 95 virtual int CharIndexFromTextIndex(int TextIndex) const = 0; |
| 96 |
| 97 virtual int TextIndexFromCharIndex(int CharIndex) const = 0; |
| 98 |
| 99 virtual int CountChars() const = 0; |
| 100 |
| 101 virtual void GetCharInfo(int index, FPDF_CHAR_INFO& info) const = 0; |
| 102 |
| 103 virtual void GetRectArray(int start, |
| 104 int nCount, |
| 105 CFX_RectArray& rectArray) const = 0; |
| 106 |
| 107 virtual int GetIndexAtPos(CPDF_Point point, |
| 108 FX_FLOAT xTorelance, |
| 109 FX_FLOAT yTorelance) const = 0; |
| 110 |
| 111 virtual int GetIndexAtPos(FX_FLOAT x, |
| 112 FX_FLOAT y, |
| 113 FX_FLOAT xTorelance, |
| 114 FX_FLOAT yTorelance) const = 0; |
| 115 |
| 116 virtual int GetOrderByDirection(int index, int direction) const = 0; |
| 117 |
| 118 virtual CFX_WideString GetTextByRect(CFX_FloatRect rect) const = 0; |
| 119 |
| 120 virtual void GetRectsArrayByRect(CFX_FloatRect rect, |
| 121 CFX_RectArray& resRectArray) const = 0; |
| 122 |
| 123 virtual int CountRects(int start, int nCount) = 0; |
| 124 |
| 125 virtual void GetRect(int rectIndex, |
| 126 FX_FLOAT& left, |
| 127 FX_FLOAT& top, |
| 128 FX_FLOAT& right, |
| 129 FX_FLOAT& bottom) const = 0; |
| 130 |
| 131 virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) = 0; |
| 132 |
| 133 virtual FX_BOOL GetBaselineRotate(CFX_FloatRect rect, int& Rotate) = 0; |
| 134 |
| 135 virtual int CountBoundedSegments(FX_FLOAT left, |
| 136 FX_FLOAT top, |
| 137 FX_FLOAT right, |
| 138 FX_FLOAT bottom, |
| 139 FX_BOOL bContains = FALSE) = 0; |
| 140 |
| 141 virtual void GetBoundedSegment(int index, int& start, int& count) const = 0; |
| 142 |
| 143 virtual int GetWordBreak(int index, int direction) const = 0; |
| 144 |
| 145 virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const = 0; |
70 }; | 146 }; |
71 class IPDF_TextPage : public CFX_Object | 147 #define FPDFTEXT_MATCHCASE 0x00000001 |
72 { | 148 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 |
73 public: | 149 #define FPDFTEXT_CONSECUTIVE 0x00000004 |
| 150 class IPDF_TextPageFind : public CFX_Object { |
| 151 public: |
| 152 virtual ~IPDF_TextPageFind() {} |
74 | 153 |
75 virtual ~IPDF_TextPage() {} | 154 static IPDF_TextPageFind* CreatePageFind(const IPDF_TextPage* pTextPage); |
76 static IPDF_TextPage*» CreateTextPage(const CPDF_Page* pPage, CPDFText_
ParseOptions ParserOptions); | |
77 static IPDF_TextPage*» CreateTextPage(const CPDF_Page* pPage, int flags
= 0); | |
78 static IPDF_TextPage*» CreateTextPage(const CPDF_PageObjects* pObjs, in
t flags = 0); | |
79 static IPDF_TextPage*» CreateReflowTextPage(IPDF_ReflowedPage* pRefPage
); | |
80 | 155 |
81 virtual void» » » NormalizeObjects(FX_BOOL bNormalize) = 0
; | 156 public: |
| 157 virtual FX_BOOL FindFirst(CFX_WideString findwhat, |
| 158 int flags, |
| 159 int startPos = 0) = 0; |
82 | 160 |
83 virtual FX_BOOL» » » ParseTextPage() = 0; | 161 virtual FX_BOOL FindNext() = 0; |
84 | 162 |
| 163 virtual FX_BOOL FindPrev() = 0; |
85 | 164 |
86 virtual FX_BOOL» » » IsParsered() const = 0; | 165 virtual void GetRectArray(CFX_RectArray& rects) const = 0; |
87 public: | |
88 | 166 |
89 virtual int CharIndexFromTextIndex(int TextIndex) const = 0; | 167 virtual int GetCurOrder() const = 0; |
90 | 168 |
91 virtual int TextIndexFromCharIndex(int CharIndex) const = 0; | 169 virtual int GetMatchedCount() const = 0; |
| 170 }; |
| 171 class IPDF_LinkExtract : public CFX_Object { |
| 172 public: |
| 173 virtual ~IPDF_LinkExtract() {} |
92 | 174 |
| 175 static IPDF_LinkExtract* CreateLinkExtract(); |
93 | 176 |
94 virtual int»» » » CountChars() const = 0; | 177 virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) = 0; |
95 | 178 |
96 virtual» void» » » GetCharInfo(int index, FPDF_CHAR_INFO &
info) const = 0; | 179 public: |
| 180 virtual int CountLinks() const = 0; |
97 | 181 |
98 virtual void» » » GetRectArray(int start, int nCount, CFX_
RectArray& rectArray) const = 0; | 182 virtual CFX_WideString GetURL(int index) const = 0; |
99 | 183 |
| 184 virtual void GetBoundedSegment(int index, int& start, int& count) const = 0; |
100 | 185 |
101 | 186 virtual void GetRects(int index, CFX_RectArray& rects) const = 0; |
102 virtual int»» » » GetIndexAtPos(CPDF_Point point, FX_FLOAT
xTorelance, FX_FLOAT yTorelance) const = 0; | |
103 | |
104 virtual int»» » » GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX
_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0; | |
105 | |
106 virtual» int» » » » GetOrderByDirection(int index, i
nt direction) const = 0; | |
107 | |
108 virtual CFX_WideString» GetTextByRect(CFX_FloatRect rect) const = 0; | |
109 | |
110 virtual void» » » GetRectsArrayByRect(CFX_FloatRect rect,
CFX_RectArray& resRectArray) const = 0; | |
111 | |
112 | |
113 virtual int»» » » CountRects(int start, int nCount) = 0; | |
114 | |
115 virtual» void» » » GetRect(int rectIndex, FX_FLOAT& left, F
X_FLOAT& top, FX_FLOAT& right, FX_FLOAT &bottom) const = 0; | |
116 | |
117 virtual FX_BOOL» » » GetBaselineRotate(int rectIndex, int& Ro
tate) = 0; | |
118 | |
119 virtual FX_BOOL» » » GetBaselineRotate(CFX_FloatRect rect, in
t& Rotate) = 0; | |
120 | |
121 virtual» int» » » » CountBoundedSegments(FX_FLOAT le
ft, FX_FLOAT top, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE) =
0; | |
122 | |
123 virtual» void» » » GetBoundedSegment(int index, int& start,
int& count) const = 0; | |
124 | |
125 | |
126 virtual int»» » » GetWordBreak(int index, int direction) c
onst = 0; | |
127 | |
128 virtual CFX_WideString» GetPageText(int start = 0, int nCount = -1 ) con
st = 0; | |
129 }; | |
130 #define FPDFTEXT_MATCHCASE 0x00000001 | |
131 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 | |
132 #define FPDFTEXT_CONSECUTIVE» 0x00000004 | |
133 class IPDF_TextPageFind : public CFX_Object | |
134 { | |
135 public: | |
136 | |
137 virtual» ~IPDF_TextPageFind() {} | |
138 | |
139 static» IPDF_TextPageFind*» CreatePageFind(const IPDF_TextPage* pTex
tPage); | |
140 public: | |
141 | |
142 virtual» FX_BOOL»» » » FindFirst(CFX_WideString findwha
t, int flags, int startPos = 0) = 0; | |
143 | |
144 virtual» FX_BOOL»» » » FindNext() = 0; | |
145 | |
146 virtual» FX_BOOL»» » » FindPrev() = 0; | |
147 | |
148 virtual void» » » » GetRectArray(CFX_RectArray& rect
s) const = 0; | |
149 | |
150 virtual int»» » » » GetCurOrder() const = 0; | |
151 | |
152 virtual int»» » » » GetMatchedCount() const = 0; | |
153 }; | |
154 class IPDF_LinkExtract : public CFX_Object | |
155 { | |
156 public: | |
157 | |
158 virtual» ~IPDF_LinkExtract() {} | |
159 | |
160 static» IPDF_LinkExtract*» CreateLinkExtract(); | |
161 | |
162 virtual FX_BOOL» » » » ExtractLinks(const IPDF_TextPage
* pTextPage) = 0; | |
163 public: | |
164 | |
165 virtual int»» » » » CountLinks() const = 0; | |
166 | |
167 virtual CFX_WideString» » GetURL(int index) const = 0; | |
168 | |
169 virtual» void» » » » GetBoundedSegment(int index, int
& start, int& count) const = 0; | |
170 | |
171 virtual void» » » » GetRects(int index, CFX_RectArra
y& rects) const = 0; | |
172 }; | 187 }; |
173 #endif | 188 #endif |
OLD | NEW |