OLD | NEW |
| (Empty) |
1 // Copyright 2014 PDFium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
6 | |
7 #ifndef _FPDFTEXT_H_ | |
8 #define _FPDFTEXT_H_ | |
9 | |
10 #include "fpdfview.h" | |
11 | |
12 // Exported Functions | |
13 #ifdef __cplusplus | |
14 extern "C" { | |
15 #endif | |
16 | |
17 // Function: FPDFText_LoadPage | |
18 // Prepare information about all characters in a page. | |
19 // Parameters: | |
20 // page - Handle to the page. Returned by FPDF_Loa
dPage function (in FPDFVIEW module). | |
21 // Return value: | |
22 // A handle to the text page information structure. | |
23 // NULL if something goes wrong. | |
24 // Comments: | |
25 // Application must call FPDFText_ClosePage to release the
text page information. | |
26 // Notes: | |
27 // This function cannot load an FPDF_TEXTPAGE for a document
that consists of dynamic fields. | |
28 // | |
29 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page); | |
30 | |
31 // Function: FPDFText_ClosePage | |
32 // Release all resources allocated for a text page informat
ion structure. | |
33 // Parameters: | |
34 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
35 // Return Value: | |
36 // None. | |
37 // | |
38 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page); | |
39 | |
40 // Function: FPDFText_CountChars | |
41 // Get number of characters in a page. | |
42 // Parameters: | |
43 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
44 // Return value: | |
45 // Number of characters in the page. Return -1 for error. | |
46 // Generated characters, such as additional space characters
and new line characters, are also counted. | |
47 // Comments: | |
48 // Characters in a page form a "stream"; inside the stream,
each character has an index. | |
49 // We will use the index parameters in many FPDFTEXT
functions. The first character in the page | |
50 // has an index value of zero. | |
51 // | |
52 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page); | |
53 | |
54 // Function: FPDFText_GetUnicode | |
55 // Get Unicode of a character in a page. | |
56 // Parameters: | |
57 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
58 // index - Zero-based index of the characte
r. | |
59 // Return value: | |
60 // The Unicode of the particular character. | |
61 // If a character is not encoded in Unicode and the Foxit engine
can't convert it to Unicode, | |
62 // the return value will be zero. | |
63 // | |
64 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int
index); | |
65 | |
66 // Function: FPDFText_GetFontSize | |
67 // Get the font size of a particular character. | |
68 // Parameters: | |
69 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
70 // index - Zero-based index of the characte
r. | |
71 // Return value: | |
72 // The font size of the particular character, measured in p
oints (about 1/72 inch). | |
73 // This is the typographic size of the font (so called "em
size"). | |
74 // | |
75 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index
); | |
76 | |
77 // Function: FPDFText_GetCharBox | |
78 // Get bounding box of a particular character. | |
79 // Parameters: | |
80 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
81 // index - Zero-based index of the characte
r. | |
82 // left - Pointer to a double number recei
ving left position of the character box. | |
83 // right - Pointer to a double number recei
ving right position of the character box. | |
84 // bottom - Pointer to a double number recei
ving bottom position of the character box. | |
85 // top - Pointer to a double numb
er receiving top position of the character box. | |
86 // Return Value: | |
87 // None. | |
88 // Comments: | |
89 // All positions are measured in PDF "user space". | |
90 // | |
91 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, int index, d
ouble* left, | |
92
double* right, double* bottom, double* top); | |
93 | |
94 // Function: FPDFText_GetCharIndexAtPos | |
95 // Get the index of a character at or nearby a certain posi
tion on the page. | |
96 // Parameters: | |
97 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
98 // x - X position in PDF "user
space". | |
99 // y - Y position in PDF "user
space". | |
100 // xTolerance - An x-axis tolerance value for ch
aracter hit detection, in points. | |
101 // yTolerance - A y-axis tolerance value for cha
racter hit detection, in points. | |
102 // Return Value: | |
103 // The zero-based index of the character at, or nearby the
point (x,y). | |
104 // If there is no character at or nearby the point, the return
value will be -1. | |
105 // If an error occurs, -3 will be returned. | |
106 // | |
107 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page, | |
108
double x, double y, double xTolerance, double yTolerance); | |
109 | |
110 // Function: FPDFText_GetText | |
111 // Extract unicode text string from the page. | |
112 // Parameters: | |
113 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
114 // start_index - Index for the start characters. | |
115 // count - Number of characters to be extra
cted. | |
116 // result - A buffer (allocated by applicati
on) receiving the extracted unicodes. | |
117 // The size of the buffer m
ust be able to hold the number of characters plus a terminator. | |
118 // Return Value: | |
119 // Number of characters written into the result buffer, inc
luding the trailing terminator. | |
120 // Comments: | |
121 // This function ignores characters without unicode informa
tion. | |
122 // | |
123 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index,
int count, unsigned short* result); | |
124 | |
125 // Function: FPDFText_CountRects | |
126 // Count number of rectangular areas occupied by a segment
of texts. | |
127 // Parameters: | |
128 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
129 // start_index - Index for the start characters. | |
130 // count - Number of characters. | |
131 // Return value: | |
132 // Number of rectangles. Zero for error. | |
133 // Comments: | |
134 // This function, along with FPDFText_GetRect can be used b
y applications to detect the position | |
135 // on the page for a text segment, so the proper areas can be
highlighted. | |
136 // FPDFTEXT will automatically merge small character boxes
into bigger ones if those characters | |
137 // are on the same line and use the same font settings. | |
138 // | |
139 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, int start_ind
ex, int count); | |
140 | |
141 // Function: FPDFText_GetRect | |
142 // Get a rectangular area from the result generated by FPDF
Text_CountRects. | |
143 // Parameters: | |
144 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
145 // rect_index - Zero-based index for the rectang
le. | |
146 // left - Pointer to a double value receiv
ing the rectangle left boundary. | |
147 // top - Pointer to a double valu
e receiving the rectangle top boundary. | |
148 // right - Pointer to a double value receiv
ing the rectangle right boundary. | |
149 // bottom - Pointer to a double value receiv
ing the rectangle bottom boundary. | |
150 // Return Value: | |
151 // None. | |
152 // | |
153 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, int rect_index,
double* left, double* top, | |
154
double* right, double* bottom); | |
155 | |
156 // Function: FPDFText_GetBoundedText | |
157 // Extract unicode text within a rectangular boundary on th
e page. | |
158 // Parameters: | |
159 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
160 // left - Left boundary. | |
161 // top - Top boundary. | |
162 // right - Right boundary. | |
163 // bottom - Bottom boundary. | |
164 // buffer - A unicode buffer. | |
165 // buflen - Number of characters (not bytes)
for the buffer, excluding an additional terminator. | |
166 // Return Value: | |
167 // If buffer is NULL or buflen is zero, return number of ch
aracters (not bytes) of text present within | |
168 // the rectangle, excluding a terminating NUL. Generally y
ou should pass a buffer at least one larger | |
169 // than this if you want a terminating NUL, which will be p
rovided if space is available. | |
170 // Otherwise, return number of characters copied into the b
uffer, including the terminating NUL | |
171 // when space for it is available. | |
172 // Comment: | |
173 // If the buffer is too small, as much text as will fit is
copied into it. | |
174 // | |
175 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double lef
t, double top, | |
176
double right, double bottom,unsigned short* buffer,int buflen); | |
177 | |
178 | |
179 // Flags used by FPDFText_FindStart function. | |
180 #define FPDF_MATCHCASE 0x00000001 //If not set, it will not match
case by default. | |
181 #define FPDF_MATCHWHOLEWORD 0x00000002 //If not set, it will not match
the whole word by default. | |
182 | |
183 // Function: FPDFText_FindStart | |
184 // Start a search. | |
185 // Parameters: | |
186 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
187 // findwhat - A unicode match pattern. | |
188 // flags - Option flags. | |
189 // start_index - Start from this character. -1 fo
r end of the page. | |
190 // Return Value: | |
191 // A handle for the search context. FPDFText_FindClose must
be called to release this handle. | |
192 // | |
193 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, FPD
F_WIDESTRING findwhat, | |
194
unsigned long flags, int start_index); | |
195 | |
196 // Function: FPDFText_FindNext | |
197 // Search in the direction from page start to end. | |
198 // Parameters: | |
199 // handle - A search context handle returned
by FPDFText_FindStart. | |
200 // Return Value: | |
201 // Whether a match is found. | |
202 // | |
203 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle); | |
204 | |
205 // Function: FPDFText_FindPrev | |
206 // Search in the direction from page end to start. | |
207 // Parameters: | |
208 // handle - A search context handle returned
by FPDFText_FindStart. | |
209 // Return Value: | |
210 // Whether a match is found. | |
211 // | |
212 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle); | |
213 | |
214 // Function: FPDFText_GetSchResultIndex | |
215 // Get the starting character index of the search result. | |
216 // Parameters: | |
217 // handle - A search context handle returned
by FPDFText_FindStart. | |
218 // Return Value: | |
219 // Index for the starting character. | |
220 // | |
221 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle); | |
222 | |
223 // Function: FPDFText_GetSchCount | |
224 // Get the number of matched characters in the search resul
t. | |
225 // Parameters: | |
226 // handle - A search context handle returned
by FPDFText_FindStart. | |
227 // Return Value: | |
228 // Number of matched characters. | |
229 // | |
230 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle); | |
231 | |
232 // Function: FPDFText_FindClose | |
233 // Release a search context. | |
234 // Parameters: | |
235 // handle - A search context handle returned
by FPDFText_FindStart. | |
236 // Return Value: | |
237 // None. | |
238 // | |
239 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle); | |
240 | |
241 // Function: FPDFLink_LoadWebLinks | |
242 // Prepare information about weblinks in a page. | |
243 // Parameters: | |
244 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
245 // Return Value: | |
246 // A handle to the page's links information structure. | |
247 // NULL if something goes wrong. | |
248 // Comments: | |
249 // Weblinks are those links implicitly embedded in PDF page
s. PDF also has a type of | |
250 // annotation called "link", FPDFTEXT doesn't deal with tha
t kind of link. | |
251 // FPDFTEXT weblink feature is useful for automatically det
ecting links in the page | |
252 // contents. For example, things like "http://www.foxitsoft
ware.com" will be detected, | |
253 // so applications can allow the user to click on those
characters to activate the link, | |
254 // even if the PDF doesn't come with link annotations. | |
255 // | |
256 // FPDFLink_CloseWebLinks must be called to release resourc
es. | |
257 // | |
258 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page); | |
259 | |
260 // Function: FPDFLink_CountWebLinks | |
261 // Count number of detected web links. | |
262 // Parameters: | |
263 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
264 // Return Value: | |
265 // Number of detected web links. | |
266 // | |
267 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page); | |
268 | |
269 // Function: FPDFLink_GetURL | |
270 // Fetch the URL information for a detected web link. | |
271 // Parameters: | |
272 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
273 // link_index - Zero-based index for the link. | |
274 // buffer - A unicode buffer. | |
275 // buflen - Number of characters (not bytes)
for the buffer, including an additional terminator. | |
276 // Return Value: | |
277 // If buffer is NULL or buflen is zero, return the number of
characters (not bytes) needed, including the additional terminator; | |
278 // otherwise, return the number of characters copied into the
buffer. | |
279 // | |
280 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, int link_index, u
nsigned short* buffer,int buflen); | |
281 | |
282 // Function: FPDFLink_CountRects | |
283 // Count number of rectangular areas for the link. | |
284 // Parameters: | |
285 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
286 // link_index - Zero-based index for the link. | |
287 // Return Value: | |
288 // Number of rectangular areas for the link. | |
289 // | |
290 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_inde
x); | |
291 | |
292 // Function: FPDFLink_GetRect | |
293 // Fetch the boundaries of a rectangle for a link. | |
294 // Parameters: | |
295 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
296 // link_index - Zero-based index for the link. | |
297 // rect_index - Zero-based index for a rectangle
. | |
298 // left - Pointer to a double value receiv
ing the rectangle left boundary. | |
299 // top - Pointer to a double valu
e receiving the rectangle top boundary. | |
300 // right - Pointer to a double value receiv
ing the rectangle right boundary. | |
301 // bottom - Pointer to a double value receiv
ing the rectangle bottom boundary. | |
302 // Return Value: | |
303 // None. | |
304 // | |
305 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index,
int rect_index, | |
306
double* left, double* top,double* right, double* bottom); | |
307 | |
308 // Function: FPDFLink_CloseWebLinks | |
309 // Release resources used by weblink feature. | |
310 // Parameters: | |
311 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
312 // Return Value: | |
313 // None. | |
314 // | |
315 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page); | |
316 | |
317 | |
318 #ifdef __cplusplus | |
319 }; | |
320 #endif | |
321 | |
322 #endif//_FPDFTEXT_H_ | |
OLD | NEW |