OLD | NEW |
| (Empty) |
1 // Copyright 2014 PDFium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
6 | |
7 #ifndef _FPDFTEXT_H_ | |
8 #define _FPDFTEXT_H_ | |
9 | |
10 #include "fpdfview.h" | |
11 | |
12 // Exported Functions | |
13 #ifdef __cplusplus | |
14 extern "C" { | |
15 #endif | |
16 | |
17 // Function: FPDFText_LoadPage | |
18 // Prepare information about all characters in a page. | |
19 // Parameters: | |
20 // page - Handle to the page. Returned by FPDF_Loa
dPage function (in FPDFVIEW module). | |
21 // Return value: | |
22 // A handle to the text page information structure. | |
23 // NULL if something goes wrong. | |
24 // Comments: | |
25 // Application must call FPDFText_ClosePage to release the
text page information. | |
26 // | |
27 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page); | |
28 | |
29 // Function: FPDFText_ClosePage | |
30 // Release all resources allocated for a text page informat
ion structure. | |
31 // Parameters: | |
32 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
33 // Return Value: | |
34 // None. | |
35 // | |
36 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page); | |
37 | |
38 // Function: FPDFText_CountChars | |
39 // Get number of characters in a page. | |
40 // Parameters: | |
41 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
42 // Return value: | |
43 // Number of characters in the page. Return -1 for error. | |
44 // Generated characters, like additional space characters,
new line characters, are also counted. | |
45 // Comments: | |
46 // Characters in a page form a "stream"; inside the stream, each character has an index. | |
47 // We will use the index parameters in many of the FPDFTEXT functions. The first character in the page | |
48 // has an index value of zero. | |
49 // | |
50 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page); | |
51 | |
52 // Function: FPDFText_GetUnicode | |
53 // Get Unicode of a character in a page. | |
54 // Parameters: | |
55 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
56 // index - Zero-based index of the characte
r. | |
57 // Return value: | |
58 // The Unicode of the particular character. | |
59 // If a character is not encoded in Unicode and the Foxit engine can't convert it to Unicode, | |
60 // the return value will be zero. | |
61 // | |
62 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int
index); | |
63 | |
64 // Function: FPDFText_GetFontSize | |
65 // Get the font size of a particular character. | |
66 // Parameters: | |
67 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
68 // index - Zero-based index of the characte
r. | |
69 // Return value: | |
70 // The font size of the particular character, measured in p
oints (about 1/72 inch). | |
71 // This is the typographic size of the font (so called "em
size"). | |
72 // | |
73 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index
); | |
74 | |
75 // Function: FPDFText_GetCharBox | |
76 // Get bounding box of a particular character. | |
77 // Parameters: | |
78 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
79 // index - Zero-based index of the characte
r. | |
80 // left - Pointer to a double number recei
ving left position of the character box. | |
81 // right - Pointer to a double number recei
ving right position of the character box. | |
82 // bottom - Pointer to a double number recei
ving bottom position of the character box. | |
83 // top - Pointer to a double numb
er receiving top position of the character box. | |
84 // Return Value: | |
85 // None. | |
86 // Comments: | |
87 // All positions are measured in PDF "user space". | |
88 // | |
89 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, int index, d
ouble* left, | |
90
double* right, double* bottom, double* top); | |
91 | |
92 // Function: FPDFText_GetCharIndexAtPos | |
93 // Get the index of a character at or nearby a certain position on the page. | |
94 // Parameters: | |
95 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. | |
96 // x - X position in PDF "user space". | |
97 // y - Y position in PDF "user space". | |
98 // xTolerance - An x-axis tolerance value for character hit detection, in points. | |
99 // yTolerance - A y-axis tolerance value for character hit detection, in points. | |
100 // Return Value: | |
101 // The zero-based index of the character at, or nearby, the point (x,y). | |
102 // If there is no character at or nearby the point, the return value will be -1. | |
103 // If an error occurs, -3 will be returned. | |
104 // | |
105 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page, | |
106 double x, double y, double xTolerance, double yTolerance); | |
107 | |
108 // Function: FPDFText_GetText | |
109 // Extract unicode text string from the page. | |
110 // Parameters: | |
111 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
112 // start_index - Index for the start characters. | |
113 // count - Number of characters to be extra
cted. | |
114 // result - A buffer (allocated by applicati
on) receiving the extracted unicodes. | |
115 // The size of the buffer m
ust be able to hold the number of characters plus a terminator. | |
116 // Return Value: | |
117 // Number of characters written into the result buffer, inc
luding the trailing terminator. | |
118 // Comments: | |
119 // This function ignores characters without unicode informa
tion. | |
120 // | |
121 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index,
int count, unsigned short* result); | |
122 | |
123 // Function: FPDFText_CountRects | |
124 // Count number of rectangular areas occupied by a segment
of texts. | |
125 // Parameters: | |
126 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
127 // start_index - Index for the start characters. | |
128 // count - Number of characters. | |
129 // Return value: | |
130 // Number of rectangles. Zero for error. | |
131 // Comments: | |
132 // This function, along with FPDFText_GetRect, can be used by applications to detect the position | |
133 // on the page of a text segment, so that the corresponding areas can be highlighted, for example. | |
134 // FPDFTEXT will automatically merge small character boxes into bigger ones if those characters | |
135 // are on the same line and use the same font settings. | |
136 // | |
137 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, int start_ind
ex, int count); | |
138 | |
139 // Function: FPDFText_GetRect | |
140 // Get a rectangular area from the result generated by FPDF
Text_CountRects. | |
141 // Parameters: | |
142 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
143 // rect_index - Zero-based index for the rectang
le. | |
144 // left - Pointer to a double value receiv
ing the rectangle left boundary. | |
145 // top - Pointer to a double valu
e receiving the rectangle top boundary. | |
146 // right - Pointer to a double value receiv
ing the rectangle right boundary. | |
147 // bottom - Pointer to a double value receiv
ing the rectangle bottom boundary. | |
148 // Return Value: | |
149 // None. | |
150 // | |
151 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, int rect_index,
double* left, double* top, | |
152
double* right, double* bottom); | |
153 | |
154 // Function: FPDFText_GetBoundedText | |
155 // Extract unicode text within a rectangular boundary on th
e page. | |
156 // Parameters: | |
157 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
158 // left - Left boundary. | |
159 // top - Top boundary. | |
160 // right - Right boundary. | |
161 // bottom - Bottom boundary. | |
162 // buffer - A unicode buffer. | |
163 // buflen - Number of characters (not bytes)
for the buffer, excluding an additional terminator. | |
164 // Return Value: | |
165 // If buffer is NULL or buflen is zero, return number of ch
aracters (not bytes) of text present within | |
166 // the rectangle, excluding a terminating NUL. Generally y
ou should pass a buffer at least one larger | |
167 // than this if you want a terminating NUL, which will be p
rovided if space is available. | |
168 // Otherwise, return number of characters copied into the b
uffer, including the terminating NUL | |
169 // when space for it is available. | |
170 // Comments: | |
171 // If the buffer is too small, as much text as will fit is
copied into it. | |
172 // | |
173 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double lef
t, double top, | |
174
double right, double bottom,unsigned short* buffer,int buflen); | |
175 | |
176 | |
177 // Flags used by FPDFText_FindStart function. | |
178 #define FPDF_MATCHCASE 0x00000001 //If not set, it will not match
case by default. | |
179 #define FPDF_MATCHWHOLEWORD 0x00000002 //If not set, it will not match
the whole word by default. | |
180 | |
181 // Function: FPDFText_FindStart | |
182 // Start a search. | |
183 // Parameters: | |
184 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
185 // findwhat - A unicode match pattern. | |
186 // flags - Option flags. | |
187 // start_index - Start from this character. -1 fo
r end of the page. | |
188 // Return Value: | |
189 // A handle for the search context. FPDFText_FindClose must
be called to release this handle. | |
190 // | |
191 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, FPD
F_WIDESTRING findwhat, | |
192
unsigned long flags, int start_index); | |
193 | |
194 // Function: FPDFText_FindNext | |
195 // Search in the direction from page start to end. | |
196 // Parameters: | |
197 // handle - A search context handle returned
by FPDFText_FindStart. | |
198 // Return Value: | |
199 // Whether a match is found. | |
200 // | |
201 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle); | |
202 | |
203 // Function: FPDFText_FindPrev | |
204 // Search in the direction from page end to start. | |
205 // Parameters: | |
206 // handle - A search context handle returned
by FPDFText_FindStart. | |
207 // Return Value: | |
208 // Whether a match is found. | |
209 // | |
210 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle); | |
211 | |
212 // Function: FPDFText_GetSchResultIndex | |
213 // Get the starting character index of the search result. | |
214 // Parameters: | |
215 // handle - A search context handle returned
by FPDFText_FindStart. | |
216 // Return Value: | |
217 // Index for the starting character. | |
218 // | |
219 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle); | |
220 | |
221 // Function: FPDFText_GetSchCount | |
222 // Get the number of matched characters in the search resul
t. | |
223 // Parameters: | |
224 // handle - A search context handle returned
by FPDFText_FindStart. | |
225 // Return Value: | |
226 // Number of matched characters. | |
227 // | |
228 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle); | |
229 | |
230 // Function: FPDFText_FindClose | |
231 // Release a search context. | |
232 // Parameters: | |
233 // handle - A search context handle returned
by FPDFText_FindStart. | |
234 // Return Value: | |
235 // None. | |
236 // | |
237 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle); | |
238 | |
239 // Function: FPDFLink_LoadWebLinks | |
240 // Prepare information about weblinks in a page. | |
241 // Parameters: | |
242 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
243 // Return Value: | |
244 // A handle to the page's links information structure. | |
245 // NULL if something goes wrong. | |
246 // Comments: | |
247 // Weblinks are those links implicitly embedded in PDF pages. PDF also has a type of | |
248 // annotation called "link"; FPDFTEXT doesn't deal with that kind of link. | |
249 // The FPDFTEXT weblink feature is useful for automatically detecting links in the page | |
250 // contents. For example, things like "http://www.foxitsoftware.com" will be detected, | |
251 // so applications can allow the user to click on those characters to activate the link, | |
252 // even if the PDF doesn't come with link annotations. | |
253 // | |
254 // FPDFLink_CloseWebLinks must be called to release resourc
es. | |
255 // | |
256 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page); | |
257 | |
258 // Function: FPDFLink_CountWebLinks | |
259 // Count number of detected web links. | |
260 // Parameters: | |
261 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
262 // Return Value: | |
263 // Number of detected web links. | |
264 // | |
265 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page); | |
266 | |
267 // Function: FPDFLink_GetURL | |
268 // Fetch the URL information for a detected web link. | |
269 // Parameters: | |
270 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
271 // link_index - Zero-based index for the link. | |
272 // buffer - A unicode buffer. | |
273 // buflen - Number of characters (not bytes)
for the buffer, including an additional terminator. | |
274 // Return Value: | |
275 // If buffer is NULL or buflen is zero, return the number of characters (not bytes) needed, including an additional terminator; | |
276 // otherwise, return the number of characters copied into the buffer. | |
277 // | |
278 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, int link_index, u
nsigned short* buffer,int buflen); | |
279 | |
280 // Function: FPDFLink_CountRects | |
281 // Count number of rectangular areas for the link. | |
282 // Parameters: | |
283 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
284 // link_index - Zero-based index for the link. | |
285 // Return Value: | |
286 // Number of rectangular areas for the link. | |
287 // | |
288 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_inde
x); | |
289 | |
290 // Function: FPDFLink_GetRect | |
291 // Fetch the boundaries of a rectangle for a link. | |
292 // Parameters: | |
293 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
294 // link_index - Zero-based index for the link. | |
295 // rect_index - Zero-based index for a rectangle
. | |
296 // left - Pointer to a double value receiv
ing the rectangle left boundary. | |
297 // top - Pointer to a double valu
e receiving the rectangle top boundary. | |
298 // right - Pointer to a double value receiv
ing the rectangle right boundary. | |
299 // bottom - Pointer to a double value receiv
ing the rectangle bottom boundary. | |
300 // Return Value: | |
301 // None. | |
302 // | |
303 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index,
int rect_index, | |
304
double* left, double* top,double* right, double* bottom); | |
305 | |
306 // Function: FPDFLink_CloseWebLinks | |
307 // Release resources used by weblink feature. | |
308 // Parameters: | |
309 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
310 // Return Value: | |
311 // None. | |
312 // | |
313 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page); | |
314 | |
315 | |
316 #ifdef __cplusplus | |
317 } // extern "C" | |
318 #endif | |
319 | |
320 #endif//_FPDFTEXT_H_ | |
OLD | NEW |