OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #ifndef _FPDFTEXT_H_ | 7 #ifndef _FPDFTEXT_H_ |
8 #define _FPDFTEXT_H_ | 8 #define _FPDFTEXT_H_ |
9 | 9 |
10 #include "fpdfview.h" | 10 #include "fpdfview.h" |
11 | 11 |
12 // Exported Functions | 12 // Exported Functions |
13 #ifdef __cplusplus | 13 #ifdef __cplusplus |
14 extern "C" { | 14 extern "C" { |
15 #endif | 15 #endif |
16 | 16 |
17 // Function: FPDFText_LoadPage | 17 // Function: FPDFText_LoadPage |
18 // Prepare information about all characters in a page. | 18 // Prepare information about all characters in a page. |
19 // Parameters: | 19 // Parameters: |
20 //» » » page» -» Handle to the page. Returned by FPDF_Loa
dPage function (in FPDFVIEW module).» | 20 //» » » page» -» Handle to the page. Returned by FPDF_Loa
dPage function (in FPDFVIEW module). |
21 // Return value: | 21 // Return value: |
22 // A handle to the text page information structure. | 22 // A handle to the text page information structure. |
23 // NULL if something goes wrong. | 23 // NULL if something goes wrong. |
24 // Comments: | 24 // Comments: |
25 // Application must call FPDFText_ClosePage to release the
text page information. | 25 // Application must call FPDFText_ClosePage to release the
text page information. |
26 //» » » If you don't purchase Text Module , this function will r
eturn NULL. | 26 // |
27 //» | |
28 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page); | 27 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page); |
29 | 28 |
30 // Function: FPDFText_ClosePage | 29 // Function: FPDFText_ClosePage |
31 // Release all resources allocated for a text page informat
ion structure. | 30 // Release all resources allocated for a text page informat
ion structure. |
32 // Parameters: | 31 // Parameters: |
33 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | 32 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. |
34 // Return Value: | 33 // Return Value: |
35 // None. | 34 // None. |
36 // | 35 // |
37 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page); | 36 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page); |
38 » | 37 |
39 // Function: FPDFText_CountChars | 38 // Function: FPDFText_CountChars |
40 // Get number of characters in a page. | 39 // Get number of characters in a page. |
41 // Parameters: | 40 // Parameters: |
42 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | 41 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. |
43 // Return value: | 42 // Return value: |
44 //» » » Number of characters in the page. Return -1 for error. | 43 //» » » Number of characters in the page. Return -1 for error. |
45 // Generated characters, like additional space characters,
new line characters, are also counted. | 44 // Generated characters, like additional space characters,
new line characters, are also counted. |
46 // Comments: | 45 // Comments: |
47 // Characters in a page form a "stream", inside the stream,
each character has an index. | 46 // Characters in a page form a "stream", inside the stream,
each character has an index. |
48 // We will use the index parameters in many of FPDFTEXT fun
ctions. The first character in the page | 47 // We will use the index parameters in many of FPDFTEXT fun
ctions. The first character in the page |
49 // has an index value of zero. | 48 // has an index value of zero. |
50 // | 49 // |
51 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page); | 50 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page); |
52 | 51 |
53 // Function: FPDFText_GetUnicode | 52 // Function: FPDFText_GetUnicode |
54 // Get Unicode of a character in a page. | 53 // Get Unicode of a character in a page. |
55 // Parameters: | 54 // Parameters: |
56 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | 55 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. |
57 // index - Zero-based index of the characte
r. | 56 // index - Zero-based index of the characte
r. |
58 // Return value: | 57 // Return value: |
59 // The Unicode of the particular character. | 58 // The Unicode of the particular character. |
60 // If a character is not encoded in Unicode and the Foxit engin
e can't convert it to Unicode, | 59 // If a character is not encoded in Unicode and the Foxit engin
e can't convert it to Unicode, |
61 // the return value will be zero. | 60 // the return value will be zero. |
62 // | 61 // |
63 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int
index); | 62 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int
index); |
64 | 63 |
65 // Function: FPDFText_GetFontSize | 64 // Function: FPDFText_GetFontSize |
66 // Get the font size of a particular character. | 65 // Get the font size of a particular character. |
67 // Parameters: | 66 // Parameters: |
68 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | 67 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. |
69 // index - Zero-based index of the characte
r. | 68 // index - Zero-based index of the characte
r. |
70 // Return value: | 69 // Return value: |
71 // The font size of the particular character, measured in p
oints (about 1/72 inch). | 70 // The font size of the particular character, measured in p
oints (about 1/72 inch). |
72 // This is the typographic size of the font (so called "em
size"). | 71 // This is the typographic size of the font (so called "em
size"). |
73 // | 72 // |
74 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index
); | 73 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index
); |
75 | 74 |
76 // Function: FPDFText_GetCharBox | 75 // Function: FPDFText_GetCharBox |
77 // Get bounding box of a particular character. | 76 // Get bounding box of a particular character. |
78 // Parameters: | 77 // Parameters: |
79 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | 78 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. |
80 // index - Zero-based index of the characte
r. | 79 // index - Zero-based index of the characte
r. |
81 // left - Pointer to a double number recei
ving left position of the character box. | 80 // left - Pointer to a double number recei
ving left position of the character box. |
82 // right - Pointer to a double number recei
ving right position of the character box. | 81 // right - Pointer to a double number recei
ving right position of the character box. |
83 // bottom - Pointer to a double number recei
ving bottom position of the character box. | 82 // bottom - Pointer to a double number recei
ving bottom position of the character box. |
84 // top - Pointer to a double numb
er receiving top position of the character box. | 83 // top - Pointer to a double numb
er receiving top position of the character box. |
85 // Return Value: | 84 // Return Value: |
86 // None. | 85 // None. |
87 // Comments: | 86 // Comments: |
88 // All positions are measured in PDF "user space". | 87 // All positions are measured in PDF "user space". |
(...skipping 22 matching lines...) |
111 // Parameters: | 110 // Parameters: |
112 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | 111 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. |
113 // start_index - Index of the start character. | 112 // start_index - Index of the start character. |
114 // count - Number of characters to be extra
cted. | 113 // count - Number of characters to be extra
cted. |
115 // result - A buffer (allocated by applicati
on) receiving the extracted unicodes. | 114 // result - A buffer (allocated by applicati
on) receiving the extracted unicodes. |
116 // The size of the buffer m
ust be able to hold the number of characters plus a terminator. | 115 // The size of the buffer m
ust be able to hold the number of characters plus a terminator. |
117 // Return Value: | 116 // Return Value: |
118 // Number of characters written into the result buffer, inc
luding the trailing terminator. | 117 // Number of characters written into the result buffer, inc
luding the trailing terminator. |
119 // Comments: | 118 // Comments: |
120 // This function ignores characters without unicode informa
tion. | 119 // This function ignores characters without unicode informa
tion. |
121 //» » » | 120 // |
122 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index,
int count, unsigned short* result); | 121 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index,
int count, unsigned short* result); |
123 | 122 |
124 // Function: FPDFText_CountRects | 123 // Function: FPDFText_CountRects |
125 // Count number of rectangular areas occupied by a segment
of texts. | 124 // Count number of rectangular areas occupied by a segment
of texts. |
126 // Parameters: | 125 // Parameters: |
127 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | 126 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. |
128 // start_index - Index of the start character. | 127 // start_index - Index of the start character. |
129 // count - Number of characters. | 128 // count - Number of characters. |
130 // Return value: | 129 // Return value: |
131 // Number of rectangles. Zero for error. | 130 // Number of rectangles. Zero for error. |
(...skipping 24 matching lines...) |
156 // Extract unicode text within a rectangular boundary on th
e page. | 155 // Extract unicode text within a rectangular boundary on th
e page. |
157 // Parameters: | 156 // Parameters: |
158 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | 157 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. |
159 // left - Left boundary. | 158 // left - Left boundary. |
160 // top - Top boundary. | 159 // top - Top boundary. |
161 // right - Right boundary. | 160 // right - Right boundary. |
162 // bottom - Bottom boundary. | 161 // bottom - Bottom boundary. |
163 // buffer - A unicode buffer. | 162 // buffer - A unicode buffer. |
164 // buflen - Number of characters (not bytes)
for the buffer, excluding an additional terminator. | 163 // buflen - Number of characters (not bytes)
for the buffer, excluding an additional terminator. |
165 // Return Value: | 164 // Return Value: |
166 //» » » If buffer is NULL or buflen is zero, return number of ch
aracters (not bytes) needed, | 165 //» » » If buffer is NULL or buflen is zero, return number of ch
aracters (not bytes) of text present within |
167 //» » » otherwise, return number of characters copied into the b
uffer. | 166 //» » » the rectangle, excluding a terminating NUL. Generally y
ou should pass a buffer at least one larger |
| 167 //» » » than this if you want a terminating NUL, which will be p
rovided if space is available. |
| 168 //» » » Otherwise, return number of characters copied into the b
uffer, including the terminating NUL |
| 169 //» » » when space for it is available. |
| 170 // Comments: |
| 171 //» » » If the buffer is too small, as much text as will fit is
copied into it. |
168 // | 172 // |
169 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double lef
t, double top, | 173 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double lef
t, double top, |
170
double right, double bottom,unsigned short* buffer,int buflen); | 174
double right, double bottom,unsigned short* buffer,int buflen); |
171 | 175 |
172 | 176 |
173 // Flags used by FPDFText_FindStart function. | 177 // Flags used by FPDFText_FindStart function. |
174 #define FPDF_MATCHCASE 0x00000001 //If not set, it will not match
case by default. | 178 #define FPDF_MATCHCASE 0x00000001 //If not set, it will not match
case by default. |
175 #define FPDF_MATCHWHOLEWORD 0x00000002 //If not set, it will not match
the whole word by default. | 179 #define FPDF_MATCHWHOLEWORD 0x00000002 //If not set, it will not match
the whole word by default. |
176 | 180 |
177 // Function: FPDFText_FindStart | 181 // Function: FPDFText_FindStart |
178 // Start a search. | 182 // Start a search. |
179 // Parameters: | 183 // Parameters: |
(...skipping 49 matching lines...) |
229 // handle - A search context handle returned
by FPDFText_FindStart. | 233 // handle - A search context handle returned
by FPDFText_FindStart. |
230 // Return Value: | 234 // Return Value: |
231 // None. | 235 // None. |
232 // | 236 // |
233 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle); | 237 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle); |
234 | 238 |
235 // Function: FPDFLink_LoadWebLinks | 239 // Function: FPDFLink_LoadWebLinks |
236 // Prepare information about weblinks in a page. | 240 // Prepare information about weblinks in a page. |
237 // Parameters: | 241 // Parameters: |
238 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | 242 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. |
239 // Return Value:» | 243 // Return Value: |
240 // A handle to the page's links information structure. | 244 // A handle to the page's links information structure. |
241 // NULL if something goes wrong. | 245 // NULL if something goes wrong. |
242 // Comments: | 246 // Comments: |
243 // Weblinks are those links implicitly embedded in PDF page
s. PDF also has a type of | 247 // Weblinks are those links implicitly embedded in PDF page
s. PDF also has a type of |
244 // annotation called "link", FPDFTEXT doesn't deal with tha
t kind of link. | 248 // annotation called "link", FPDFTEXT doesn't deal with tha
t kind of link. |
245 // FPDFTEXT weblink feature is useful for automatically det
ecting links in the page | 249 // FPDFTEXT weblink feature is useful for automatically det
ecting links in the page |
246 // contents. For example, things like "http://www.foxitsoft
ware.com" will be detected, | 250 // contents. For example, things like "http://www.foxitsoft
ware.com" will be detected, |
247 // so applications can allow user to click on those charact
ers to activate the link, | 251 // so applications can allow user to click on those charact
ers to activate the link, |
248 // even if the PDF doesn't come with link annotations. | 252 // even if the PDF doesn't come with link annotations. |
249 // | 253 // |
(...skipping 39 matching lines...) |
289 // link_page - Handle returned by FPDFLink_Load
WebLinks. | 293 // link_page - Handle returned by FPDFLink_Load
WebLinks. |
290 // link_index - Zero-based index for the link. | 294 // link_index - Zero-based index for the link. |
291 // rect_index - Zero-based index for a rectangle
. | 295 // rect_index - Zero-based index for a rectangle
. |
292 // left - Pointer to a double value receiv
ing the rectangle left boundary. | 296 // left - Pointer to a double value receiv
ing the rectangle left boundary. |
293 // top - Pointer to a double valu
e receiving the rectangle top boundary. | 297 // top - Pointer to a double valu
e receiving the rectangle top boundary. |
294 // right - Pointer to a double value receiv
ing the rectangle right boundary. | 298 // right - Pointer to a double value receiv
ing the rectangle right boundary. |
295 // bottom - Pointer to a double value receiv
ing the rectangle bottom boundary. | 299 // bottom - Pointer to a double value receiv
ing the rectangle bottom boundary. |
296 // Return Value: | 300 // Return Value: |
297 // None. | 301 // None. |
298 // | 302 // |
299 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index,
int rect_index, | 303 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index,
int rect_index, |
300
double* left, double* top,double* right, double* bottom); | 304
double* left, double* top,double* right, double* bottom); |
301 | 305 |
302 // Function: FPDFLink_CloseWebLinks | 306 // Function: FPDFLink_CloseWebLinks |
303 // Release resources used by weblink feature. | 307 // Release resources used by weblink feature. |
304 // Parameters: | 308 // Parameters: |
305 // link_page - Handle returned by FPDFLink_Load
WebLinks. | 309 // link_page - Handle returned by FPDFLink_Load
WebLinks. |
306 // Return Value: | 310 // Return Value: |
307 // None. | 311 // None. |
308 // | 312 // |
309 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page); | 313 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page); |
310 | 314 |
311 | 315 |
312 #ifdef __cplusplus | 316 #ifdef __cplusplus |
313 }; | 317 }; |
314 #endif | 318 #endif |
315 | 319 |
316 #endif//_FPDFTEXT_H_ | 320 #endif//_FPDFTEXT_H_ |
OLD | NEW |