| OLD | NEW |
| (Empty) |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
| 6 | |
| 7 #ifndef _FPDFTEXT_H_ | |
| 8 #define _FPDFTEXT_H_ | |
| 9 | |
| 10 #include "fpdfview.h" | |
| 11 | |
| 12 // Exported Functions | |
| 13 #ifdef __cplusplus | |
| 14 extern "C" { | |
| 15 #endif | |
| 16 | |
| 17 // Function: FPDFText_LoadPage | |
| 18 // Prepare information about all characters in a page. | |
| 19 // Parameters: | |
| 20 // page - Handle to the page. Returned by FPDF_Loa
dPage function (in FPDFVIEW module). | |
| 21 // Return value: | |
| 22 // A handle to the text page information structure. | |
| 23 // NULL if something goes wrong. | |
| 24 // Comments: | |
| 25 // Application must call FPDFText_ClosePage to release the
text page information. | |
| 26 // Notes: | |
| 27 // This function cannot load an FPDF_TEXTPAGE for a document that consists of dynamic fields. | |
| 28 // | |
| 29 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page); | |
| 30 | |
| 31 // Function: FPDFText_ClosePage | |
| 32 // Release all resources allocated for a text page informat
ion structure. | |
| 33 // Parameters: | |
| 34 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
| 35 // Return Value: | |
| 36 // None. | |
| 37 // | |
| 38 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page); | |
| 39 | |
| 40 // Function: FPDFText_CountChars | |
| 41 // Get number of characters in a page. | |
| 42 // Parameters: | |
| 43 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
| 44 // Return value: | |
| 45 // Number of characters in the page. Return -1 for error. | |
| 46 // Generated characters, such as additional space characters and new line characters, are also counted. | |
| 47 // Comments: | |
| 48 // Characters in a page form a "stream"; inside the stream, each character has an index. | |
| 49 // We will use the index parameters in many of the FPDFTEXT functions. The first character in the page | |
| 50 // has an index value of zero. | |
| 51 // | |
| 52 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page); | |
| 53 | |
| 54 // Function: FPDFText_GetUnicode | |
| 55 // Get Unicode of a character in a page. | |
| 56 // Parameters: | |
| 57 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
| 58 // index - Zero-based index of the characte
r. | |
| 59 // Return value: | |
| 60 // The Unicode of the particular character. | |
| 61 // If a character is not encoded in Unicode and the Foxit engine can't convert it to Unicode, | |
| 62 // the return value will be zero. | |
| 63 // | |
| 64 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int
index); | |
| 65 | |
| 66 // Function: FPDFText_GetFontSize | |
| 67 // Get the font size of a particular character. | |
| 68 // Parameters: | |
| 69 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
| 70 // index - Zero-based index of the characte
r. | |
| 71 // Return value: | |
| 72 // The font size of the particular character, measured in p
oints (about 1/72 inch). | |
| 73 // This is the typographic size of the font (so called "em
size"). | |
| 74 // | |
| 75 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index
); | |
| 76 | |
| 77 // Function: FPDFText_GetCharBox | |
| 78 // Get bounding box of a particular character. | |
| 79 // Parameters: | |
| 80 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
| 81 // index - Zero-based index of the characte
r. | |
| 82 // left - Pointer to a double number recei
ving left position of the character box. | |
| 83 // right - Pointer to a double number recei
ving right position of the character box. | |
| 84 // bottom - Pointer to a double number recei
ving bottom position of the character box. | |
| 85 // top - Pointer to a double numb
er receiving top position of the character box. | |
| 86 // Return Value: | |
| 87 // None. | |
| 88 // Comments: | |
| 89 // All positions are measured in PDF "user space". | |
| 90 // | |
| 91 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, int index, d
ouble* left, | |
| 92
double* right, double* bottom, double* top); | |
| 93 | |
| 94 // Function: FPDFText_GetCharIndexAtPos | |
| 95 // Get the index of a character at or nearby a certain position on the page. | |
| 96 // Parameters: | |
| 97 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. | |
| 98 // x - X position in PDF "user space". | |
| 99 // y - Y position in PDF "user space". | |
| 100 // xTolerance - An x-axis tolerance value for character hit detection, in points. | |
| 101 // yTolerance - A y-axis tolerance value for character hit detection, in points. | |
| 102 // Return Value: | |
| 103 // The zero-based index of the character at, or nearby the point (x,y). | |
| 104 // If there is no character at or nearby the point, return value will be -1. | |
| 105 // If an error occurs, -3 will be returned. | |
| 106 // | |
| 107 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page, | |
| 108 double x, double y, double xTolerance, double yTolerance); | |
| 109 | |
| 110 // Function: FPDFText_GetText | |
| 111 // Extract unicode text string from the page. | |
| 112 // Parameters: | |
| 113 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
| 114 // start_index - Index of the first character to extract. | |
| 115 // count - Number of characters to be extra
cted. | |
| 116 // result - A buffer (allocated by applicati
on) receiving the extracted unicodes. | |
| 117 // The size of the buffer m
ust be able to hold the number of characters plus a terminator. | |
| 118 // Return Value: | |
| 119 // Number of characters written into the result buffer, inc
luding the trailing terminator. | |
| 120 // Comments: | |
| 121 // This function ignores characters without unicode informa
tion. | |
| 122 // | |
| 123 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index,
int count, unsigned short* result); | |
| 124 | |
| 125 // Function: FPDFText_CountRects | |
| 126 // Count number of rectangular areas occupied by a segment
of texts. | |
| 127 // Parameters: | |
| 128 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
| 129 // start_index - Index of the first character of the text segment. | |
| 130 // count - Number of characters. | |
| 131 // Return value: | |
| 132 // Number of rectangles. Zero for error. | |
| 133 // Comments: | |
| 134 // This function, along with FPDFText_GetRect, can be used by applications to detect the position | |
| 135 // on the page of a text segment, for example so that the proper areas can be highlighted. | |
| 136 // FPDFTEXT will automatically merge small character boxes into a bigger one if those characters | |
| 137 // are on the same line and use the same font settings. | |
| 138 // | |
| 139 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, int start_ind
ex, int count); | |
| 140 | |
| 141 // Function: FPDFText_GetRect | |
| 142 // Get a rectangular area from the result generated by FPDF
Text_CountRects. | |
| 143 // Parameters: | |
| 144 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
| 145 // rect_index - Zero-based index for the rectang
le. | |
| 146 // left - Pointer to a double value receiv
ing the rectangle left boundary. | |
| 147 // top - Pointer to a double valu
e receiving the rectangle top boundary. | |
| 148 // right - Pointer to a double value receiv
ing the rectangle right boundary. | |
| 149 // bottom - Pointer to a double value receiv
ing the rectangle bottom boundary. | |
| 150 // Return Value: | |
| 151 // None. | |
| 152 // | |
| 153 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, int rect_index,
double* left, double* top, | |
| 154
double* right, double* bottom); | |
| 155 | |
| 156 // Function: FPDFText_GetBoundedText | |
| 157 // Extract unicode text within a rectangular boundary on th
e page. | |
| 158 // Parameters: | |
| 159 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
| 160 // left - Left boundary. | |
| 161 // top - Top boundary. | |
| 162 // right - Right boundary. | |
| 163 // bottom - Bottom boundary. | |
| 164 // buffer - A unicode buffer. | |
| 165 // buflen - Number of characters (not bytes)
for the buffer, excluding an additional terminator. | |
| 166 // Return Value: | |
| 167 // If buffer is NULL or buflen is zero, return number of ch
aracters (not bytes) of text present within | |
| 168 // the rectangle, excluding a terminating NUL. Generally y
ou should pass a buffer at least one larger | |
| 169 // than this if you want a terminating NUL, which will be p
rovided if space is available. | |
| 170 // Otherwise, return number of characters copied into the b
uffer, including the terminating NUL | |
| 171 // when space for it is available. | |
| 172 // Comment: | |
| 173 // If the buffer is too small, as much text as will fit is
copied into it. | |
| 174 // | |
| 175 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double lef
t, double top, | |
| 176
double right, double bottom,unsigned short* buffer,int buflen); | |
| 177 | |
| 178 | |
| 179 // Flags used by FPDFText_FindStart function. | |
| 180 #define FPDF_MATCHCASE 0x00000001 //If not set, it will not match
case by default. | |
| 181 #define FPDF_MATCHWHOLEWORD 0x00000002 //If not set, it will not match
the whole word by default. | |
| 182 | |
| 183 // Function: FPDFText_FindStart | |
| 184 // Start a search. | |
| 185 // Parameters: | |
| 186 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
| 187 // findwhat - A unicode match pattern. | |
| 188 // flags - Option flags. | |
| 189 // start_index - Start from this character. -1 fo
r end of the page. | |
| 190 // Return Value: | |
| 191 // A handle for the search context. FPDFText_FindClose must
be called to release this handle. | |
| 192 // | |
| 193 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, FPD
F_WIDESTRING findwhat, | |
| 194
unsigned long flags, int start_index); | |
| 195 | |
| 196 // Function: FPDFText_FindNext | |
| 197 // Search in the direction from page start to end. | |
| 198 // Parameters: | |
| 199 // handle - A search context handle returned
by FPDFText_FindStart. | |
| 200 // Return Value: | |
| 201 // Whether a match is found. | |
| 202 // | |
| 203 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle); | |
| 204 | |
| 205 // Function: FPDFText_FindPrev | |
| 206 // Search in the direction from page end to start. | |
| 207 // Parameters: | |
| 208 // handle - A search context handle returned
by FPDFText_FindStart. | |
| 209 // Return Value: | |
| 210 // Whether a match is found. | |
| 211 // | |
| 212 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle); | |
| 213 | |
| 214 // Function: FPDFText_GetSchResultIndex | |
| 215 // Get the starting character index of the search result. | |
| 216 // Parameters: | |
| 217 // handle - A search context handle returned
by FPDFText_FindStart. | |
| 218 // Return Value: | |
| 219 // Index for the starting character. | |
| 220 // | |
| 221 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle); | |
| 222 | |
| 223 // Function: FPDFText_GetSchCount | |
| 224 // Get the number of matched characters in the search resul
t. | |
| 225 // Parameters: | |
| 226 // handle - A search context handle returned
by FPDFText_FindStart. | |
| 227 // Return Value: | |
| 228 // Number of matched characters. | |
| 229 // | |
| 230 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle); | |
| 231 | |
| 232 // Function: FPDFText_FindClose | |
| 233 // Release a search context. | |
| 234 // Parameters: | |
| 235 // handle - A search context handle returned
by FPDFText_FindStart. | |
| 236 // Return Value: | |
| 237 // None. | |
| 238 // | |
| 239 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle); | |
| 240 | |
| 241 // Function: FPDFLink_LoadWebLinks | |
| 242 // Prepare information about weblinks in a page. | |
| 243 // Parameters: | |
| 244 // text_page - Handle to a text page informatio
n structure. Returned by FPDFText_LoadPage function. | |
| 245 // Return Value: | |
| 246 // A handle to the page's links information structure. | |
| 247 // NULL if something goes wrong. | |
| 248 // Comments: | |
| 249 // Weblinks are those links implicitly embedded in PDF page
s. PDF also has a type of | |
| 250 // annotation called "link", FPDFTEXT doesn't deal with tha
t kind of link. | |
| 251 // FPDFTEXT weblink feature is useful for automatically det
ecting links in the page | |
| 252 // contents. For example, things like "http://www.foxitsoft
ware.com" will be detected, | |
| 253 // so applications can allow user to click on those charact
ers to activate the link, | |
| 254 // even if the PDF doesn't come with link annotations. | |
| 255 // | |
| 256 // FPDFLink_CloseWebLinks must be called to release resourc
es. | |
| 257 // | |
| 258 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page); | |
| 259 | |
| 260 // Function: FPDFLink_CountWebLinks | |
| 261 // Count number of detected web links. | |
| 262 // Parameters: | |
| 263 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
| 264 // Return Value: | |
| 265 // Number of detected web links. | |
| 266 // | |
| 267 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page); | |
| 268 | |
| 269 // Function: FPDFLink_GetURL | |
| 270 // Fetch the URL information for a detected web link. | |
| 271 // Parameters: | |
| 272 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
| 273 // link_index - Zero-based index for the link. | |
| 274 // buffer - A unicode buffer. | |
| 275 // buflen - Number of characters (not bytes)
for the buffer, including an additional terminator. | |
| 276 // Return Value: | |
| 277 // If buffer is NULL or buflen is zero, return the number of characters (not bytes) needed, including an additional terminator; | |
| 278 // otherwise, return the number of characters copied into the buffer. | |
| 279 // | |
| 280 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, int link_index, u
nsigned short* buffer,int buflen); | |
| 281 | |
| 282 // Function: FPDFLink_CountRects | |
| 283 // Count number of rectangular areas for the link. | |
| 284 // Parameters: | |
| 285 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
| 286 // link_index - Zero-based index for the link. | |
| 287 // Return Value: | |
| 288 // Number of rectangular areas for the link. | |
| 289 // | |
| 290 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_inde
x); | |
| 291 | |
| 292 // Function: FPDFLink_GetRect | |
| 293 // Fetch the boundaries of a rectangle for a link. | |
| 294 // Parameters: | |
| 295 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
| 296 // link_index - Zero-based index for the link. | |
| 297 // rect_index - Zero-based index for a rectangle
. | |
| 298 // left - Pointer to a double value receiv
ing the rectangle left boundary. | |
| 299 // top - Pointer to a double valu
e receiving the rectangle top boundary. | |
| 300 // right - Pointer to a double value receiv
ing the rectangle right boundary. | |
| 301 // bottom - Pointer to a double value receiv
ing the rectangle bottom boundary. | |
| 302 // Return Value: | |
| 303 // None. | |
| 304 // | |
| 305 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index,
int rect_index, | |
| 306
double* left, double* top,double* right, double* bottom); | |
| 307 | |
| 308 // Function: FPDFLink_CloseWebLinks | |
| 309 // Release resources used by weblink feature. | |
| 310 // Parameters: | |
| 311 // link_page - Handle returned by FPDFLink_Load
WebLinks. | |
| 312 // Return Value: | |
| 313 // None. | |
| 314 // | |
| 315 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page); | |
| 316 | |
| 317 | |
| 318 #ifdef __cplusplus | |
| 319 } // extern "C" | |
| 320 #endif | |
| 321 | |
| 322 #endif//_FPDFTEXT_H_ | |
| OLD | NEW |