Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2038)

Side by Side Diff: public/i18n/unicode/uregex.h

Issue 18836004: Move ICU headers from public/{common,i18n} to source/{common,i18n} (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu46.git@master
Patch Set: same as ps #3. retry uploading Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « public/i18n/unicode/unum.h ('k') | public/i18n/unicode/usearch.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 **********************************************************************
3 * Copyright (C) 2004-2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: uregex.h
7 * encoding: US-ASCII
8 * indentation:4
9 *
10 * created on: 2004mar09
11 * created by: Andy Heninger
12 *
13 * ICU Regular Expressions, API for C
14 */
15
16 /**
17 * \file
18 * \brief C API: Regular Expressions
19 *
20 * <p>This is a C wrapper around the C++ RegexPattern and RegexMatcher classes.</p>
21 */
22
23 #ifndef UREGEX_H
24 #define UREGEX_H
25
26 #include "unicode/utext.h"
27 #include "unicode/utypes.h"
28
29 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
30
31 #include "unicode/localpointer.h"
32 #include "unicode/parseerr.h"
33
34 struct URegularExpression;
35 /**
36 * Structure representing a compiled regular expression, plus the results
37 * of a match operation.
38 * @stable ICU 3.0
39 */
40 typedef struct URegularExpression URegularExpression;
41
42
43 /**
44 * Constants for Regular Expression Match Modes. Each constant is a distinct bit, so several flags can be combined with bitwise OR.
45 * @stable ICU 2.4
46 */
47 typedef enum URegexpFlag{
48
49 #ifndef U_HIDE_DRAFT_API
50 /** Forces normalization of pattern and strings.
51 Not implemented yet, just a placeholder, hence draft.
52 @draft ICU 2.4 */
53 UREGEX_CANON_EQ = 128,
54 #endif
55 /** Enable case insensitive matching. @stable ICU 2.4 */
56 UREGEX_CASE_INSENSITIVE = 2,
57
58 /** Allow white space and comments within patterns. @stable ICU 2.4 */
59 UREGEX_COMMENTS = 4,
60
61 /** If set, '.' matches line terminators, otherwise '.' matching stops at line end.
62 * @stable ICU 2.4 */
63 UREGEX_DOTALL = 32,
64
65 /** If set, treat the entire pattern as a literal string.
66 * Metacharacters or escape sequences in the input sequence will be given
67 * no special meaning. Not implemented yet as of ICU 4.4.
68 *
69 * The flags CASE_INSENSITIVE and UNICODE_CASE retain their impact
70 * on matching when used in conjunction with this flag.
71 * The other flags become superfluous.
72 * TODO: say which escapes are still handled; anything Java does
73 * early (\\u) we should still do.
74 * @stable ICU 4.0
75 */
76 UREGEX_LITERAL = 16,
77
78 /** Control behavior of "$" and "^".
79 * If set, recognize line terminators within string,
80 * otherwise, match only at start and end of input string.
81 * @stable ICU 2.4 */
82 UREGEX_MULTILINE = 8,
83
84 /** Unix-only line endings.
85 * When this mode is enabled, only \\u000a is recognized as a line ending
86 * in the behavior of ., ^, and $.
87 * @stable ICU 4.0
88 */
89 UREGEX_UNIX_LINES = 1,
90
91 /** Unicode word boundaries.
92 * If set, \b uses the Unicode TR 29 definition of word boundaries.
93 * Warning: Unicode word boundaries are quite different from
94 * traditional regular expression word boundaries. See
95 * http://unicode.org/reports/tr29/#Word_Boundaries
96 * @stable ICU 2.8
97 */
98 UREGEX_UWORD = 256,
99
100 /** Error on unrecognized backslash escapes.
101 * If set, fail with an error on patterns that contain
102 * backslash-escaped ASCII letters without a known special
103 * meaning. If this flag is not set, these
104 * escaped letters represent themselves.
105 * @stable ICU 4.0
106 */
107 UREGEX_ERROR_ON_UNKNOWN_ESCAPES = 512
108
109 } URegexpFlag;
110
111 /**
112 * Open (compile) an ICU regular expression. Compiles the regular expression in
113 * string form into an internal representation using the specified match mode flags.
114 * The resulting regular expression handle can then be used to perform various
115 * matching operations.
116 *
117 *
118 * @param pattern The Regular Expression pattern to be compiled.
119 * @param patternLength The length of the pattern, or -1 if the pattern is
120 * NUL terminated.
121 * @param flags Flags that alter the default matching behavior for
122 * the regular expression, UREGEX_CASE_INSENSITIVE, for
123 * example. For default behavior, set this parameter to zero.
124 * See <code>enum URegexpFlag</code>. All desired flags
125 * are bitwise-ORed together.
126 * @param pe Receives the position (line and column numbers) of any syntax
127 * error within the source regular expression string. If this
128 * information is not wanted, pass NULL for this parameter.
129 * @param status Receives error detected by this function.
130 * @stable ICU 3.0
131 *
132 */
133 U_STABLE URegularExpression * U_EXPORT2
134 uregex_open( const UChar *pattern,
135 int32_t patternLength,
136 uint32_t flags,
137 UParseError *pe,
138 UErrorCode *status);
139
140 /**
141 * Open (compile) an ICU regular expression. Compiles the regular expression in
142 * string form into an internal representation using the specified match mode flags.
143 * The resulting regular expression handle can then be used to perform various
144 * matching operations.
145 * <p>
146 * The contents of the pattern UText will be extracted and saved. Ownership of the
147 * UText struct itself remains with the caller. This is to match the behavior of
148 * uregex_open().
149 *
150 * @param pattern The Regular Expression pattern to be compiled.
151 * @param flags Flags that alter the default matching behavior for
152 * the regular expression, UREGEX_CASE_INSENSITIVE, for
153 * example. For default behavior, set this parameter to zero.
154 * See <code>enum URegexpFlag</code>. All desired flags
155 * are bitwise-ORed together.
156 * @param pe Receives the position (line and column numbers) of any syntax
157 * error within the source regular expression string. If this
158 * information is not wanted, pass NULL for this parameter.
159 * @param status Receives error detected by this function.
160 *
161 * @draft ICU 4.6
162 */
163 U_DRAFT URegularExpression * U_EXPORT2
164 uregex_openUText(UText *pattern,
165 uint32_t flags,
166 UParseError *pe,
167 UErrorCode *status);
168
169 /**
170 * Open (compile) an ICU regular expression. The resulting regular expression
171 * handle can then be used to perform various matching operations.
172 * <p>
173 * This function is the same as uregex_open, except that the pattern
174 * is supplied as an 8 bit char * string in the default code page.
175 *
176 * @param pattern The Regular Expression pattern to be compiled,
177 * NUL terminated.
178 * @param flags Flags that alter the default matching behavior for
179 * the regular expression, UREGEX_CASE_INSENSITIVE, for
180 * example. For default behavior, set this parameter to zero.
181 * See <code>enum URegexpFlag</code>. All desired flags
182 * are bitwise-ORed together.
183 * @param pe Receives the position (line and column numbers) of any syntax
184 * error within the source regular expression string. If this
185 * information is not wanted, pass NULL for this parameter.
186 * @param status Receives errors detected by this function.
187 * @return The URegularExpression object representing the compiled
188 * pattern.
189 *
190 * @stable ICU 3.0
191 */
192 #if !UCONFIG_NO_CONVERSION
193 U_STABLE URegularExpression * U_EXPORT2
194 uregex_openC( const char *pattern,
195 uint32_t flags,
196 UParseError *pe,
197 UErrorCode *status);
198 #endif
199
200
201
202 /**
203 * Close the regular expression, recovering all resources (memory) it
204 * was holding.
205 *
206 * @param regexp The regular expression to be closed.
207 * @stable ICU 3.0
208 */
209 U_STABLE void U_EXPORT2
210 uregex_close(URegularExpression *regexp);
211
212 #if U_SHOW_CPLUSPLUS_API
213
214 U_NAMESPACE_BEGIN
215
216 /**
217 * \class LocalURegularExpressionPointer
218 * "Smart pointer" class, closes a URegularExpression via uregex_close().
219 * For most methods see the LocalPointerBase base class.
220 *
221 * @see LocalPointerBase
222 * @see LocalPointer
223 * @stable ICU 4.4
224 */
225 U_DEFINE_LOCAL_OPEN_POINTER(LocalURegularExpressionPointer, URegularExpression, uregex_close);
226
227 U_NAMESPACE_END
228
229 #endif
230
231 /**
232 * Make a copy of a compiled regular expression. Cloning a regular
233 * expression is faster than opening a second instance from the source
234 * form of the expression, and requires less memory.
235 * <p>
236 * Note that the current input string and the position of any matched text
237 * within it are not cloned; only the pattern itself and the
238 * match mode flags are copied.
239 * <p>
240 * Cloning can be particularly useful to threaded applications that perform
241 * multiple match operations in parallel. Each concurrent RE
242 * operation requires its own instance of a URegularExpression.
243 *
244 * @param regexp The compiled regular expression to be cloned.
245 * @param status Receives indication of any errors encountered
246 * @return the cloned copy of the compiled regular expression.
247 * @stable ICU 3.0
248 */
249 U_STABLE URegularExpression * U_EXPORT2
250 uregex_clone(const URegularExpression *regexp, UErrorCode *status);
251
252 /**
253 * Returns a pointer to the source form of the pattern for this regular expression.
254 * This function will work even if the pattern was originally specified as a UText.
255 *
256 * @param regexp The compiled regular expression.
257 * @param patLength This output parameter will be set to the length of the
258 * pattern string. A NULL pointer may be used here if the
259 * pattern length is not needed, as would be the case if
260 * the pattern is known in advance to be a NUL terminated
261 * string.
262 * @param status Receives errors detected by this function.
263 * @return a pointer to the pattern string. The storage for the string is
264 * owned by the regular expression object, and must not be
265 * altered or deleted by the application. The returned string
266 * will remain valid until the regular expression is closed.
267 * @stable ICU 3.0
268 */
269 U_STABLE const UChar * U_EXPORT2
270 uregex_pattern(const URegularExpression *regexp,
271 int32_t *patLength,
272 UErrorCode *status);
273
274 /**
275 * Returns the source text of the pattern for this regular expression.
276 * This function will work even if the pattern was originally specified as a UChar string.
277 *
278 * @param regexp The compiled regular expression.
279 * @param status Receives errors detected by this function.
280 * @return the pattern text. The storage for the text is owned by the regular expression
281 * object, and must not be altered or deleted.
282 *
283 * @draft ICU 4.6
284 */
285 U_DRAFT UText * U_EXPORT2
286 uregex_patternUText(const URegularExpression *regexp,
287 UErrorCode *status);
288
289
290 /**
291 * Get the match mode flags that were specified when compiling this regular expression.
292 * @param status Receives errors detected by this function.
293 * @param regexp The compiled regular expression.
294 * @return The match mode flags
295 * @see URegexpFlag
296 * @stable ICU 3.0
297 */
298 U_STABLE int32_t U_EXPORT2
299 uregex_flags(const URegularExpression *regexp,
300 UErrorCode *status);
301
302
303 /**
304 * Set the subject text string upon which the regular expression will look for matches.
305 * This function may be called any number of times, allowing the regular
306 * expression pattern to be applied to different strings.
307 * <p>
308 * Regular expression matching operations work directly on the application's
309 * string data. No copy is made. The subject string data must not be
310 * altered after calling this function until after all regular expression
311 * operations involving this string data are completed.
312 * <p>
313 * Zero length strings are permitted. In this case, no subsequent match
314 * operation will dereference the text string pointer.
315 *
316 * @param regexp The compiled regular expression.
317 * @param text The subject text string.
318 * @param textLength The length of the subject text, or -1 if the string
319 * is NUL terminated.
320 * @param status Receives errors detected by this function.
321 * @stable ICU 3.0
322 */
323 U_STABLE void U_EXPORT2
324 uregex_setText(URegularExpression *regexp,
325 const UChar *text,
326 int32_t textLength,
327 UErrorCode *status);
328
329
330 /**
331 * Set the subject text string upon which the regular expression will look for matches.
332 * This function may be called any number of times, allowing the regular
333 * expression pattern to be applied to different strings.
334 * <p>
335 * Regular expression matching operations work directly on the application's
336 * string data; only a shallow clone is made. The subject string data must not be
337 * altered after calling this function until after all regular expression
338 * operations involving this string data are completed.
339 *
340 * @param regexp The compiled regular expression.
341 * @param text The subject text string.
342 * @param status Receives errors detected by this function.
343 *
344 * @draft ICU 4.6
345 */
346 U_DRAFT void U_EXPORT2
347 uregex_setUText(URegularExpression *regexp,
348 UText *text,
349 UErrorCode *status);
350
351 /**
352 * Get the subject text that is currently associated with this
353 * regular expression object. If the input was supplied using uregex_setText(),
354 * that pointer will be returned. Otherwise, the characters in the input will
355 * be extracted to a buffer and returned. In either case, ownership remains
356 * with the regular expression object.
357 *
358 * This function will work even if the input was originally specified as a UText.
359 *
360 * @param regexp The compiled regular expression.
361 * @param textLength The length of the string is returned in this output parameter.
362 * A NULL pointer may be used here if the
363 * text length is not needed, as would be the case if
364 * the text is known in advance to be a NUL terminated
365 * string.
366 * @param status Receives errors detected by this function.
367 * @return Pointer to the subject text string currently associated with
368 * this regular expression.
369 * @stable ICU 3.0
370 */
371 U_STABLE const UChar * U_EXPORT2
372 uregex_getText(URegularExpression *regexp,
373 int32_t *textLength,
374 UErrorCode *status);
375
376
377 /**
378 * Get the subject text that is currently associated with this
379 * regular expression object.
380 *
381 * This function will work even if the input was originally specified as a UChar string.
382 *
383 * @param regexp The compiled regular expression.
384 * @param dest A mutable UText in which to store the current input.
385 * If NULL, a new UText will be created as an immutable shallow clone
386 * of the actual input string.
387 * @param status Receives errors detected by this function.
388 * @return The subject text currently associated with this regular expression.
389 * If a pre-allocated UText was provided, it will always be used and returned.
390 *
391 * @draft ICU 4.6
392 */
393 U_DRAFT UText * U_EXPORT2
394 uregex_getUText(URegularExpression *regexp,
395 UText *dest,
396 UErrorCode *status);
397
398 /**
399 * Attempts to match the input string against the pattern.
400 * To succeed, the match must extend to the end of the string,
401 * or cover the complete match region.
402 *
403 * If startIndex >= zero the match operation starts at the specified
404 * index and must extend to the end of the input string. Any region
405 * that has been specified is reset.
406 *
407 * If startIndex == -1 the match must cover the input region, or the entire
408 * input string if no region has been set. This directly corresponds to
409 * Matcher.matches() in Java
410 *
411 * @param regexp The compiled regular expression.
412 * @param startIndex The input string (native) index at which to begin matching, or -1
413 * to match the input Region.
414 * @param status Receives errors detected by this function.
415 * @return TRUE if there is a match
416 * @stable ICU 3.0
417 */
418 U_STABLE UBool U_EXPORT2
419 uregex_matches(URegularExpression *regexp,
420 int32_t startIndex,
421 UErrorCode *status);
422
423 /**
424 * 64bit version of uregex_matches.
425 * @draft ICU 4.6
426 */
427 U_DRAFT UBool U_EXPORT2
428 uregex_matches64(URegularExpression *regexp,
429 int64_t startIndex,
430 UErrorCode *status);
431
432 /**
433 * Attempts to match the input string, starting from the specified index, against the pattern.
434 * The match may be of any length, and is not required to extend to the end
435 * of the input string. Contrast with uregex_matches().
436 *
437 * <p>If startIndex is >= 0 any input region that was set for this
438 * URegularExpression is reset before the operation begins.
439 *
440 * <p>If the specified starting index == -1 the match begins at the start of the input
441 * region, or at the start of the full string if no region has been specified.
442 * This corresponds directly with Matcher.lookingAt() in Java.
443 *
444 * <p>If the match succeeds then more information can be obtained via the
445 * <code>uregex_start()</code>, <code>uregex_end()</code>,
446 * and <code>uregex_group()</code> functions.</p>
447 *
448 * @param regexp The compiled regular expression.
449 * @param startIndex The input string (native) index at which to begin matching, or
450 * -1 to match the Input Region
451 * @param status A reference to a UErrorCode to receive any errors.
452 * @return TRUE if there is a match.
453 * @stable ICU 3.0
454 */
455 U_STABLE UBool U_EXPORT2
456 uregex_lookingAt(URegularExpression *regexp,
457 int32_t startIndex,
458 UErrorCode *status);
459
460 /**
461 * 64bit version of uregex_lookingAt.
462 * @draft ICU 4.6
463 */
464 U_DRAFT UBool U_EXPORT2
465 uregex_lookingAt64(URegularExpression *regexp,
466 int64_t startIndex,
467 UErrorCode *status);
468
469 /**
470 * Find the first matching substring of the input string that matches the pattern.
471 * If startIndex is >= zero the search for a match begins at the specified index,
472 * and any match region is reset. This corresponds directly with
473 * Matcher.find(startIndex) in Java.
474 *
475 * If startIndex == -1 the search begins at the start of the input region,
476 * or at the start of the full string if no region has been specified.
477 *
478 * If a match is found, <code>uregex_start(), uregex_end()</code>, and
479 * <code>uregex_group()</code> will provide more information regarding the match.
480 *
481 * @param regexp The compiled regular expression.
482 * @param startIndex The position (native) in the input string to begin the search, or
483 * -1 to search within the Input Region.
484 * @param status A reference to a UErrorCode to receive any errors.
485 * @return TRUE if a match is found.
486 * @stable ICU 3.0
487 */
488 U_STABLE UBool U_EXPORT2
489 uregex_find(URegularExpression *regexp,
490 int32_t startIndex,
491 UErrorCode *status);
492
493 /**
494 * 64bit version of uregex_find.
495 * @draft ICU 4.6
496 */
497 U_DRAFT UBool U_EXPORT2
498 uregex_find64(URegularExpression *regexp,
499 int64_t startIndex,
500 UErrorCode *status);
501
502 /**
503 * Find the next pattern match in the input string. Begin searching
504 * the input at the location following the end of the previous match,
505 * or at the start of the string (or region) if there is no
506 * previous match. If a match is found, <code>uregex_start(), uregex_end()</code>, and
507 * <code>uregex_group()</code> will provide more information regarding the match.
508 *
509 * @param regexp The compiled regular expression.
510 * @param status A reference to a UErrorCode to receive any errors.
511 * @return TRUE if a match is found.
512 * @see uregex_reset
513 * @stable ICU 3.0
514 */
515 U_STABLE UBool U_EXPORT2
516 uregex_findNext(URegularExpression *regexp,
517 UErrorCode *status);
518
519 /**
520 * Get the number of capturing groups in this regular expression's pattern.
521 * @param regexp The compiled regular expression.
522 * @param status A reference to a UErrorCode to receive any errors.
523 * @return the number of capture groups
524 * @stable ICU 3.0
525 */
526 U_STABLE int32_t U_EXPORT2
527 uregex_groupCount(URegularExpression *regexp,
528 UErrorCode *status);
529
530 /** Extract the string for the specified matching expression or subexpression.
531 * Group #0 is the complete string of matched text.
532 * Group #1 is the text matched by the first set of capturing parentheses.
533 *
534 * @param regexp The compiled regular expression.
535 * @param groupNum The capture group to extract. Group 0 is the complete
536 * match. The value of this parameter must be
537 * less than or equal to the number of capture groups in
538 * the pattern.
539 * @param dest Buffer to receive the matching string data
540 * @param destCapacity Capacity of the dest buffer.
541 * @param status A reference to a UErrorCode to receive any errors.
542 * @return Length of matching data,
543 * or -1 if no applicable match.
544 * @stable ICU 3.0
545 */
546 U_STABLE int32_t U_EXPORT2
547 uregex_group(URegularExpression *regexp,
548 int32_t groupNum,
549 UChar *dest,
550 int32_t destCapacity,
551 UErrorCode *status);
552
553
554 /** Returns a shallow immutable clone of the entire input string. The returned UText current native index
555 * is set to the beginning of the requested capture group. The capture group length is also
556 * returned via groupLength.
557 * Group #0 is the complete string of matched text.
558 * Group #1 is the text matched by the first set of capturing parentheses.
559 *
560 * @param regexp The compiled regular expression.
561 * @param groupNum The capture group to extract. Group 0 is the complete
562 * match. The value of this parameter must be
563 * less than or equal to the number of capture groups in
564 * the pattern.
565 * @param dest A mutable UText in which to store the current input.
566 * If NULL, a new UText will be created as an immutable shallow clone
567 * of the entire input string.
568 * @param groupLength The group length of the desired capture group.
569 * @param status A reference to a UErrorCode to receive any errors.
570 * @return The subject text currently associated with this regular expression.
571 * If a pre-allocated UText was provided, it will always be used and returned.
572
573 *
574 * @draft ICU 4.6
575 */
576 U_DRAFT UText * U_EXPORT2
577 uregex_groupUText(URegularExpression *regexp,
578 int32_t groupNum,
579 UText *dest,
580 int64_t *groupLength,
581 UErrorCode *status);
582
583
584 /** Extract the string for the specified matching expression or subexpression.
585 * Group #0 is the complete string of matched text.
586 * Group #1 is the text matched by the first set of capturing parentheses.
587 *
588 * @param regexp The compiled regular expression.
589 * @param groupNum The capture group to extract. Group 0 is the complete
590 * match. The value of this parameter must be
591 * less than or equal to the number of capture groups in
592 * the pattern.
593 * @param dest Mutable UText to receive the matching string data.
594 * If NULL, a new UText will be created (which may not be mutable).
595 * @param status A reference to a UErrorCode to receive any errors.
596 * @return The matching string data. If a pre-allocated UText was provided,
597 * it will always be used and returned.
598 *
599 * @internal ICU 4.4 technology preview
600 */
601 U_INTERNAL UText * U_EXPORT2
602 uregex_groupUTextDeep(URegularExpression *regexp,
603 int32_t groupNum,
604 UText *dest,
605 UErrorCode *status);
606
607 /**
608 * Returns the index in the input string of the start of the text matched by the
609 * specified capture group during the previous match operation. Return -1 if
610 * the capture group was not part of the last match.
611 * Group #0 refers to the complete range of matched text.
612 * Group #1 refers to the text matched by the first set of capturing parentheses.
613 *
614 * @param regexp The compiled regular expression.
615 * @param groupNum The capture group number
616 * @param status A reference to a UErrorCode to receive any errors.
617 * @return the starting (native) position in the input of the text matched
618 * by the specified group.
619 * @stable ICU 3.0
620 */
621 U_STABLE int32_t U_EXPORT2
622 uregex_start(URegularExpression *regexp,
623 int32_t groupNum,
624 UErrorCode *status);
625
626 /**
627 * 64bit version of uregex_start.
628 * @draft ICU 4.6
629 */
630 U_DRAFT int64_t U_EXPORT2
631 uregex_start64(URegularExpression *regexp,
632 int32_t groupNum,
633 UErrorCode *status);
634
635 /**
636 * Returns the index in the input string of the position following the end
637 * of the text matched by the specified capture group.
638 * Return -1 if the capture group was not part of the last match.
639 * Group #0 refers to the complete range of matched text.
640 * Group #1 refers to the text matched by the first set of capturing parentheses.
641 *
642 * @param regexp The compiled regular expression.
643 * @param groupNum The capture group number
644 * @param status A reference to a UErrorCode to receive any errors.
645 * @return the (native) index of the position following the last matched character.
646 * @stable ICU 3.0
647 */
648 U_STABLE int32_t U_EXPORT2
649 uregex_end(URegularExpression *regexp,
650 int32_t groupNum,
651 UErrorCode *status);
652
653 /**
654 * 64bit version of uregex_end.
655 * @draft ICU 4.6
656 */
657 U_DRAFT int64_t U_EXPORT2
658 uregex_end64(URegularExpression *regexp,
659 int32_t groupNum,
660 UErrorCode *status);
661
662 /**
663 * Reset any saved state from the previous match. Has the effect of
664 * causing uregex_findNext to begin at the specified index, and causing
665 * uregex_start(), uregex_end() and uregex_group() to return an error
666 * indicating that there is no match information available. Clears any
667 * match region that may have been set.
668 *
669 * @param regexp The compiled regular expression.
670 * @param index The position (native) in the text at which a
671 * uregex_findNext() should begin searching.
672 * @param status A reference to a UErrorCode to receive any errors.
673 * @stable ICU 3.0
674 */
675 U_STABLE void U_EXPORT2
676 uregex_reset(URegularExpression *regexp,
677 int32_t index,
678 UErrorCode *status);
679
680 /**
681 * 64bit version of uregex_reset.
682 * @draft ICU 4.6
683 */
684 U_DRAFT void U_EXPORT2
685 uregex_reset64(URegularExpression *regexp,
686 int64_t index,
687 UErrorCode *status);
688
689 /** Sets the limits of the matching region for this URegularExpression.
690 * The region is the part of the input string that will be considered when matching.
691 * Invoking this method resets any saved state from the previous match,
692 * then sets the region to start at the index specified by the start parameter
693 * and end at the index specified by the end parameter.
694 *
695 * Depending on the transparency and anchoring being used (see useTransparentBounds
696 * and useAnchoringBounds), certain constructs such as anchors may behave differently
697 * at or around the boundaries of the region.
698 *
699 * The function will fail if start is greater than limit, or if either index
700 * is less than zero or greater than the length of the string being matched.
701 *
702 * @param regexp The compiled regular expression.
703 * @param regionStart The (native) index to begin searches at.
704 * @param regionLimit The (native) index to end searches at (exclusive).
705 * @param status A pointer to a UErrorCode to receive any errors.
706 * @stable ICU 4.0
707 */
708 U_STABLE void U_EXPORT2
709 uregex_setRegion(URegularExpression *regexp,
710 int32_t regionStart,
711 int32_t regionLimit,
712 UErrorCode *status);
713
714 /**
715 * 64bit version of uregex_setRegion.
716 * @draft ICU 4.6
717 */
718 U_DRAFT void U_EXPORT2
719 uregex_setRegion64(URegularExpression *regexp,
720 int64_t regionStart,
721 int64_t regionLimit,
722 UErrorCode *status);
723
724 /**
725 * Variation on uregex_setRegion to set the region without resetting the start index
726 * and without resetting the position for subsequent matches.
727 * @draft ICU 4.6
728 */
729 U_DRAFT void U_EXPORT2
730 uregex_setRegionAndStart(URegularExpression *regexp,
731 int64_t regionStart,
732 int64_t regionLimit,
733 int64_t startIndex,
734 UErrorCode *status);
735
736 /**
737 * Reports the start index of the matching region. Any matches found are limited
738 * to the region bounded by regionStart (inclusive) and regionEnd (exclusive).
739 *
740 * @param regexp The compiled regular expression.
741 * @param status A pointer to a UErrorCode to receive any errors.
742 * @return The starting (native) index of this matcher's region.
743 * @stable ICU 4.0
744 */
745 U_STABLE int32_t U_EXPORT2
746 uregex_regionStart(const URegularExpression *regexp,
747 UErrorCode *status);
748
749 /**
750 * 64bit version of uregex_regionStart.
751 * @draft ICU 4.6
752 */
753 U_DRAFT int64_t U_EXPORT2
754 uregex_regionStart64(const URegularExpression *regexp,
755 UErrorCode *status);
756
757 /**
758 * Reports the end index (exclusive) of the matching region for this URegularExpression.
759 * Any matches found are limited to the region bounded by regionStart (inclusive)
760 * and regionEnd (exclusive).
761 *
762 * @param regexp The compiled regular expression.
763 * @param status A pointer to a UErrorCode to receive any errors.
764 * @return The ending point (native) of this matcher's region.
765 * @stable ICU 4.0
766 */
767 U_STABLE int32_t U_EXPORT2
768 uregex_regionEnd(const URegularExpression *regexp,
769 UErrorCode *status);
770
771 /**
772 * 64bit version of uregex_regionEnd.
773 * @draft ICU 4.6
774 */
775 U_DRAFT int64_t U_EXPORT2
776 uregex_regionEnd64(const URegularExpression *regexp,
777 UErrorCode *status);
778
779 /**
780 * Queries the transparency of region bounds for this URegularExpression.
781 * See useTransparentBounds for a description of transparent and opaque bounds.
782 * By default, matching boundaries are opaque.
783 *
784 * @param regexp The compiled regular expression.
785 * @param status A pointer to a UErrorCode to receive any errors.
786 * @return TRUE if this matcher is using transparent bounds, FALSE if it is using opaque bounds.
787 * @stable ICU 4.0
788 */
789 U_STABLE UBool U_EXPORT2
790 uregex_hasTransparentBounds(const URegularExpression *regexp,
791 UErrorCode *status);
792
793
794 /**
795 * Sets the transparency of region bounds for this URegularExpression.
796 * Invoking this function with an argument of TRUE will set matches to use transparent bounds.
797 * If the boolean argument is FALSE, then opaque bounds will be used.
798 *
799 * Using transparent bounds, the boundaries of the matching region are transparent
800 * to lookahead, lookbehind, and boundary matching constructs. Those constructs can
801 * see text beyond the boundaries of the region while checking for a match.
802 *
803 * With opaque bounds, no text outside of the matching region is visible to lookahead,
804 * lookbehind, and boundary matching constructs.
805 *
806 * By default, opaque bounds are used.
807 *
808 * @param regexp The compiled regular expression.
809 * @param b TRUE for transparent bounds; FALSE for opaque bounds
810 * @param status A pointer to a UErrorCode to receive any errors.
811 * @stable ICU 4.0
812 **/
813 U_STABLE void U_EXPORT2
814 uregex_useTransparentBounds(URegularExpression *regexp,
815 UBool b,
816 UErrorCode *status);
817
818
819 /**
820 * Return true if this URegularExpression is using anchoring bounds.
821 * By default, anchoring region bounds are used.
822 *
823 * @param regexp The compiled regular expression.
824 * @param status A pointer to a UErrorCode to receive any errors.
825 * @return TRUE if this matcher is using anchoring bounds.
826 * @stable ICU 4.0
827 */
828 U_STABLE UBool U_EXPORT2
829 uregex_hasAnchoringBounds(const URegularExpression *regexp,
830 UErrorCode *status);
831
832
833 /**
834 * Set whether this URegularExpression is using Anchoring Bounds for its region.
835 * With anchoring bounds, pattern anchors such as ^ and $ will match at the start
836 * and end of the region. Without Anchoring Bounds, anchors will only match at
837 * the positions they would in the complete text.
838 *
839 * Anchoring Bounds are the default for regions.
840 *
841 * @param regexp The compiled regular expression.
842 * @param b TRUE to enable anchoring bounds; FALSE to disable them.
843 * @param status A pointer to a UErrorCode to receive any errors.
844 * @stable ICU 4.0
845 */
846 U_STABLE void U_EXPORT2
847 uregex_useAnchoringBounds(URegularExpression *regexp,
848 UBool b,
849 UErrorCode *status);
850
851 /**
852 * Return TRUE if the most recent matching operation touched the
853 * end of the text being processed. In this case, additional input text could
854 * change the results of that match.
855 *
856 * @param regexp The compiled regular expression.
857 * @param status A pointer to a UErrorCode to receive any errors.
858 * @return TRUE if the most recent match hit the end of input
859 * @stable ICU 4.0
860 */
861 U_STABLE UBool U_EXPORT2
862 uregex_hitEnd(const URegularExpression *regexp,
863 UErrorCode *status);
864
865 /**
866 * Return TRUE if the most recent match succeeded and additional input could cause
867 * it to fail. If this function returns false and a match was found, then more input
868 * might change the match but the match won't be lost. If a match was not found,
869 * then requireEnd has no meaning.
870 *
871 * @param regexp The compiled regular expression.
872 * @param status A pointer to a UErrorCode to receive any errors.
873 * @return TRUE if more input could cause the most recent match to no longer match.
874 * @stable ICU 4.0
875 */
876 U_STABLE UBool U_EXPORT2
877 uregex_requireEnd(const URegularExpression *regexp,
878 UErrorCode *status);
879
880
881
882
883
884 /**
885 * Replaces every substring of the input that matches the pattern
886 * with the given replacement string. This is a convenience function that
887 * provides a complete find-and-replace-all operation.
888 *
889 * This method scans the input string looking for matches of the pattern.
890 * Input that is not part of any match is copied unchanged to the
891 * destination buffer. Matched regions are replaced in the output
892 * buffer by the replacement string. The replacement string may contain
893 * references to capture groups; these take the form of $1, $2, etc.
894 *
895 * @param regexp The compiled regular expression.
896 * @param replacementText A string containing the replacement text.
897 * @param replacementLength The length of the replacement string, or
898 * -1 if it is NUL terminated.
899 * @param destBuf A (UChar *) buffer that will receive the result.
900 * @param destCapacity The capacity of the destination buffer.
901 * @param status A reference to a UErrorCode to receive any errors.
902 * @return The length of the string resulting from the find
903 * and replace operation. In the event that the
904 * destination capacity is inadequate, the return value
905 * is still the full length of the untruncated string.
906 * @stable ICU 3.0
907 */
908 U_STABLE int32_t U_EXPORT2
909 uregex_replaceAll(URegularExpression *regexp,
910 const UChar *replacementText,
911 int32_t replacementLength,
912 UChar *destBuf,
913 int32_t destCapacity,
914 UErrorCode *status);
915
916 /**
917 * Replaces every substring of the input that matches the pattern
918 * with the given replacement string. This is a convenience function that
919 * provides a complete find-and-replace-all operation.
920 *
921 * This method scans the input string looking for matches of the pattern.
922 * Input that is not part of any match is copied unchanged to the
923 * destination buffer. Matched regions are replaced in the output
924 * buffer by the replacement string. The replacement string may contain
925 * references to capture groups; these take the form of $1, $2, etc.
926 *
927 * @param regexp The compiled regular expression.
928 * @param replacement A string containing the replacement text.
929 * @param dest A mutable UText that will receive the result.
930 * If NULL, a new UText will be created (which may not be mutable).
931 * @param status A reference to a UErrorCode to receive any errors.
932 * @return A UText containing the results of the find and replace.
933 * If a pre-allocated UText was provided, it will always be used and returned.
934 *
935 * @draft ICU 4.6
936 */
937 U_DRAFT UText * U_EXPORT2
938 uregex_replaceAllUText(URegularExpression *regexp,
939 UText *replacement,
940 UText *dest,
941 UErrorCode *status);
942
943 /**
944 * Replaces the first substring of the input that matches the pattern
945 * with the given replacement string. This is a convenience function that
946 * provides a complete find-and-replace operation.
947 *
948 * This method scans the input string looking for a match of the pattern.
949 * All input that is not part of the match is copied unchanged to the
950 * destination buffer. The matched region is replaced in the output
951 * buffer by the replacement string. The replacement string may contain
952 * references to capture groups; these take the form of $1, $2, etc.
953 *
954 * @param regexp The compiled regular expression.
955 * @param replacementText A string containing the replacement text.
956 * @param replacementLength The length of the replacement string, or
957 * -1 if it is NUL terminated.
958 * @param destBuf A (UChar *) buffer that will receive the result.
959 * @param destCapacity The capacity of the destination buffer.
960 * @param status A reference to a UErrorCode to receive any errors.
961 * @return The length of the string resulting from the find
962 * and replace operation. In the event that the
963 * destination capacity is inadequate, the return value
964 * is still the full length of the untruncated string.
965 * @stable ICU 3.0
966 */
967 U_STABLE int32_t U_EXPORT2
968 uregex_replaceFirst(URegularExpression *regexp,
969 const UChar *replacementText,
970 int32_t replacementLength,
971 UChar *destBuf,
972 int32_t destCapacity,
973 UErrorCode *status);
974
975 /**
976 * Replaces the first substring of the input that matches the pattern
977 * with the given replacement string. This is a convenience function that
978 * provides a complete find-and-replace operation.
979 *
980 * This method scans the input string looking for a match of the pattern.
981 * All input that is not part of the match is copied unchanged to the
982 * destination buffer. The matched region is replaced in the output
983 * buffer by the replacement string. The replacement string may contain
984 * references to capture groups; these take the form of $1, $2, etc.
985 *
986 * @param regexp The compiled regular expression.
987 * @param replacement A string containing the replacement text.
988 * @param dest A mutable UText that will receive the result.
989 * If NULL, a new UText will be created (which may not be mutable).
990 * @param status A reference to a UErrorCode to receive any errors.
991 * @return A UText containing the results of the find and replace.
992 * If a pre-allocated UText was provided, it will always be used and returned.
993 *
994 * @draft ICU 4.6
995 */
996 U_DRAFT UText * U_EXPORT2
997 uregex_replaceFirstUText(URegularExpression *regexp,
998 UText *replacement,
999 UText *dest,
1000 UErrorCode *status);
1001
1002
1003 /**
1004 * Implements a replace operation intended to be used as part of an
1005 * incremental find-and-replace.
1006 *
1007 * <p>The input string, starting from the end of the previous match and ending at
1008 * the start of the current match, is appended to the destination string. Then the
1009 * replacement string is appended to the output string,
1010 * including handling any substitutions of captured text.</p>
1011 *
1012 * <p>A note on preflight computation of buffersize and error handling:
1013 * Calls to uregex_appendReplacement() and uregex_appendTail() are
1014 * designed to be chained, one after another, with the destination
1015 * buffer pointer and buffer capacity updated after each in preparation
1016 * for the next. If the destination buffer is exhausted partway through such a
1017 * sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned. Normal
1018 * ICU conventions are for a function to perform no action if it is
1019 * called with an error status, but for this one case, uregex_appendReplacement()
1020 * will operate normally so that buffer size computations will complete
1021 * correctly.</p>
1022 *
1023 * <p>For simple, prepackaged, non-incremental find-and-replace
1024 * operations, see replaceFirst() or replaceAll().</p>
1025 *
1026 * @param regexp The regular expression object.
1027 * @param replacementText The string that will replace the matched portion of the
1028 * input string as it is copied to the destination buffer.
1029 * The replacement text may contain references ($1, for
1030 * example) to capture groups from the match.
1031 * @param replacementLength The length of the replacement text string,
1032 * or -1 if the string is NUL terminated.
1033 * @param destBuf The buffer into which the results of the
1034 * find-and-replace are placed. On return, this pointer
1035 * will be updated to refer to the beginning of the
1036 * unused portion of buffer, leaving it in position for
1037 * a subsequent call to this function.
1038 * @param destCapacity The size of the output buffer. On return, this
1039 * parameter will be updated to reflect the space remaining
1040 * unused in the output buffer.
1041 * @param status A reference to a UErrorCode to receive any errors.
1042 * @return The length of the result string. In the event that
1043 * destCapacity is inadequate, the full length of the
1044 * untruncated output string is returned.
1045 *
1046 * @stable ICU 3.0
1047 *
1048 */
1049 U_STABLE int32_t U_EXPORT2
1050 uregex_appendReplacement(URegularExpression *regexp,
1051 const UChar *replacementText,
1052 int32_t replacementLength,
1053 UChar **destBuf,
1054 int32_t *destCapacity,
1055 UErrorCode *status);
1056
1057
1058 /**
1059 * Implements a replace operation intended to be used as part of an
1060 * incremental find-and-replace.
1061 *
1062 * <p>The input string, starting from the end of the previous match and ending at
1063 * the start of the current match, is appended to the destination string. Then the
1064 * replacement string is appended to the output string,
1065 * including handling any substitutions of captured text.</p>
1066 *
1067 * <p>For simple, prepackaged, non-incremental find-and-replace
1068 * operations, see replaceFirst() or replaceAll().</p>
1069 *
1070 * @param regexp The regular expression object.
1071 * @param replacementText The string that will replace the matched portion of the
1072 * input string as it is copied to the destination buffer.
1073 * The replacement text may contain references ($1, for
1074 * example) to capture groups from the match.
1075 * @param dest A mutable UText that will receive the result. Must not be NULL.
1076 * @param status A reference to a UErrorCode to receive any errors.
1077 *
1078 * @draft ICU 4.6
1079 */
1080 U_DRAFT void U_EXPORT2
1081 uregex_appendReplacementUText(URegularExpression *regexp,
1082 UText *replacementText,
1083 UText *dest,
1084 UErrorCode *status);
1085
1086
1087 /**
1088 * As the final step in a find-and-replace operation, append the remainder
1089 * of the input string, starting at the position following the last match,
1090 * to the destination string. <code>uregex_appendTail()</code> is intended
1091 * to be invoked after one or more invocations of the
1092 * <code>uregex_appendReplacement()</code> function.
1093 *
1094 * @param regexp The regular expression object. This is needed to
1095 * obtain the input string along with the position
1096 * of the last match within it.
1097 * @param destBuf The buffer in which the results of the
1098 * find-and-replace are placed. On return, the pointer
1099 * will be updated to refer to the beginning of the
1100 * unused portion of buffer.
1101 * @param destCapacity The size of the output buffer. On return, this
1102 * value will be updated to reflect the space remaining
1103 * unused in the output buffer.
1104 * @param status A reference to a UErrorCode to receive any errors.
1105 * @return The length of the result string. In the event that
1106 * destCapacity is inadequate, the full length of the
1107 * untruncated output string is returned.
1108 *
1109 * @stable ICU 3.0
1110 */
1111 U_STABLE int32_t U_EXPORT2
1112 uregex_appendTail(URegularExpression *regexp,
1113 UChar **destBuf,
1114 int32_t *destCapacity,
1115 UErrorCode *status);
1116
1117
1118 /**
1119 * As the final step in a find-and-replace operation, append the remainder
1120 * of the input string, starting at the position following the last match,
1121 * to the destination string. <code>uregex_appendTailUText()</code> is intended
1122 * to be invoked after one or more invocations of the
1123 * <code>uregex_appendReplacementUText()</code> function.
1124 *
1125 * @param regexp The regular expression object. This is needed to
1126 * obtain the input string along with the position
1127 * of the last match within it.
1128 * @param dest A mutable UText that will receive the result. Must not be NULL.
1129 * @return The destination UText.
1130 *
1131 * @draft ICU 4.6
1132 */
1133 U_DRAFT UText * U_EXPORT2
1134 uregex_appendTailUText(URegularExpression *regexp,
1135 UText *dest,
1136 UErrorCode *status);
1137
1138
1139
1140 /**
1141 * Split a string into fields. Somewhat like split() from Perl.
1142 * The pattern matches identify delimiters that separate the input
1143 * into fields. The input data between the matches becomes the
1144 * fields themselves.
1145 * <p>
1146 * Each of the fields is copied from the input string to the destination
1147 * buffer, and NUL terminated. The position of each field within
1148 * the destination buffer is returned in the destFields array.
1149 *
1150 * Note: another choice for the design of this function would be to not
1151 * copy the resulting fields at all, but to return indexes and
1152 * lengths within the source text.
1153 * Advantages would be
1154 * o Faster. No Copying.
1155 * o Nothing extra needed when field data may contain embedded NUL chars.
1156 * o Less memory needed if working on large data.
1157 * Disadvantages
1158 * o Less consistent with C++ split, which copies into an
1159 * array of UnicodeStrings.
1160 * o No NUL termination, extracted fields would be less convenient
1161 * to use in most cases.
1162 * o Possible problems in the future, when support for Unicode Normalization
1163 * could cause the fields to not correspond exactly to
1164 * a range of the source text.
1165 *
1166 * @param regexp The compiled regular expression.
1167 * @param destBuf A (UChar *) buffer to receive the fields that
1168 * are extracted from the input string. These
1169 * field pointers will refer to positions within the
1170 * destination buffer supplied by the caller. Any
1171 * extra positions within the destFields array will be
1172 * set to NULL.
1173 * @param destCapacity The capacity of the destBuf.
1174 * @param requiredCapacity The actual capacity required of the destBuf.
1175 * If destCapacity is too small, requiredCapacity will return
1176 * the total capacity required to hold all of the output, and
1177 * a U_BUFFER_OVERFLOW_ERROR will be returned.
1178 * @param destFields An array to be filled with the position of each
1179 * of the extracted fields within destBuf.
1180 * @param destFieldsCapacity The number of elements in the destFields array.
1181 * If the number of fields found is less than destFieldsCapacity,
1182 * the extra destFields elements are set to zero.
1183 * If destFieldsCapacity is too small, the trailing part of the
1184 * input, including any field delimiters, is treated as if it
1185 * were the last field - it is copied to the destBuf, and
1186 * its position in the destBuf is stored in the last element
1187 * of destFields. This behavior mimics that of Perl. It is not
1188 * an error condition, and no error status is returned when all destField
1189 * positions are used.
1190 * @param status A reference to a UErrorCode to receive any errors.
1191 * @return The number of fields into which the input string was split.
1192 * @stable ICU 3.0
1193 */
1194 U_STABLE int32_t U_EXPORT2
1195 uregex_split( URegularExpression *regexp,
1196 UChar *destBuf,
1197 int32_t destCapacity,
1198 int32_t *requiredCapacity,
1199 UChar *destFields[],
1200 int32_t destFieldsCapacity,
1201 UErrorCode *status);
1202
1203
1204 /**
1205 * Split a string into fields. Somewhat like split() from Perl.
1206 * The pattern matches identify delimiters that separate the input
1207 * into fields. The input data between the matches becomes the
1208 * fields themselves.
1209 * <p>
1210 * The behavior of this function is not very closely aligned with uregex_split();
1211 * instead, it is based on (and implemented directly on top of) the C++ split method.
1212 *
1213 * @param regexp The compiled regular expression.
1214 * @param destFields An array of mutable UText structs to receive the results of the split.
1215 * If a field is NULL, a new UText is allocated to contain the results for
1216 * that field. This new UText is not guaranteed to be mutable.
1217 * @param destFieldsCapacity The number of elements in the destination array.
1218 * If the number of fields found is less than destFieldsCapacity, the
1219 * extra strings in the destination array are not altered.
1220 * If the number of destination strings is less than the number
1221 * of fields, the trailing part of the input string, including any
1222 * field delimiters, is placed in the last destination string.
1223 * This behavior mimics that of Perl. It is not an error condition, and no
1224 * error status is returned when all destField positions are used.
1225 * @param status A reference to a UErrorCode to receive any errors.
1226 * @return The number of fields into which the input string was split.
1227 *
1228 * @draft ICU 4.6
1229 */
1230 U_DRAFT int32_t U_EXPORT2
1231 uregex_splitUText(URegularExpression *regexp,
1232 UText *destFields[],
1233 int32_t destFieldsCapacity,
1234 UErrorCode *status);
1235
1236
1237
1238
1239 /**
1240 * Set a processing time limit for match operations with this URegularExpression.
1241 *
1242 * Some patterns, when matching certain strings, can run in exponential time.
1243 * For practical purposes, the match operation may appear to be in an
1244 * infinite loop.
1245 * When a limit is set a match operation will fail with an error if the
1246 * limit is exceeded.
1247 * <p>
1248 * The units of the limit are steps of the match engine.
1249 * Correspondence with actual processor time will depend on the speed
1250 * of the processor and the details of the specific pattern, but will
1251 * typically be on the order of milliseconds.
1252 * <p>
1253 * By default, the matching time is not limited.
1254 * <p>
1255 *
1256 * @param regexp The compiled regular expression.
1257 * @param limit The limit value, or 0 for no limit.
1258 * @param status A reference to a UErrorCode to receive any errors.
1259 * @stable ICU 4.0
1260 */
1261 U_STABLE void U_EXPORT2
1262 uregex_setTimeLimit(URegularExpression *regexp,
1263 int32_t limit,
1264 UErrorCode *status);
1265
1266 /**
1267 * Get the time limit for matches with this URegularExpression.
1268 * A return value of zero indicates that there is no limit.
1269 *
1270 * @param regexp The compiled regular expression.
1271 * @param status A reference to a UErrorCode to receive any errors.
1272 * @return the maximum allowed time for a match, in units of processing steps.
1273 * @stable ICU 4.0
1274 */
1275 U_STABLE int32_t U_EXPORT2
1276 uregex_getTimeLimit(const URegularExpression *regexp,
1277 UErrorCode *status);
1278
1279 /**
1280 * Set the amount of heap storage available for use by the match backtracking stack.
1281 * <p>
1282 * ICU uses a backtracking regular expression engine, with the backtrack stack
1283 * maintained on the heap. This function sets the limit to the amount of memory
1284 * that can be used for this purpose. A backtracking stack overflow will
1285 * result in an error from the match operation that caused it.
1286 * <p>
1287 * A limit is desirable because a malicious or poorly designed pattern can use
1288 * excessive memory, potentially crashing the process. A limit is enabled
1289 * by default.
1290 * <p>
1291 * @param regexp The compiled regular expression.
1292 * @param limit The maximum size, in bytes, of the matching backtrack stack.
1293 * A value of -1 means no limit.
1294 * The limit must be greater than zero, or -1.
1295 * @param status A reference to a UErrorCode to receive any errors.
1296 *
1297 * @stable ICU 4.0
1298 */
1299 U_STABLE void U_EXPORT2
1300 uregex_setStackLimit(URegularExpression *regexp,
1301 int32_t limit,
1302 UErrorCode *status);
1303
1304 /**
1305 * Get the size of the heap storage available for use by the backtracking stack.
1306 *
1307 * @return the maximum backtracking stack size, in bytes, or zero if the
1308 * stack size is unlimited.
1309 * @stable ICU 4.0
1310 */
1311 U_STABLE int32_t U_EXPORT2
1312 uregex_getStackLimit(const URegularExpression *regexp,
1313 UErrorCode *status);
1314
1315
1316 /**
1317 * Function pointer for a regular expression matching callback function.
1318 * When set, a callback function will be called periodically during matching
1319 * operations. If the callback function returns FALSE, the matching
1320 * operation will be terminated early.
1321 *
1322 * Note: the callback function must not call other functions on this
1323 * URegularExpression.
1324 *
1325 * @param context context pointer. The callback function will be invoked
1326 * with the context specified at the time that
1327 * uregex_setMatchCallback() is called.
1328 * @param steps the accumulated processing time, in match steps,
1329 * for this matching operation.
1330 * @return TRUE to continue the matching operation.
1331 * FALSE to terminate the matching operation.
1332 * @stable ICU 4.0
1333 */
1334 U_CDECL_BEGIN
1335 typedef UBool U_CALLCONV URegexMatchCallback (
1336 const void *context,
1337 int32_t steps);
1338 U_CDECL_END
1339
1340 /**
1341 * Set a callback function for this URegularExpression.
1342 * During matching operations the function will be called periodically,
1343 * giving the application the opportunity to terminate a long-running
1344 * match.
1345 *
1346 * @param regexp The compiled regular expression.
1347 * @param callback A pointer to the user-supplied callback function.
1348 * @param context User context pointer. The value supplied at the
1349 * time the callback function is set will be saved
1350 * and passed to the callback each time that it is called.
1351 * @param status A reference to a UErrorCode to receive any errors.
1352 * @stable ICU 4.0
1353 */
1354 U_STABLE void U_EXPORT2
1355 uregex_setMatchCallback(URegularExpression *regexp,
1356 URegexMatchCallback *callback,
1357 const void *context,
1358 UErrorCode *status);
1359
1360
1361 /**
1362 * Get the callback function for this URegularExpression.
1363 *
1364 * @param regexp The compiled regular expression.
1365 * @param callback Out parameter, receives a pointer to the user-supplied
1366 * callback function.
1367 * @param context Out parameter, receives the user context pointer that
1368 * was set when uregex_setMatchCallback() was called.
1369 * @param status A reference to a UErrorCode to receive any errors.
1370 * @stable ICU 4.0
1371 */
1372 U_STABLE void U_EXPORT2
1373 uregex_getMatchCallback(const URegularExpression *regexp,
1374 URegexMatchCallback **callback,
1375 const void **context,
1376 UErrorCode *status);
1377
1378
1379 /**
1380 * Function pointer for a regular expression find callback function.
1381 *
1382 * When set, a callback function will be called during a find operation
1383 * and for operations that depend on find, such as findNext, split and some replace
1384 * operations like replaceFirst.
1385 * The callback will usually be called after each attempt at a match, but this is not a
1386 * guarantee that the callback will be invoked at each character. For finds where the
1387 * match engine is invoked at each character, this may be close to true, but less likely
1388 * for more optimized loops where the pattern is known to only start, and the match
1389 * engine invoked, at certain characters.
1390 * When invoked, this callback will specify the index at which a match operation is about
1391 * to be attempted, giving the application the opportunity to terminate a long-running
1392 * find operation.
1393 *
1394 * If the callback function returns FALSE, the find operation will be terminated early.
1395 *
1396 * Note: the callback function must not call other functions on this
1397 * URegularExpression
1398 *
1399 * @param context context pointer. The callback function will be invoked
1400 * with the context specified at the time that
1401 * uregex_setFindProgressCallback() is called.
1402 * @param matchIndex the next index at which a match will be attempted for this
1403 * find operation. If this callback interrupts the search, this is the
1404 * index at which a find/findNext operation may be re-initiated.
1405 * @return TRUE to continue the matching operation.
1406 * FALSE to terminate the matching operation.
1407 * @draft ICU 4.6
1408 */
1409 U_CDECL_BEGIN
1410 typedef UBool U_CALLCONV URegexFindProgressCallback (
1411 const void *context,
1412 int64_t matchIndex);
1413 U_CDECL_END
1414
1415 /**
1416 * Set the find progress callback function for this URegularExpression.
1417 *
1418 * @param regexp The compiled regular expression.
1419 * @param callback A pointer to the user-supplied callback function.
1420 * @param context User context pointer. The value supplied at the
1421 * time the callback function is set will be saved
1422 * and passed to the callback each time that it is called.
1423 * @param status A reference to a UErrorCode to receive any errors.
1424 * @draft ICU 4.6
1425 */
1426 U_DRAFT void U_EXPORT2
1427 uregex_setFindProgressCallback(URegularExpression *regexp,
1428 URegexFindProgressCallback *callback,
1429 const void *context,
1430 UErrorCode *status);
1431
1432
1433 /**
1434 * Get the find progress callback function for this URegularExpression.
1435 *
1436 * @param regexp The compiled regular expression.
1437 * @param callback Out parameter, receives a pointer to the user-supplied
1438 * callback function.
1439 * @param context Out parameter, receives the user context pointer that
1440 * was set when uregex_setFindProgressCallback() was called.
1441 * @param status A reference to a UErrorCode to receive any errors.
1442 * @draft ICU 4.6
1443 */
1444 U_DRAFT void U_EXPORT2
1445 uregex_getFindProgressCallback(const URegularExpression *regexp,
1446 URegexFindProgressCallback **callback,
1447 const void **context,
1448 UErrorCode *status);
1449
1450 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
1451 #endif /* UREGEX_H */
OLDNEW
« no previous file with comments | « public/i18n/unicode/unum.h ('k') | public/i18n/unicode/usearch.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698