Index: source/i18n/unicode/regex.h |
diff --git a/source/i18n/unicode/regex.h b/source/i18n/unicode/regex.h |
index 6f805f88e3a15652728bb89f81bcccfd6421d02f..1e23905a3abc8f6589556900918f0ba3f63525cb 100644 |
--- a/source/i18n/unicode/regex.h |
+++ b/source/i18n/unicode/regex.h |
@@ -1,6 +1,6 @@ |
/* |
********************************************************************** |
-* Copyright (C) 2002-2014, International Business Machines |
+* Copyright (C) 2002-2015, International Business Machines |
* Corporation and others. All Rights Reserved. |
********************************************************************** |
* file name: regex.h |
@@ -55,6 +55,8 @@ |
// Forward Declarations |
+struct UHashtable; |
+ |
U_NAMESPACE_BEGIN |
struct Regex8BitSet; |
@@ -136,7 +138,7 @@ public: |
/** |
* Create an exact copy of this RegexPattern object. Since RegexPattern is not |
- * intended to be subclasses, <code>clone()</code> and the copy construction are |
+ * intended to be subclassed, <code>clone()</code> and the copy construction are |
* equivalent operations. |
* @return the copy of this RegexPattern |
* @stable ICU 2.4 |
@@ -438,6 +440,41 @@ public: |
/** |
+ * Get the group number corresponding to a named capture group. |
+ * The returned number can be used with any function that accesses |
+ * capture groups by number. |
+ * |
+ * The function returns an error status if the specified name does not |
+ * appear in the pattern. |
+ * |
+ * @param groupName The capture group name. |
+ * @param status A UErrorCode to receive any errors. |
+ * @return The group number corresponding to the named capture group. |
+ * @draft ICU 55 |
+ */ |
+ virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const; |
+ |
+ |
+ /** |
+ * Get the group number corresponding to a named capture group. |
+ * The returned number can be used with any function that accesses |
+ * capture groups by number. |
+ * |
+ * The function returns an error status if the specified name does not |
+ * appear in the pattern. |
+ * |
+ * @param groupName The capture group name, |
+ * platform invariant characters only. |
+ * @param nameLength The length of the name, or -1 if the name is |
+ * nul-terminated. |
+ * @param status A UErrorCode to receive any errors. |
+ * @return The group number corresponding to the named capture group. |
+ * @draft ICU 55 |
+ */ |
+ virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const; |
+ |
+ |
+ /** |
* Split a string into fields. Somewhat like split() from Perl or Java. |
* Pattern matches identify delimiters that separate the input |
* into fields. The input data between the delimiters becomes the |
@@ -573,8 +610,6 @@ private: |
UVector32 *fGroupMap; // Map from capture group number to position of |
// the group's variables in the matcher stack frame. |
- int32_t fMaxCaptureDigits; |
- |
UnicodeSet **fStaticSets; // Ptr to static (shared) sets for predefined |
// regex character classes, e.g. Word. |
@@ -589,6 +624,8 @@ private: |
Regex8BitSet *fInitialChars8; |
UBool fNeedsAltInput; |
+ UHashtable *fNamedCaptureMap; // Map from capture group names to numbers. |
+ |
friend class RegexCompile; |
friend class RegexMatcher; |
friend class RegexCImpl; |
@@ -812,7 +849,7 @@ public: |
* position may not be valid with the altered input string.</p> |
* @param status A reference to a UErrorCode to receive any errors. |
* @return TRUE if a match is found. |
- * @internal |
+ * @draft ICU 55 |
*/ |
virtual UBool find(UErrorCode &status); |
@@ -844,6 +881,11 @@ public: |
* Returns a string containing the text captured by the given group |
* during the previous match operation. Group(0) is the entire match. |
* |
+ * A zero length string is returned both for capture groups that did not |
+ * participate in the match and for actual zero length matches. |
+ * To distinguish between these two cases use the function start(), |
+ * which returns -1 for non-participating groups. |
+ * |
* @param groupNum the capture group number |
* @param status A reference to a UErrorCode to receive any errors. |
* Possible errors are U_REGEX_INVALID_STATE if no match |
@@ -854,7 +896,6 @@ public: |
*/ |
virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const; |
- |
/** |
* Returns the number of capturing groups in this matcher's pattern. |
* @return the number of capture groups |
@@ -883,6 +924,11 @@ public: |
* Returns a shallow clone of the entire live input string with the UText current native index |
* set to the beginning of the requested group. |
* |
+ * A group length of zero is returned both for capture groups that did not |
+ * participate in the match and for actual zero length matches. |
+ * To distinguish between these two cases use the function start(), |
+ * which returns -1 for non-participating groups. |
+ * |
* @param groupNum The capture group number. |
* @param dest The UText into which the input should be cloned, or NULL to create a new UText. |
* @param group_len A reference to receive the length of the desired capture group |
@@ -897,24 +943,6 @@ public: |
virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const; |
/** |
- * Returns a string containing the text captured by the given group |
- * during the previous match operation. Group(0) is the entire match. |
- * |
- * @param groupNum the capture group number |
- * @param dest A mutable UText in which the matching text is placed. |
- * If NULL, a new UText will be created (which may not be mutable). |
- * @param status A reference to a UErrorCode to receive any errors. |
- * Possible errors are U_REGEX_INVALID_STATE if no match |
- * has been attempted or the last match failed. |
- * @return A string containing the matched input text. If a pre-allocated UText |
- * was provided, it will always be used and returned. |
- * |
- * @internal ICU 4.4 technology preview |
- */ |
- virtual UText *group(int32_t groupNum, UText *dest, UErrorCode &status) const; |
- |
- |
- /** |
* Returns the index in the input string of the start of the text matched |
* during the previous match operation. |
* @param status a reference to a UErrorCode to receive any errors. |
@@ -963,7 +991,6 @@ public: |
*/ |
virtual int64_t start64(int32_t group, UErrorCode &status) const; |
- |
/** |
* Returns the index in the input string of the first character following the |
* text matched during the previous match operation. |
@@ -1033,7 +1060,6 @@ public: |
*/ |
virtual int64_t end64(int32_t group, UErrorCode &status) const; |
- |
/** |
* Resets this matcher. The effect is to remove any memory of previous matches, |
* and to cause subsequent find() operations to begin at the beginning of |