| OLD | NEW | 
|     1 /* |     1 /* | 
|     2 ******************************************************************************* |     2 ******************************************************************************* | 
|     3 *   Copyright (C) 2004-2014, International Business Machines |     3 *   Copyright (C) 2004-2015, International Business Machines | 
|     4 *   Corporation and others.  All Rights Reserved. |     4 *   Corporation and others.  All Rights Reserved. | 
|     5 ******************************************************************************* |     5 ******************************************************************************* | 
|     6 *   file name:  uregex.cpp |     6 *   file name:  uregex.cpp | 
|     7 */ |     7 */ | 
|     8  |     8  | 
|     9 #include "unicode/utypes.h" |     9 #include "unicode/utypes.h" | 
|    10  |    10  | 
|    11 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |    11 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 
|    12  |    12  | 
|    13 #include "unicode/regex.h" |    13 #include "unicode/regex.h" | 
|    14 #include "unicode/uregex.h" |    14 #include "unicode/uregex.h" | 
|    15 #include "unicode/unistr.h" |    15 #include "unicode/unistr.h" | 
|    16 #include "unicode/ustring.h" |    16 #include "unicode/ustring.h" | 
|    17 #include "unicode/uchar.h" |    17 #include "unicode/uchar.h" | 
|    18 #include "unicode/uobject.h" |    18 #include "unicode/uobject.h" | 
|    19 #include "unicode/utf16.h" |    19 #include "unicode/utf16.h" | 
 |    20 #include "cmemory.h" | 
 |    21 #include "uassert.h" | 
 |    22 #include "uhash.h" | 
|    20 #include "umutex.h" |    23 #include "umutex.h" | 
|    21 #include "uassert.h" |    24 #include "uvectr32.h" | 
|    22 #include "cmemory.h" |  | 
|    23  |    25  | 
|    24 #include "regextxt.h" |    26 #include "regextxt.h" | 
|    25  |    27  | 
|    26 #include <stdio.h> |  | 
|    27  |  | 
|    28 U_NAMESPACE_BEGIN |    28 U_NAMESPACE_BEGIN | 
|    29  |    29  | 
|    30 #define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0) |    30 #define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0) | 
|    31  |    31  | 
|    32 struct RegularExpression: public UMemory { |    32 struct RegularExpression: public UMemory { | 
|    33 public: |    33 public: | 
|    34     RegularExpression(); |    34     RegularExpression(); | 
|    35     ~RegularExpression(); |    35     ~RegularExpression(); | 
|    36     int32_t           fMagic; |    36     int32_t           fMagic; | 
|    37     RegexPattern     *fPat; |    37     RegexPattern     *fPat; | 
| (...skipping 582 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|   620     if (validateRE(regexp, FALSE, status) == FALSE) { |   620     if (validateRE(regexp, FALSE, status) == FALSE) { | 
|   621         return 0; |   621         return 0; | 
|   622     } |   622     } | 
|   623     int32_t  result = regexp->fMatcher->groupCount(); |   623     int32_t  result = regexp->fMatcher->groupCount(); | 
|   624     return result; |   624     return result; | 
|   625 } |   625 } | 
|   626  |   626  | 
|   627  |   627  | 
|   628 //------------------------------------------------------------------------------ |   628 //------------------------------------------------------------------------------ | 
|   629 // |   629 // | 
 |   630 //    uregex_groupNumberFromName | 
 |   631 // | 
 |   632 //------------------------------------------------------------------------------ | 
 |   633 int32_t | 
 |   634 uregex_groupNumberFromName(URegularExpression *regexp2, | 
 |   635                            const UChar        *groupName, | 
 |   636                            int32_t             nameLength, | 
 |   637                            UErrorCode          *status) { | 
 |   638     RegularExpression *regexp = (RegularExpression*)regexp2; | 
 |   639     if (validateRE(regexp, FALSE, status) == FALSE) { | 
 |   640         return 0; | 
 |   641     } | 
 |   642     int32_t  result = regexp->fPat->groupNumberFromName(UnicodeString(groupName,
       nameLength), *status); | 
 |   643     return result; | 
 |   644 } | 
 |   645  | 
 |   646 int32_t | 
 |   647 uregex_groupNumberFromCName(URegularExpression *regexp2, | 
 |   648                             const char         *groupName, | 
 |   649                             int32_t             nameLength, | 
 |   650                             UErrorCode          *status) { | 
 |   651     RegularExpression *regexp = (RegularExpression*)regexp2; | 
 |   652     if (validateRE(regexp, FALSE, status) == FALSE) { | 
 |   653         return 0; | 
 |   654     } | 
 |   655     return regexp->fPat->groupNumberFromName(groupName, nameLength, *status); | 
 |   656 } | 
 |   657  | 
 |   658 //------------------------------------------------------------------------------ | 
 |   659 // | 
|   630 //    uregex_group |   660 //    uregex_group | 
|   631 // |   661 // | 
|   632 //------------------------------------------------------------------------------ |   662 //------------------------------------------------------------------------------ | 
|   633 U_CAPI int32_t U_EXPORT2 |   663 U_CAPI int32_t U_EXPORT2 | 
|   634 uregex_group(URegularExpression *regexp2, |   664 uregex_group(URegularExpression *regexp2, | 
|   635              int32_t             groupNum, |   665              int32_t             groupNum, | 
|   636              UChar              *dest, |   666              UChar              *dest, | 
|   637              int32_t             destCapacity, |   667              int32_t             destCapacity, | 
|   638              UErrorCode          *status)  { |   668              UErrorCode          *status)  { | 
|   639     RegularExpression *regexp = (RegularExpression*)regexp2; |   669     RegularExpression *regexp = (RegularExpression*)regexp2; | 
|   640     if (validateRE(regexp, TRUE, status) == FALSE) { |   670     if (validateRE(regexp, TRUE, status) == FALSE) { | 
|   641         return 0; |   671         return 0; | 
|   642     } |   672     } | 
|   643     if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) { |   673     if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) { | 
|   644         *status = U_ILLEGAL_ARGUMENT_ERROR; |   674         *status = U_ILLEGAL_ARGUMENT_ERROR; | 
|   645         return 0; |   675         return 0; | 
|   646     } |   676     } | 
|   647  |   677  | 
|   648     if (destCapacity == 0 || regexp->fText != NULL) { |   678     if (destCapacity == 0 || regexp->fText != NULL) { | 
|   649         // If preflighting or if we already have the text as UChars, |   679         // If preflighting or if we already have the text as UChars, | 
|   650         // this is a little cheaper than going through uregex_groupUTextDeep() |   680         // this is a little cheaper than extracting from the UText | 
|   651  |   681  | 
|   652         // |   682         // | 
|   653         // Pick up the range of characters from the matcher |   683         // Pick up the range of characters from the matcher | 
|   654         // |   684         // | 
|   655         int32_t  startIx = regexp->fMatcher->start(groupNum, *status); |   685         int32_t  startIx = regexp->fMatcher->start(groupNum, *status); | 
|   656         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status); |   686         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status); | 
|   657         if (U_FAILURE(*status)) { |   687         if (U_FAILURE(*status)) { | 
|   658             return 0; |   688             return 0; | 
|   659         } |   689         } | 
|   660  |   690  | 
| (...skipping 12 matching lines...) Expand all  Loading... | 
|   673         } |   703         } | 
|   674  |   704  | 
|   675         // |   705         // | 
|   676         // Copy capture group to user's buffer |   706         // Copy capture group to user's buffer | 
|   677         // |   707         // | 
|   678         if (copyLength > 0) { |   708         if (copyLength > 0) { | 
|   679             u_memcpy(dest, &regexp->fText[startIx], copyLength); |   709             u_memcpy(dest, &regexp->fText[startIx], copyLength); | 
|   680         } |   710         } | 
|   681         return fullLength; |   711         return fullLength; | 
|   682     } else { |   712     } else { | 
|   683         int32_t result = 0; |   713         int64_t  start = regexp->fMatcher->start64(groupNum, *status); | 
|   684         UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status
      ); |   714         int64_t  limit = regexp->fMatcher->end64(groupNum, *status); | 
|   685         if (U_SUCCESS(*status)) { |   715         if (U_FAILURE(*status)) { | 
|   686             result = utext_extract(groupText, 0, utext_nativeLength(groupText), 
      dest, destCapacity, status); |   716             return 0; | 
|   687         } |   717         } | 
|   688         utext_close(groupText); |   718         // Note edge cases: | 
|   689         return result; |   719         //   Group didn't match: start == end == -1. UText trims to 0, UText giv
      es zero length result. | 
 |   720         //   Zero Length Match: start == end. | 
 |   721         int32_t length = utext_extract(regexp->fMatcher->inputText(), start, lim
      it, dest, destCapacity, status); | 
 |   722         return length; | 
|   690     } |   723     } | 
 |   724  | 
|   691 } |   725 } | 
|   692  |   726  | 
|   693  |   727  | 
|   694 //------------------------------------------------------------------------------ |   728 //------------------------------------------------------------------------------ | 
|   695 // |   729 // | 
|   696 //    uregex_groupUText |   730 //    uregex_groupUText | 
|   697 // |   731 // | 
|   698 //------------------------------------------------------------------------------ |   732 //------------------------------------------------------------------------------ | 
|   699 U_CAPI UText * U_EXPORT2 |   733 U_CAPI UText * U_EXPORT2 | 
|   700 uregex_groupUText(URegularExpression *regexp2, |   734 uregex_groupUText(URegularExpression *regexp2, | 
|   701                   int32_t             groupNum, |   735                   int32_t             groupNum, | 
|   702                   UText              *dest, |   736                   UText              *dest, | 
|   703                   int64_t            *groupLength, |   737                   int64_t            *groupLength, | 
|   704                   UErrorCode         *status)  { |   738                   UErrorCode         *status)  { | 
|   705     RegularExpression *regexp = (RegularExpression*)regexp2; |   739     RegularExpression *regexp = (RegularExpression*)regexp2; | 
|   706     if (validateRE(regexp, TRUE, status) == FALSE) { |   740     if (validateRE(regexp, TRUE, status) == FALSE) { | 
|   707         UErrorCode emptyTextStatus = U_ZERO_ERROR; |   741         UErrorCode emptyTextStatus = U_ZERO_ERROR; | 
|   708         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus))
      ; |   742         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus))
      ; | 
|   709     } |   743     } | 
|   710  |   744  | 
|   711     return regexp->fMatcher->group(groupNum, dest, *groupLength, *status); |   745     return regexp->fMatcher->group(groupNum, dest, *groupLength, *status); | 
|   712 } |   746 } | 
|   713  |   747  | 
|   714 //------------------------------------------------------------------------------ |   748 //------------------------------------------------------------------------------ | 
|   715 // |   749 // | 
|   716 //    uregex_groupUTextDeep |  | 
|   717 // |  | 
|   718 //------------------------------------------------------------------------------ |  | 
|   719 U_CAPI UText * U_EXPORT2 |  | 
|   720 uregex_groupUTextDeep(URegularExpression *regexp2, |  | 
|   721                   int32_t             groupNum, |  | 
|   722                   UText              *dest, |  | 
|   723                   UErrorCode         *status)  { |  | 
|   724     RegularExpression *regexp = (RegularExpression*)regexp2; |  | 
|   725     if (validateRE(regexp, TRUE, status) == FALSE) { |  | 
|   726         UErrorCode emptyTextStatus = U_ZERO_ERROR; |  | 
|   727         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus))
      ; |  | 
|   728     } |  | 
|   729  |  | 
|   730     if (regexp->fText != NULL) { |  | 
|   731         // |  | 
|   732         // Pick up the range of characters from the matcher |  | 
|   733         // and use our already-extracted characters |  | 
|   734         // |  | 
|   735         int32_t  startIx = regexp->fMatcher->start(groupNum, *status); |  | 
|   736         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status); |  | 
|   737         if (U_FAILURE(*status)) { |  | 
|   738             UErrorCode emptyTextStatus = U_ZERO_ERROR; |  | 
|   739             return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStat
      us)); |  | 
|   740         } |  | 
|   741  |  | 
|   742         if (dest) { |  | 
|   743             utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[star
      tIx], endIx - startIx, status); |  | 
|   744         } else { |  | 
|   745             UText groupText = UTEXT_INITIALIZER; |  | 
|   746             utext_openUChars(&groupText, &regexp->fText[startIx], endIx - startI
      x, status); |  | 
|   747             dest = utext_clone(NULL, &groupText, TRUE, FALSE, status); |  | 
|   748             utext_close(&groupText); |  | 
|   749         } |  | 
|   750  |  | 
|   751         return dest; |  | 
|   752     } else { |  | 
|   753         return regexp->fMatcher->group(groupNum, dest, *status); |  | 
|   754     } |  | 
|   755 } |  | 
|   756  |  | 
|   757 //------------------------------------------------------------------------------ |  | 
|   758 // |  | 
|   759 //    uregex_start |   750 //    uregex_start | 
|   760 // |   751 // | 
|   761 //------------------------------------------------------------------------------ |   752 //------------------------------------------------------------------------------ | 
|   762 U_CAPI int32_t U_EXPORT2 |   753 U_CAPI int32_t U_EXPORT2 | 
|   763 uregex_start(URegularExpression *regexp2, |   754 uregex_start(URegularExpression *regexp2, | 
|   764              int32_t             groupNum, |   755              int32_t             groupNum, | 
|   765              UErrorCode          *status)  { |   756              UErrorCode          *status)  { | 
|   766     return (int32_t)uregex_start64( regexp2, groupNum, status); |   757     return (int32_t)uregex_start64( regexp2, groupNum, status); | 
|   767 } |   758 } | 
|   768  |   759  | 
| (...skipping 548 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  1317         int32_t                destFieldsCapacity, |  1308         int32_t                destFieldsCapacity, | 
|  1318         UErrorCode            *status); |  1309         UErrorCode            *status); | 
|  1319 }; |  1310 }; | 
|  1320  |  1311  | 
|  1321 U_NAMESPACE_END |  1312 U_NAMESPACE_END | 
|  1322  |  1313  | 
|  1323  |  1314  | 
|  1324  |  1315  | 
|  1325 static const UChar BACKSLASH  = 0x5c; |  1316 static const UChar BACKSLASH  = 0x5c; | 
|  1326 static const UChar DOLLARSIGN = 0x24; |  1317 static const UChar DOLLARSIGN = 0x24; | 
 |  1318 static const UChar LEFTBRACKET = 0x7b; | 
 |  1319 static const UChar RIGHTBRACKET = 0x7d; | 
|  1327  |  1320  | 
|  1328 // |  1321 // | 
|  1329 //  Move a character to an output buffer, with bounds checking on the index. |  1322 //  Move a character to an output buffer, with bounds checking on the index. | 
|  1330 //      Index advances even if capacity is exceeded, for preflight size computat
      ions. |  1323 //      Index advances even if capacity is exceeded, for preflight size computat
      ions. | 
|  1331 //      This little sequence is used a LOT. |  1324 //      This little sequence is used a LOT. | 
|  1332 // |  1325 // | 
|  1333 static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCap
      acity) { |  1326 static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCap
      acity) { | 
|  1334     if (*idx < bufCapacity) { |  1327     if (*idx < bufCapacity) { | 
|  1335         buf[*idx] = c; |  1328         buf[*idx] = c; | 
|  1336     } |  1329     } | 
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  1391  |  1384  | 
|  1392     // Copy input string from the end of previous match to start of current matc
      h |  1385     // Copy input string from the end of previous match to start of current matc
      h | 
|  1393     if (regexp->fText != NULL) { |  1386     if (regexp->fText != NULL) { | 
|  1394         int32_t matchStart; |  1387         int32_t matchStart; | 
|  1395         int32_t lastMatchEnd; |  1388         int32_t lastMatchEnd; | 
|  1396         if (UTEXT_USES_U16(m->fInputText)) { |  1389         if (UTEXT_USES_U16(m->fInputText)) { | 
|  1397             lastMatchEnd = (int32_t)m->fLastMatchEnd; |  1390             lastMatchEnd = (int32_t)m->fLastMatchEnd; | 
|  1398             matchStart = (int32_t)m->fMatchStart; |  1391             matchStart = (int32_t)m->fMatchStart; | 
|  1399         } else { |  1392         } else { | 
|  1400             // !!!: Would like a better way to do this! |  1393             // !!!: Would like a better way to do this! | 
|  1401             UErrorCode status = U_ZERO_ERROR; |  1394             UErrorCode tempStatus = U_ZERO_ERROR; | 
|  1402             lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NUL
      L, 0, &status); |  1395             lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NUL
      L, 0, &tempStatus); | 
|  1403             status = U_ZERO_ERROR; |  1396             tempStatus = U_ZERO_ERROR; | 
|  1404             matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMat
      chEnd, m->fMatchStart, NULL, 0, &status); |  1397             matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMat
      chEnd, m->fMatchStart, NULL, 0, &tempStatus); | 
|  1405         } |  1398         } | 
|  1406         for (i=lastMatchEnd; i<matchStart; i++) { |  1399         for (i=lastMatchEnd; i<matchStart; i++) { | 
|  1407             appendToBuf(regexp->fText[i], &destIdx, dest, capacity); |  1400             appendToBuf(regexp->fText[i], &destIdx, dest, capacity); | 
|  1408         } |  1401         } | 
|  1409     } else { |  1402     } else { | 
|  1410         UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore |  1403         UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore | 
|  1411         destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart
      , |  1404         destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart
      , | 
|  1412                                  dest==NULL?NULL:&dest[destIdx], REMAINING_CAPAC
      ITY(destIdx, capacity), |  1405                                  dest==NULL?NULL:&dest[destIdx], REMAINING_CAPAC
      ITY(destIdx, capacity), | 
|  1413                                  &possibleOverflowError); |  1406                                  &possibleOverflowError); | 
|  1414     } |  1407     } | 
|  1415     U_ASSERT(destIdx >= 0); |  1408     U_ASSERT(destIdx >= 0); | 
|  1416  |  1409  | 
|  1417     // scan the replacement text, looking for substitutions ($n) and \escapes. |  1410     // scan the replacement text, looking for substitutions ($n) and \escapes. | 
|  1418     int32_t  replIdx = 0; |  1411     int32_t  replIdx = 0; | 
|  1419     while (replIdx < replacementLength) { |  1412     while (replIdx < replacementLength && U_SUCCESS(*status)) { | 
|  1420         UChar  c = replacementText[replIdx]; |  1413         UChar  c = replacementText[replIdx]; | 
|  1421         replIdx++; |  1414         replIdx++; | 
|  1422         if (c != DOLLARSIGN && c != BACKSLASH) { |  1415         if (c != DOLLARSIGN && c != BACKSLASH) { | 
|  1423             // Common case, no substitution, no escaping, |  1416             // Common case, no substitution, no escaping, | 
|  1424             //  just copy the char to the dest buf. |  1417             //  just copy the char to the dest buf. | 
|  1425             appendToBuf(c, &destIdx, dest, capacity); |  1418             appendToBuf(c, &destIdx, dest, capacity); | 
|  1426             continue; |  1419             continue; | 
|  1427         } |  1420         } | 
|  1428  |  1421  | 
|  1429         if (c == BACKSLASH) { |  1422         if (c == BACKSLASH) { | 
| (...skipping 28 matching lines...) Expand all  Loading... | 
|  1458                 //        treat it as a plain \<anything> escape. |  1451                 //        treat it as a plain \<anything> escape. | 
|  1459             } |  1452             } | 
|  1460  |  1453  | 
|  1461             // Plain backslash escape.  Just put out the escaped character. |  1454             // Plain backslash escape.  Just put out the escaped character. | 
|  1462             appendToBuf(c, &destIdx, dest, capacity); |  1455             appendToBuf(c, &destIdx, dest, capacity); | 
|  1463  |  1456  | 
|  1464             replIdx++; |  1457             replIdx++; | 
|  1465             continue; |  1458             continue; | 
|  1466         } |  1459         } | 
|  1467  |  1460  | 
 |  1461         // We've got a $.  Pick up the following capture group name or number. | 
 |  1462         // For numbers, consume only digits that produce a valid capture group f
      or the pattern. | 
 |  1463  | 
 |  1464         int32_t groupNum  = 0; | 
 |  1465         U_ASSERT(c == DOLLARSIGN); | 
 |  1466         UChar32 c32; | 
 |  1467         U16_GET(replacementText, 0, replIdx, replacementLength, c32); | 
 |  1468         if (u_isdigit(c32)) { | 
 |  1469             int32_t numDigits = 0; | 
 |  1470             int32_t numCaptureGroups = m->fPattern->fGroupMap->size(); | 
 |  1471             for (;;) { | 
 |  1472                 if (replIdx >= replacementLength) { | 
 |  1473                     break; | 
 |  1474                 } | 
 |  1475                 U16_GET(replacementText, 0, replIdx, replacementLength, c32); | 
 |  1476                 if (u_isdigit(c32) == FALSE) { | 
 |  1477                     break; | 
 |  1478                 } | 
 |  1479  | 
 |  1480                 int32_t digitVal = u_charDigitValue(c32); | 
 |  1481                 if (groupNum * 10 + digitVal <= numCaptureGroups) { | 
 |  1482                     groupNum = groupNum * 10 + digitVal; | 
 |  1483                     U16_FWD_1(replacementText, replIdx, replacementLength); | 
 |  1484                     numDigits++; | 
 |  1485                 } else { | 
 |  1486                     if (numDigits == 0) { | 
 |  1487                         *status = U_INDEX_OUTOFBOUNDS_ERROR; | 
 |  1488                     } | 
 |  1489                     break; | 
 |  1490                 } | 
 |  1491             } | 
 |  1492         } else if (c32 == LEFTBRACKET) { | 
 |  1493             // Scan for Named Capture Group, ${name}. | 
 |  1494             UnicodeString groupName; | 
 |  1495             U16_FWD_1(replacementText, replIdx, replacementLength); | 
 |  1496             while (U_SUCCESS(*status) && c32 != RIGHTBRACKET) {  | 
 |  1497                 if (replIdx >= replacementLength) { | 
 |  1498                     *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; | 
 |  1499                     break; | 
 |  1500                 } | 
 |  1501                 U16_NEXT(replacementText, replIdx, replacementLength, c32); | 
 |  1502                 if ((c32 >= 0x41 && c32 <= 0x5a) ||           // A..Z | 
 |  1503                         (c32 >= 0x61 && c32 <= 0x7a) ||       // a..z | 
 |  1504                         (c32 >= 0x31 && c32 <= 0x39)) {       // 0..9 | 
 |  1505                     groupName.append(c32); | 
 |  1506                 } else if (c32 == RIGHTBRACKET) { | 
 |  1507                     groupNum = uhash_geti(regexp->fPat->fNamedCaptureMap, &group
      Name); | 
 |  1508                     if (groupNum == 0) { | 
 |  1509                         // Name not defined by pattern. | 
 |  1510                         *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; | 
 |  1511                     } | 
 |  1512                 } else { | 
 |  1513                     // Character was something other than a name char or a closi
      ng '}' | 
 |  1514                     *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; | 
 |  1515                 } | 
 |  1516             } | 
 |  1517         } else { | 
 |  1518             // $ not followed by {name} or digits. | 
 |  1519             *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; | 
 |  1520         } | 
|  1468  |  1521  | 
|  1469  |  1522  | 
|  1470         // We've got a $.  Pick up a capture group number if one follows. |  1523         // Finally, append the capture group data to the destination. | 
|  1471         // Consume at most the number of digits necessary for the largest captur
      e |  1524         if (U_SUCCESS(*status)) { | 
|  1472         // number that is valid for this pattern. |  1525             destIdx += uregex_group((URegularExpression*)regexp, groupNum, | 
|  1473  |  1526                                     dest==NULL?NULL:&dest[destIdx], REMAINING_CA
      PACITY(destIdx, capacity), status); | 
|  1474         int32_t numDigits = 0; |  1527             if (*status == U_BUFFER_OVERFLOW_ERROR) { | 
|  1475         int32_t groupNum  = 0; |  1528                 // Ignore buffer overflow when extracting the group.  We need to | 
|  1476         UChar32 digitC; |  1529                 //   continue on to get full size of the untruncated result.  We
       will | 
|  1477         for (;;) { |  1530                 //   raise our own buffer overflow error at the end. | 
|  1478             if (replIdx >= replacementLength) { |  1531                 *status = U_ZERO_ERROR; | 
|  1479                 break; |  | 
|  1480             } |  | 
|  1481             U16_GET(replacementText, 0, replIdx, replacementLength, digitC); |  | 
|  1482             if (u_isdigit(digitC) == FALSE) { |  | 
|  1483                 break; |  | 
|  1484             } |  | 
|  1485  |  | 
|  1486             U16_FWD_1(replacementText, replIdx, replacementLength); |  | 
|  1487             groupNum=groupNum*10 + u_charDigitValue(digitC); |  | 
|  1488             numDigits++; |  | 
|  1489             if (numDigits >= m->fPattern->fMaxCaptureDigits) { |  | 
|  1490                 break; |  | 
|  1491             } |  1532             } | 
|  1492         } |  1533         } | 
|  1493  |  1534  | 
|  1494  |  | 
|  1495         if (numDigits == 0) { |  | 
|  1496             // The $ didn't introduce a group number at all. |  | 
|  1497             // Treat it as just part of the substitution text. |  | 
|  1498             appendToBuf(DOLLARSIGN, &destIdx, dest, capacity); |  | 
|  1499             continue; |  | 
|  1500         } |  | 
|  1501  |  | 
|  1502         // Finally, append the capture group data to the destination. |  | 
|  1503         destIdx += uregex_group((URegularExpression*)regexp, groupNum, |  | 
|  1504                                 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACI
      TY(destIdx, capacity), status); |  | 
|  1505         if (*status == U_BUFFER_OVERFLOW_ERROR) { |  | 
|  1506             // Ignore buffer overflow when extracting the group.  We need to |  | 
|  1507             //   continue on to get full size of the untruncated result.  We wil
      l |  | 
|  1508             //   raise our own buffer overflow error at the end. |  | 
|  1509             *status = U_ZERO_ERROR; |  | 
|  1510         } |  | 
|  1511  |  | 
|  1512         if (U_FAILURE(*status)) { |  1535         if (U_FAILURE(*status)) { | 
|  1513             // Can fail if group number is out of range. |  1536             // bad group number or name. | 
|  1514             break; |  1537             break; | 
|  1515         } |  1538         } | 
|  1516  |  | 
|  1517     } |  1539     } | 
|  1518  |  1540  | 
|  1519     // |  1541     // | 
|  1520     //  Nul Terminate the dest buffer if possible. |  1542     //  Nul Terminate the dest buffer if possible. | 
|  1521     //  Set the appropriate buffer overflow or not terminated error, if needed. |  1543     //  Set the appropriate buffer overflow or not terminated error, if needed. | 
|  1522     // |  1544     // | 
|  1523     if (destIdx < capacity) { |  1545     if (destIdx < capacity) { | 
|  1524         dest[destIdx] = 0; |  1546         dest[destIdx] = 0; | 
|  1525     } else if (destIdx == *destCapacity) { |  1547     } else if (U_SUCCESS(*status)) { | 
|  1526         *status = U_STRING_NOT_TERMINATED_WARNING; |  1548         if (destIdx == *destCapacity) { | 
|  1527     } else { |  1549             *status = U_STRING_NOT_TERMINATED_WARNING; | 
|  1528         *status = U_BUFFER_OVERFLOW_ERROR; |  1550         } else { | 
 |  1551             *status = U_BUFFER_OVERFLOW_ERROR; | 
 |  1552         } | 
|  1529     } |  1553     } | 
|  1530  |  1554  | 
|  1531     // |  1555     // | 
|  1532     // Return an updated dest buffer and capacity to the caller. |  1556     // Return an updated dest buffer and capacity to the caller. | 
|  1533     // |  1557     // | 
|  1534     if (destIdx > 0 &&  *destCapacity > 0) { |  1558     if (destIdx > 0 &&  *destCapacity > 0) { | 
|  1535         if (destIdx < capacity) { |  1559         if (destIdx < capacity) { | 
|  1536             *destBuf      += destIdx; |  1560             *destBuf      += destIdx; | 
|  1537             *destCapacity -= destIdx; |  1561             *destCapacity -= destIdx; | 
|  1538         } else { |  1562         } else { | 
| (...skipping 403 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  1942                   UText                 *destFields[], |  1966                   UText                 *destFields[], | 
|  1943                   int32_t                destFieldsCapacity, |  1967                   int32_t                destFieldsCapacity, | 
|  1944                   UErrorCode            *status) { |  1968                   UErrorCode            *status) { | 
|  1945     RegularExpression *regexp = (RegularExpression*)regexp2; |  1969     RegularExpression *regexp = (RegularExpression*)regexp2; | 
|  1946     return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, de
      stFieldsCapacity, *status); |  1970     return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, de
      stFieldsCapacity, *status); | 
|  1947 } |  1971 } | 
|  1948  |  1972  | 
|  1949  |  1973  | 
|  1950 #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS |  1974 #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS | 
|  1951  |  1975  | 
| OLD | NEW |