OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2004-2014, International Business Machines | 3 * Copyright (C) 2004-2015, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * file name: uregex.cpp | 6 * file name: uregex.cpp |
7 */ | 7 */ |
8 | 8 |
9 #include "unicode/utypes.h" | 9 #include "unicode/utypes.h" |
10 | 10 |
11 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 11 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
12 | 12 |
13 #include "unicode/regex.h" | 13 #include "unicode/regex.h" |
14 #include "unicode/uregex.h" | 14 #include "unicode/uregex.h" |
15 #include "unicode/unistr.h" | 15 #include "unicode/unistr.h" |
16 #include "unicode/ustring.h" | 16 #include "unicode/ustring.h" |
17 #include "unicode/uchar.h" | 17 #include "unicode/uchar.h" |
18 #include "unicode/uobject.h" | 18 #include "unicode/uobject.h" |
19 #include "unicode/utf16.h" | 19 #include "unicode/utf16.h" |
| 20 #include "cmemory.h" |
| 21 #include "uassert.h" |
| 22 #include "uhash.h" |
20 #include "umutex.h" | 23 #include "umutex.h" |
21 #include "uassert.h" | 24 #include "uvectr32.h" |
22 #include "cmemory.h" | |
23 | 25 |
24 #include "regextxt.h" | 26 #include "regextxt.h" |
25 | 27 |
26 #include <stdio.h> | |
27 | |
28 U_NAMESPACE_BEGIN | 28 U_NAMESPACE_BEGIN |
29 | 29 |
30 #define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0) | 30 #define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0) |
31 | 31 |
32 struct RegularExpression: public UMemory { | 32 struct RegularExpression: public UMemory { |
33 public: | 33 public: |
34 RegularExpression(); | 34 RegularExpression(); |
35 ~RegularExpression(); | 35 ~RegularExpression(); |
36 int32_t fMagic; | 36 int32_t fMagic; |
37 RegexPattern *fPat; | 37 RegexPattern *fPat; |
(...skipping 582 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
620 if (validateRE(regexp, FALSE, status) == FALSE) { | 620 if (validateRE(regexp, FALSE, status) == FALSE) { |
621 return 0; | 621 return 0; |
622 } | 622 } |
623 int32_t result = regexp->fMatcher->groupCount(); | 623 int32_t result = regexp->fMatcher->groupCount(); |
624 return result; | 624 return result; |
625 } | 625 } |
626 | 626 |
627 | 627 |
628 //------------------------------------------------------------------------------ | 628 //------------------------------------------------------------------------------ |
629 // | 629 // |
| 630 // uregex_groupNumberFromName |
| 631 // |
| 632 //------------------------------------------------------------------------------ |
| 633 int32_t |
| 634 uregex_groupNumberFromName(URegularExpression *regexp2, |
| 635 const UChar *groupName, |
| 636 int32_t nameLength, |
| 637 UErrorCode *status) { |
| 638 RegularExpression *regexp = (RegularExpression*)regexp2; |
| 639 if (validateRE(regexp, FALSE, status) == FALSE) { |
| 640 return 0; |
| 641 } |
| 642 int32_t result = regexp->fPat->groupNumberFromName(UnicodeString(groupName,
nameLength), *status); |
| 643 return result; |
| 644 } |
| 645 |
| 646 int32_t |
| 647 uregex_groupNumberFromCName(URegularExpression *regexp2, |
| 648 const char *groupName, |
| 649 int32_t nameLength, |
| 650 UErrorCode *status) { |
| 651 RegularExpression *regexp = (RegularExpression*)regexp2; |
| 652 if (validateRE(regexp, FALSE, status) == FALSE) { |
| 653 return 0; |
| 654 } |
| 655 return regexp->fPat->groupNumberFromName(groupName, nameLength, *status); |
| 656 } |
| 657 |
| 658 //------------------------------------------------------------------------------ |
| 659 // |
630 // uregex_group | 660 // uregex_group |
631 // | 661 // |
632 //------------------------------------------------------------------------------ | 662 //------------------------------------------------------------------------------ |
633 U_CAPI int32_t U_EXPORT2 | 663 U_CAPI int32_t U_EXPORT2 |
634 uregex_group(URegularExpression *regexp2, | 664 uregex_group(URegularExpression *regexp2, |
635 int32_t groupNum, | 665 int32_t groupNum, |
636 UChar *dest, | 666 UChar *dest, |
637 int32_t destCapacity, | 667 int32_t destCapacity, |
638 UErrorCode *status) { | 668 UErrorCode *status) { |
639 RegularExpression *regexp = (RegularExpression*)regexp2; | 669 RegularExpression *regexp = (RegularExpression*)regexp2; |
640 if (validateRE(regexp, TRUE, status) == FALSE) { | 670 if (validateRE(regexp, TRUE, status) == FALSE) { |
641 return 0; | 671 return 0; |
642 } | 672 } |
643 if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) { | 673 if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) { |
644 *status = U_ILLEGAL_ARGUMENT_ERROR; | 674 *status = U_ILLEGAL_ARGUMENT_ERROR; |
645 return 0; | 675 return 0; |
646 } | 676 } |
647 | 677 |
648 if (destCapacity == 0 || regexp->fText != NULL) { | 678 if (destCapacity == 0 || regexp->fText != NULL) { |
649 // If preflighting or if we already have the text as UChars, | 679 // If preflighting or if we already have the text as UChars, |
650 // this is a little cheaper than going through uregex_groupUTextDeep() | 680 // this is a little cheaper than extracting from the UText |
651 | 681 |
652 // | 682 // |
653 // Pick up the range of characters from the matcher | 683 // Pick up the range of characters from the matcher |
654 // | 684 // |
655 int32_t startIx = regexp->fMatcher->start(groupNum, *status); | 685 int32_t startIx = regexp->fMatcher->start(groupNum, *status); |
656 int32_t endIx = regexp->fMatcher->end (groupNum, *status); | 686 int32_t endIx = regexp->fMatcher->end (groupNum, *status); |
657 if (U_FAILURE(*status)) { | 687 if (U_FAILURE(*status)) { |
658 return 0; | 688 return 0; |
659 } | 689 } |
660 | 690 |
(...skipping 12 matching lines...) Expand all Loading... |
673 } | 703 } |
674 | 704 |
675 // | 705 // |
676 // Copy capture group to user's buffer | 706 // Copy capture group to user's buffer |
677 // | 707 // |
678 if (copyLength > 0) { | 708 if (copyLength > 0) { |
679 u_memcpy(dest, &regexp->fText[startIx], copyLength); | 709 u_memcpy(dest, &regexp->fText[startIx], copyLength); |
680 } | 710 } |
681 return fullLength; | 711 return fullLength; |
682 } else { | 712 } else { |
683 int32_t result = 0; | 713 int64_t start = regexp->fMatcher->start64(groupNum, *status); |
684 UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status
); | 714 int64_t limit = regexp->fMatcher->end64(groupNum, *status); |
685 if (U_SUCCESS(*status)) { | 715 if (U_FAILURE(*status)) { |
686 result = utext_extract(groupText, 0, utext_nativeLength(groupText),
dest, destCapacity, status); | 716 return 0; |
687 } | 717 } |
688 utext_close(groupText); | 718 // Note edge cases: |
689 return result; | 719 // Group didn't match: start == end == -1. UText trims to 0, UText giv
es zero length result. |
| 720 // Zero Length Match: start == end. |
| 721 int32_t length = utext_extract(regexp->fMatcher->inputText(), start, lim
it, dest, destCapacity, status); |
| 722 return length; |
690 } | 723 } |
| 724 |
691 } | 725 } |
692 | 726 |
693 | 727 |
694 //------------------------------------------------------------------------------ | 728 //------------------------------------------------------------------------------ |
695 // | 729 // |
696 // uregex_groupUText | 730 // uregex_groupUText |
697 // | 731 // |
698 //------------------------------------------------------------------------------ | 732 //------------------------------------------------------------------------------ |
699 U_CAPI UText * U_EXPORT2 | 733 U_CAPI UText * U_EXPORT2 |
700 uregex_groupUText(URegularExpression *regexp2, | 734 uregex_groupUText(URegularExpression *regexp2, |
701 int32_t groupNum, | 735 int32_t groupNum, |
702 UText *dest, | 736 UText *dest, |
703 int64_t *groupLength, | 737 int64_t *groupLength, |
704 UErrorCode *status) { | 738 UErrorCode *status) { |
705 RegularExpression *regexp = (RegularExpression*)regexp2; | 739 RegularExpression *regexp = (RegularExpression*)regexp2; |
706 if (validateRE(regexp, TRUE, status) == FALSE) { | 740 if (validateRE(regexp, TRUE, status) == FALSE) { |
707 UErrorCode emptyTextStatus = U_ZERO_ERROR; | 741 UErrorCode emptyTextStatus = U_ZERO_ERROR; |
708 return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus))
; | 742 return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus))
; |
709 } | 743 } |
710 | 744 |
711 return regexp->fMatcher->group(groupNum, dest, *groupLength, *status); | 745 return regexp->fMatcher->group(groupNum, dest, *groupLength, *status); |
712 } | 746 } |
713 | 747 |
714 //------------------------------------------------------------------------------ | 748 //------------------------------------------------------------------------------ |
715 // | 749 // |
716 // uregex_groupUTextDeep | |
717 // | |
718 //------------------------------------------------------------------------------ | |
719 U_CAPI UText * U_EXPORT2 | |
720 uregex_groupUTextDeep(URegularExpression *regexp2, | |
721 int32_t groupNum, | |
722 UText *dest, | |
723 UErrorCode *status) { | |
724 RegularExpression *regexp = (RegularExpression*)regexp2; | |
725 if (validateRE(regexp, TRUE, status) == FALSE) { | |
726 UErrorCode emptyTextStatus = U_ZERO_ERROR; | |
727 return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus))
; | |
728 } | |
729 | |
730 if (regexp->fText != NULL) { | |
731 // | |
732 // Pick up the range of characters from the matcher | |
733 // and use our already-extracted characters | |
734 // | |
735 int32_t startIx = regexp->fMatcher->start(groupNum, *status); | |
736 int32_t endIx = regexp->fMatcher->end (groupNum, *status); | |
737 if (U_FAILURE(*status)) { | |
738 UErrorCode emptyTextStatus = U_ZERO_ERROR; | |
739 return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStat
us)); | |
740 } | |
741 | |
742 if (dest) { | |
743 utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[star
tIx], endIx - startIx, status); | |
744 } else { | |
745 UText groupText = UTEXT_INITIALIZER; | |
746 utext_openUChars(&groupText, &regexp->fText[startI
x, status); | |
747 dest = utext_clone(NULL, &groupText, TRUE, FALSE, status); | |
748 utext_close(&groupText); | |
749 } | |
750 | |
751 return dest; | |
752 } else { | |
753 return regexp->fMatcher->group(groupNum, dest, *status); | |
754 } | |
755 } | |
756 | |
757 //------------------------------------------------------------------------------ | |
758 // | |
759 // uregex_start | 750 // uregex_start |
760 // | 751 // |
761 //------------------------------------------------------------------------------ | 752 //------------------------------------------------------------------------------ |
762 U_CAPI int32_t U_EXPORT2 | 753 U_CAPI int32_t U_EXPORT2 |
763 uregex_start(URegularExpression *regexp2, | 754 uregex_start(URegularExpression *regexp2, |
764 int32_t groupNum, | 755 int32_t groupNum, |
765 UErrorCode *status) { | 756 UErrorCode *status) { |
766 return (int32_t)uregex_start64( regexp2, groupNum, status); | 757 return (int32_t)uregex_start64( regexp2, groupNum, status); |
767 } | 758 } |
768 | 759 |
(...skipping 548 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1317 int32_t destFieldsCapacity, | 1308 int32_t destFieldsCapacity, |
1318 UErrorCode *status); | 1309 UErrorCode *status); |
1319 }; | 1310 }; |
1320 | 1311 |
1321 U_NAMESPACE_END | 1312 U_NAMESPACE_END |
1322 | 1313 |
1323 | 1314 |
1324 | 1315 |
1325 static const UChar BACKSLASH = 0x5c; | 1316 static const UChar BACKSLASH = 0x5c; |
1326 static const UChar DOLLARSIGN = 0x24; | 1317 static const UChar DOLLARSIGN = 0x24; |
| 1318 static const UChar LEFTBRACKET = 0x7b; |
| 1319 static const UChar RIGHTBRACKET = 0x7d; |
1327 | 1320 |
1328 // | 1321 // |
1329 // Move a character to an output buffer, with bounds checking on the index. | 1322 // Move a character to an output buffer, with bounds checking on the index. |
1330 // Index advances even if capacity is exceeded, for preflight size computat
ions. | 1323 // Index advances even if capacity is exceeded, for preflight size computat
ions. |
1331 // This little sequence is used a LOT. | 1324 // This little sequence is used a LOT. |
1332 // | 1325 // |
1333 static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCap
acity) { | 1326 static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCap
acity) { |
1334 if (*idx < bufCapacity) { | 1327 if (*idx < bufCapacity) { |
1335 buf[*idx] = c; | 1328 buf[*idx] = c; |
1336 } | 1329 } |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1391 | 1384 |
1392 // Copy input string from the end of previous match to start of current matc
h | 1385 // Copy input string from the end of previous match to start of current matc
h |
1393 if (regexp->fText != NULL) { | 1386 if (regexp->fText != NULL) { |
1394 int32_t matchStart; | 1387 int32_t matchStart; |
1395 int32_t lastMatchEnd; | 1388 int32_t lastMatchEnd; |
1396 if (UTEXT_USES_U16(m->fInputText)) { | 1389 if (UTEXT_USES_U16(m->fInputText)) { |
1397 lastMatchEnd = (int32_t)m->fLastMatchEnd; | 1390 lastMatchEnd = (int32_t)m->fLastMatchEnd; |
1398 matchStart = (int32_t)m->fMatchStart; | 1391 matchStart = (int32_t)m->fMatchStart; |
1399 } else { | 1392 } else { |
1400 // !!!: Would like a better way to do this! | 1393 // !!!: Would like a better way to do this! |
1401 UErrorCode status = U_ZERO_ERROR; | 1394 UErrorCode tempStatus = U_ZERO_ERROR; |
1402 lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NUL
L, 0, &status); | 1395 lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NUL
L, 0, &tempStatus); |
1403 status = U_ZERO_ERROR; | 1396 tempStatus = U_ZERO_ERROR; |
1404 matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMat
chEnd, m->fMatchStart, NULL, 0, &status); | 1397 matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMat
chEnd, m->fMatchStart, NULL, 0, &tempStatus); |
1405 } | 1398 } |
1406 for (i=lastMatchEnd; i<matchStart; i++) { | 1399 for (i=lastMatchEnd; i<matchStart; i++) { |
1407 appendToBuf(regexp->fText[i], &destIdx, dest, capacity); | 1400 appendToBuf(regexp->fText[i], &destIdx, dest, capacity); |
1408 } | 1401 } |
1409 } else { | 1402 } else { |
1410 UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore | 1403 UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore |
1411 destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart
, | 1404 destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart
, |
1412 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPAC
ITY(destIdx, capacity), | 1405 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPAC
ITY(destIdx, capacity), |
1413 &possibleOverflowError); | 1406 &possibleOverflowError); |
1414 } | 1407 } |
1415 U_ASSERT(destIdx >= 0); | 1408 U_ASSERT(destIdx >= 0); |
1416 | 1409 |
1417 // scan the replacement text, looking for substitutions ($n) and \escapes. | 1410 // scan the replacement text, looking for substitutions ($n) and \escapes. |
1418 int32_t replIdx = 0; | 1411 int32_t replIdx = 0; |
1419 while (replIdx < replacementLength) { | 1412 while (replIdx < replacementLength && U_SUCCESS(*status)) { |
1420 UChar c = replacementText[replIdx]; | 1413 UChar c = replacementText[replIdx]; |
1421 replIdx++; | 1414 replIdx++; |
1422 if (c != DOLLARSIGN && c != BACKSLASH) { | 1415 if (c != DOLLARSIGN && c != BACKSLASH) { |
1423 // Common case, no substitution, no escaping, | 1416 // Common case, no substitution, no escaping, |
1424 // just copy the char to the dest buf. | 1417 // just copy the char to the dest buf. |
1425 appendToBuf(c, &destIdx, dest, capacity); | 1418 appendToBuf(c, &destIdx, dest, capacity); |
1426 continue; | 1419 continue; |
1427 } | 1420 } |
1428 | 1421 |
1429 if (c == BACKSLASH) { | 1422 if (c == BACKSLASH) { |
(...skipping 28 matching lines...) Expand all Loading... |
1458 // treat it as a plain \<anything> escape. | 1451 // treat it as a plain \<anything> escape. |
1459 } | 1452 } |
1460 | 1453 |
1461 // Plain backslash escape. Just put out the escaped character. | 1454 // Plain backslash escape. Just put out the escaped character. |
1462 appendToBuf(c, &destIdx, dest, capacity); | 1455 appendToBuf(c, &destIdx, dest, capacity); |
1463 | 1456 |
1464 replIdx++; | 1457 replIdx++; |
1465 continue; | 1458 continue; |
1466 } | 1459 } |
1467 | 1460 |
| 1461 // We've got a $. Pick up the following capture group name or number. |
| 1462 // For numbers, consume only digits that produce a valid capture group f
or the pattern. |
| 1463 |
| 1464 int32_t groupNum = 0; |
| 1465 U_ASSERT(c == DOLLARSIGN); |
| 1466 UChar32 c32; |
| 1467 U16_GET(replacementText, 0, replIdx, replacementLength, c32); |
| 1468 if (u_isdigit(c32)) { |
| 1469 int32_t numDigits = 0; |
| 1470 int32_t numCaptureGroups = m->fPattern->fGroupMap->size(); |
| 1471 for (;;) { |
| 1472 if (replIdx >= replacementLength) { |
| 1473 break; |
| 1474 } |
| 1475 U16_GET(replacementText, 0, replIdx, replacementLength, c32); |
| 1476 if (u_isdigit(c32) == FALSE) { |
| 1477 break; |
| 1478 } |
| 1479 |
| 1480 int32_t digitVal = u_charDigitValue(c32); |
| 1481 if (groupNum * 10 + digitVal <= numCaptureGroups) { |
| 1482 groupNum = groupNum * 10 + digitVal; |
| 1483 U16_FWD_1(replacementText, replIdx, replacementLength); |
| 1484 numDigits++; |
| 1485 } else { |
| 1486 if (numDigits == 0) { |
| 1487 *status = U_INDEX_OUTOFBOUNDS_ERROR; |
| 1488 } |
| 1489 break; |
| 1490 } |
| 1491 } |
| 1492 } else if (c32 == LEFTBRACKET) { |
| 1493 // Scan for Named Capture Group, ${name}. |
| 1494 UnicodeString groupName; |
| 1495 U16_FWD_1(replacementText, replIdx, replacementLength); |
| 1496 while (U_SUCCESS(*status) && c32 != RIGHTBRACKET) { |
| 1497 if (replIdx >= replacementLength) { |
| 1498 *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; |
| 1499 break; |
| 1500 } |
| 1501 U16_NEXT(replacementText, replIdx, replacementLength, c32); |
| 1502 if ((c32 >= 0x41 && c32 <= 0x5a) || // A..Z |
| 1503 (c32 >= 0x61 && c32 <= 0x7a) || // a..z |
| 1504 (c32 >= 0x31 && c32 <= 0x39)) { // 0..9 |
| 1505 groupName.append(c32); |
| 1506 } else if (c32 == RIGHTBRACKET) { |
| 1507 groupNum = uhash_geti(regexp->fPat->fNamedCaptureMap, &group
Name); |
| 1508 if (groupNum == 0) { |
| 1509 // Name not defined by pattern. |
| 1510 *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; |
| 1511 } |
| 1512 } else { |
| 1513 // Character was something other than a name char or a closi
ng '}' |
| 1514 *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; |
| 1515 } |
| 1516 } |
| 1517 } else { |
| 1518 // $ not followed by {name} or digits. |
| 1519 *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; |
| 1520 } |
1468 | 1521 |
1469 | 1522 |
1470 // We've got a $. Pick up a capture group number if one follows. | 1523 // Finally, append the capture group data to the destination. |
1471 // Consume at most the number of digits necessary for the largest captur
e | 1524 if (U_SUCCESS(*status)) { |
1472 // number that is valid for this pattern. | 1525 destIdx += uregex_group((URegularExpression*)regexp, groupNum, |
1473 | 1526 dest==NULL?NULL:&dest[destIdx], REMAINING_CA
PACITY(destIdx, capacity), status); |
1474 int32_t numDigits = 0; | 1527 if (*status == U_BUFFER_OVERFLOW_ERROR) { |
1475 int32_t groupNum = 0; | 1528 // Ignore buffer overflow when extracting the group. We need to |
1476 UChar32 digitC; | 1529 // continue on to get full size of the untruncated result. We
will |
1477 for (;;) { | 1530 // raise our own buffer overflow error at the end. |
1478 if (replIdx >= replacementLength) { | 1531 *status = U_ZERO_ERROR; |
1479 break; | |
1480 } | |
1481 U16_GET(replacementText, 0, replIdx, replacementLength, digitC); | |
1482 if (u_isdigit(digitC) == FALSE) { | |
1483 break; | |
1484 } | |
1485 | |
1486 U16_FWD_1(replacementText, replIdx, replacementLength); | |
1487 groupNum=groupNum*10 + u_charDigitValue(digitC); | |
1488 numDigits++; | |
1489 if (numDigits >= m->fPattern->fMaxCaptureDigits) { | |
1490 break; | |
1491 } | 1532 } |
1492 } | 1533 } |
1493 | 1534 |
1494 | |
1495 if (numDigits == 0) { | |
1496 // The $ didn't introduce a group number at all. | |
1497 // Treat it as just part of the substitution text. | |
1498 appendToBuf(DOLLARSIGN, &destIdx, dest, capacity); | |
1499 continue; | |
1500 } | |
1501 | |
1502 // Finally, append the capture group data to the destination. | |
1503 destIdx += uregex_group((URegularExpression*)regexp, groupNum, | |
1504 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACI
TY(destIdx, capacity), status); | |
1505 if (*status == U_BUFFER_OVERFLOW_ERROR) { | |
1506 // Ignore buffer overflow when extracting the group. We need to | |
1507 // continue on to get full size of the untruncated result. We wil
l | |
1508 // raise our own buffer overflow error at the end. | |
1509 *status = U_ZERO_ERROR; | |
1510 } | |
1511 | |
1512 if (U_FAILURE(*status)) { | 1535 if (U_FAILURE(*status)) { |
1513 // Can fail if group number is out of range. | 1536 // bad group number or name. |
1514 break; | 1537 break; |
1515 } | 1538 } |
1516 | |
1517 } | 1539 } |
1518 | 1540 |
1519 // | 1541 // |
1520 // Nul Terminate the dest buffer if possible. | 1542 // Nul Terminate the dest buffer if possible. |
1521 // Set the appropriate buffer overflow or not terminated error, if needed. | 1543 // Set the appropriate buffer overflow or not terminated error, if needed. |
1522 // | 1544 // |
1523 if (destIdx < capacity) { | 1545 if (destIdx < capacity) { |
1524 dest[destIdx] = 0; | 1546 dest[destIdx] = 0; |
1525 } else if (destIdx == *destCapacity) { | 1547 } else if (U_SUCCESS(*status)) { |
1526 *status = U_STRING_NOT_TERMINATED_WARNING; | 1548 if (destIdx == *destCapacity) { |
1527 } else { | 1549 *status = U_STRING_NOT_TERMINATED_WARNING; |
1528 *status = U_BUFFER_OVERFLOW_ERROR; | 1550 } else { |
| 1551 *status = U_BUFFER_OVERFLOW_ERROR; |
| 1552 } |
1529 } | 1553 } |
1530 | 1554 |
1531 // | 1555 // |
1532 // Return an updated dest buffer and capacity to the caller. | 1556 // Return an updated dest buffer and capacity to the caller. |
1533 // | 1557 // |
1534 if (destIdx > 0 && *destCapacity > 0) { | 1558 if (destIdx > 0 && *destCapacity > 0) { |
1535 if (destIdx < capacity) { | 1559 if (destIdx < capacity) { |
1536 *destBuf += destIdx; | 1560 *destBuf += destIdx; |
1537 *destCapacity -= destIdx; | 1561 *destCapacity -= destIdx; |
1538 } else { | 1562 } else { |
(...skipping 403 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1942 UText *destFields[], | 1966 UText *destFields[], |
1943 int32_t destFieldsCapacity, | 1967 int32_t destFieldsCapacity, |
1944 UErrorCode *status) { | 1968 UErrorCode *status) { |
1945 RegularExpression *regexp = (RegularExpression*)regexp2; | 1969 RegularExpression *regexp = (RegularExpression*)regexp2; |
1946 return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, de
stFieldsCapacity, *status); | 1970 return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, de
stFieldsCapacity, *status); |
1947 } | 1971 } |
1948 | 1972 |
1949 | 1973 |
1950 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS | 1974 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS |
1951 | 1975 |
OLD | NEW |