| OLD | NEW |
| 1 // | 1 // |
| 2 // file: repattrn.cpp | 2 // file: repattrn.cpp |
| 3 // | 3 // |
| 4 /* | 4 /* |
| 5 *************************************************************************** | 5 *************************************************************************** |
| 6 * Copyright (C) 2002-2013 International Business Machines Corporation * | 6 * Copyright (C) 2002-2015 International Business Machines Corporation * |
| 7 * and others. All rights reserved. * | 7 * and others. All rights reserved. * |
| 8 *************************************************************************** | 8 *************************************************************************** |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "unicode/utypes.h" | 11 #include "unicode/utypes.h" |
| 12 | 12 |
| 13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
| 14 | 14 |
| 15 #include "unicode/regex.h" | 15 #include "unicode/regex.h" |
| 16 #include "unicode/uclean.h" | 16 #include "unicode/uclean.h" |
| 17 #include "uassert.h" | 17 #include "uassert.h" |
| 18 #include "uhash.h" |
| 18 #include "uvector.h" | 19 #include "uvector.h" |
| 19 #include "uvectr32.h" | 20 #include "uvectr32.h" |
| 20 #include "uvectr64.h" | 21 #include "uvectr64.h" |
| 21 #include "regexcmp.h" | 22 #include "regexcmp.h" |
| 22 #include "regeximp.h" | 23 #include "regeximp.h" |
| 23 #include "regexst.h" | 24 #include "regexst.h" |
| 24 | 25 |
| 25 U_NAMESPACE_BEGIN | 26 U_NAMESPACE_BEGIN |
| 26 | 27 |
| 27 //-------------------------------------------------------------------------- | 28 //-------------------------------------------------------------------------- |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 59 return *this; | 60 return *this; |
| 60 } | 61 } |
| 61 | 62 |
| 62 // Clean out any previous contents of object being assigned to. | 63 // Clean out any previous contents of object being assigned to. |
| 63 zap(); | 64 zap(); |
| 64 | 65 |
| 65 // Give target object a default initialization | 66 // Give target object a default initialization |
| 66 init(); | 67 init(); |
| 67 | 68 |
| 68 // Copy simple fields | 69 // Copy simple fields |
| 69 if ( other.fPatternString == NULL ) { | 70 fDeferredStatus = other.fDeferredStatus; |
| 71 |
| 72 if (U_FAILURE(fDeferredStatus)) { |
| 73 return *this; |
| 74 } |
| 75 |
| 76 if (other.fPatternString == NULL) { |
| 70 fPatternString = NULL; | 77 fPatternString = NULL; |
| 71 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDef
erredStatus); | 78 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferred
Status); |
| 72 } else { | 79 } else { |
| 73 fPatternString = new UnicodeString(*(other.fPatternString)); | 80 fPatternString = new UnicodeString(*(other.fPatternString)); |
| 74 UErrorCode status = U_ZERO_ERROR; | 81 if (fPatternString == NULL) { |
| 75 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &stat
us); | |
| 76 if (U_FAILURE(status)) { | |
| 77 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; | 82 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; |
| 78 return *this; | 83 } else { |
| 84 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDefe
rredStatus); |
| 79 } | 85 } |
| 80 } | 86 } |
| 87 if (U_FAILURE(fDeferredStatus)) { |
| 88 return *this; |
| 89 } |
| 90 |
| 81 fFlags = other.fFlags; | 91 fFlags = other.fFlags; |
| 82 fLiteralText = other.fLiteralText; | 92 fLiteralText = other.fLiteralText; |
| 83 fDeferredStatus = other.fDeferredStatus; | |
| 84 fMinMatchLen = other.fMinMatchLen; | 93 fMinMatchLen = other.fMinMatchLen; |
| 85 fFrameSize = other.fFrameSize; | 94 fFrameSize = other.fFrameSize; |
| 86 fDataSize = other.fDataSize; | 95 fDataSize = other.fDataSize; |
| 87 fMaxCaptureDigits = other.fMaxCaptureDigits; | |
| 88 fStaticSets = other.fStaticSets; | 96 fStaticSets = other.fStaticSets; |
| 89 fStaticSets8 = other.fStaticSets8; | 97 fStaticSets8 = other.fStaticSets8; |
| 90 | 98 |
| 91 fStartType = other.fStartType; | 99 fStartType = other.fStartType; |
| 92 fInitialStringIdx = other.fInitialStringIdx; | 100 fInitialStringIdx = other.fInitialStringIdx; |
| 93 fInitialStringLen = other.fInitialStringLen; | 101 fInitialStringLen = other.fInitialStringLen; |
| 94 *fInitialChars = *other.fInitialChars; | 102 *fInitialChars = *other.fInitialChars; |
| 95 fInitialChar = other.fInitialChar; | 103 fInitialChar = other.fInitialChar; |
| 96 *fInitialChars8 = *other.fInitialChars8; | 104 *fInitialChars8 = *other.fInitialChars8; |
| 97 fNeedsAltInput = other.fNeedsAltInput; | 105 fNeedsAltInput = other.fNeedsAltInput; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 118 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); | 126 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); |
| 119 UnicodeSet *newSet = new UnicodeSet(*sourceSet); | 127 UnicodeSet *newSet = new UnicodeSet(*sourceSet); |
| 120 if (newSet == NULL) { | 128 if (newSet == NULL) { |
| 121 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; | 129 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; |
| 122 break; | 130 break; |
| 123 } | 131 } |
| 124 fSets->addElement(newSet, fDeferredStatus); | 132 fSets->addElement(newSet, fDeferredStatus); |
| 125 fSets8[i] = other.fSets8[i]; | 133 fSets8[i] = other.fSets8[i]; |
| 126 } | 134 } |
| 127 | 135 |
| 136 // Copy the named capture group hash map. |
| 137 int32_t hashPos = UHASH_FIRST; |
| 138 while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap
, &hashPos)) { |
| 139 if (U_FAILURE(fDeferredStatus)) { |
| 140 break; |
| 141 } |
| 142 const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer; |
| 143 UnicodeString *key = new UnicodeString(*name); |
| 144 int32_t val = hashEl->value.integer; |
| 145 if (key == NULL) { |
| 146 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; |
| 147 } else { |
| 148 uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus); |
| 149 } |
| 150 } |
| 128 return *this; | 151 return *this; |
| 129 } | 152 } |
| 130 | 153 |
| 131 | 154 |
| 132 //-------------------------------------------------------------------------- | 155 //-------------------------------------------------------------------------- |
| 133 // | 156 // |
| 134 // init Shared initialization for use by constructors. | 157 // init Shared initialization for use by constructors. |
| 135 // Bring an uninitialized RegexPattern up to a default state. | 158 // Bring an uninitialized RegexPattern up to a default state. |
| 136 // | 159 // |
| 137 //-------------------------------------------------------------------------- | 160 //-------------------------------------------------------------------------- |
| 138 void RegexPattern::init() { | 161 void RegexPattern::init() { |
| 139 fFlags = 0; | 162 fFlags = 0; |
| 140 fCompiledPat = 0; | 163 fCompiledPat = 0; |
| 141 fLiteralText.remove(); | 164 fLiteralText.remove(); |
| 142 fSets = NULL; | 165 fSets = NULL; |
| 143 fSets8 = NULL; | 166 fSets8 = NULL; |
| 144 fDeferredStatus = U_ZERO_ERROR; | 167 fDeferredStatus = U_ZERO_ERROR; |
| 145 fMinMatchLen = 0; | 168 fMinMatchLen = 0; |
| 146 fFrameSize = 0; | 169 fFrameSize = 0; |
| 147 fDataSize = 0; | 170 fDataSize = 0; |
| 148 fGroupMap = NULL; | 171 fGroupMap = NULL; |
| 149 fMaxCaptureDigits = 1; | |
| 150 fStaticSets = NULL; | 172 fStaticSets = NULL; |
| 151 fStaticSets8 = NULL; | 173 fStaticSets8 = NULL; |
| 152 fStartType = START_NO_INFO; | 174 fStartType = START_NO_INFO; |
| 153 fInitialStringIdx = 0; | 175 fInitialStringIdx = 0; |
| 154 fInitialStringLen = 0; | 176 fInitialStringLen = 0; |
| 155 fInitialChars = NULL; | 177 fInitialChars = NULL; |
| 156 fInitialChar = 0; | 178 fInitialChar = 0; |
| 157 fInitialChars8 = NULL; | 179 fInitialChars8 = NULL; |
| 158 fNeedsAltInput = FALSE; | 180 fNeedsAltInput = FALSE; |
| 181 fNamedCaptureMap = NULL; |
| 159 | 182 |
| 160 fPattern = NULL; // will be set later | 183 fPattern = NULL; // will be set later |
| 161 fPatternString = NULL; // may be set later | 184 fPatternString = NULL; // may be set later |
| 162 fCompiledPat = new UVector64(fDeferredStatus); | 185 fCompiledPat = new UVector64(fDeferredStatus); |
| 163 fGroupMap = new UVector32(fDeferredStatus); | 186 fGroupMap = new UVector32(fDeferredStatus); |
| 164 fSets = new UVector(fDeferredStatus); | 187 fSets = new UVector(fDeferredStatus); |
| 165 fInitialChars = new UnicodeSet; | 188 fInitialChars = new UnicodeSet; |
| 166 fInitialChars8 = new Regex8BitSet; | 189 fInitialChars8 = new Regex8BitSet; |
| 190 fNamedCaptureMap = uhash_open(uhash_hashUnicodeString, // Key hash func
tion |
| 191 uhash_compareUnicodeString, // Key comparato
r function |
| 192 uhash_compareLong, // Value compara
tor function |
| 193 &fDeferredStatus); |
| 167 if (U_FAILURE(fDeferredStatus)) { | 194 if (U_FAILURE(fDeferredStatus)) { |
| 168 return; | 195 return; |
| 169 } | 196 } |
| 170 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || | 197 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || |
| 171 fInitialChars == NULL || fInitialChars8 == NULL) { | 198 fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap
== NULL) { |
| 172 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; | 199 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; |
| 173 return; | 200 return; |
| 174 } | 201 } |
| 175 | 202 |
| 176 // Slot zero of the vector of sets is reserved. Fill it here. | 203 // Slot zero of the vector of sets is reserved. Fill it here. |
| 177 fSets->addElement((int32_t)0, fDeferredStatus); | 204 fSets->addElement((int32_t)0, fDeferredStatus); |
| 205 |
| 206 // fNamedCaptureMap owns its key strings, type (UnicodeString *) |
| 207 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject); |
| 178 } | 208 } |
| 179 | 209 |
| 180 | 210 |
| 181 //-------------------------------------------------------------------------- | 211 //-------------------------------------------------------------------------- |
| 182 // | 212 // |
| 183 // zap Delete everything owned by this RegexPattern. | 213 // zap Delete everything owned by this RegexPattern. |
| 184 // | 214 // |
| 185 //-------------------------------------------------------------------------- | 215 //-------------------------------------------------------------------------- |
| 186 void RegexPattern::zap() { | 216 void RegexPattern::zap() { |
| 187 delete fCompiledPat; | 217 delete fCompiledPat; |
| (...skipping 17 matching lines...) Expand all Loading... |
| 205 delete fInitialChars8; | 235 delete fInitialChars8; |
| 206 fInitialChars8 = NULL; | 236 fInitialChars8 = NULL; |
| 207 if (fPattern != NULL) { | 237 if (fPattern != NULL) { |
| 208 utext_close(fPattern); | 238 utext_close(fPattern); |
| 209 fPattern = NULL; | 239 fPattern = NULL; |
| 210 } | 240 } |
| 211 if (fPatternString != NULL) { | 241 if (fPatternString != NULL) { |
| 212 delete fPatternString; | 242 delete fPatternString; |
| 213 fPatternString = NULL; | 243 fPatternString = NULL; |
| 214 } | 244 } |
| 245 uhash_close(fNamedCaptureMap); |
| 246 fNamedCaptureMap = NULL; |
| 215 } | 247 } |
| 216 | 248 |
| 217 | 249 |
| 218 //-------------------------------------------------------------------------- | 250 //-------------------------------------------------------------------------- |
| 219 // | 251 // |
| 220 // Destructor | 252 // Destructor |
| 221 // | 253 // |
| 222 //-------------------------------------------------------------------------- | 254 //-------------------------------------------------------------------------- |
| 223 RegexPattern::~RegexPattern() { | 255 RegexPattern::~RegexPattern() { |
| 224 zap(); | 256 zap(); |
| (...skipping 337 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 562 | 594 |
| 563 if (fPattern != NULL) { | 595 if (fPattern != NULL) { |
| 564 return fPattern; | 596 return fPattern; |
| 565 } else { | 597 } else { |
| 566 RegexStaticSets::initGlobals(&status); | 598 RegexStaticSets::initGlobals(&status); |
| 567 return RegexStaticSets::gStaticSets->fEmptyText; | 599 return RegexStaticSets::gStaticSets->fEmptyText; |
| 568 } | 600 } |
| 569 } | 601 } |
| 570 | 602 |
| 571 | 603 |
| 604 //------------------------------------------------------------------------------
-- |
| 605 // |
| 606 // groupNumberFromName() |
| 607 // |
| 608 //------------------------------------------------------------------------------
-- |
| 609 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UError
Code &status) const { |
| 610 if (U_FAILURE(status)) { |
| 611 return 0; |
| 612 } |
| 613 |
| 614 // No need to explicitly check for syntactically valid names. |
| 615 // Invalid ones will never be in the map, and the lookup will fail. |
| 616 |
| 617 int32_t number = uhash_geti(fNamedCaptureMap, &groupName); |
| 618 if (number == 0) { |
| 619 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; |
| 620 } |
| 621 return number; |
| 622 } |
| 623 |
| 624 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLen
gth, UErrorCode &status) const { |
| 625 if (U_FAILURE(status)) { |
| 626 return 0; |
| 627 } |
| 628 UnicodeString name(groupName, nameLength, US_INV); |
| 629 return groupNumberFromName(name, status); |
| 630 } |
| 631 |
| 572 | 632 |
| 573 //--------------------------------------------------------------------- | 633 //--------------------------------------------------------------------- |
| 574 // | 634 // |
| 575 // split | 635 // split |
| 576 // | 636 // |
| 577 //--------------------------------------------------------------------- | 637 //--------------------------------------------------------------------- |
| 578 int32_t RegexPattern::split(const UnicodeString &input, | 638 int32_t RegexPattern::split(const UnicodeString &input, |
| 579 UnicodeString dest[], | 639 UnicodeString dest[], |
| 580 int32_t destCapacity, | 640 int32_t destCapacity, |
| 581 UErrorCode &status) const | 641 UErrorCode &status) const |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 675 case URX_LA_START: | 735 case URX_LA_START: |
| 676 case URX_LA_END: | 736 case URX_LA_END: |
| 677 case URX_BACKREF_I: | 737 case URX_BACKREF_I: |
| 678 case URX_LB_START: | 738 case URX_LB_START: |
| 679 case URX_LB_CONT: | 739 case URX_LB_CONT: |
| 680 case URX_LB_END: | 740 case URX_LB_END: |
| 681 case URX_LBN_CONT: | 741 case URX_LBN_CONT: |
| 682 case URX_LBN_END: | 742 case URX_LBN_END: |
| 683 case URX_LOOP_C: | 743 case URX_LOOP_C: |
| 684 case URX_LOOP_DOT_I: | 744 case URX_LOOP_DOT_I: |
| 745 case URX_BACKSLASH_H: |
| 746 case URX_BACKSLASH_R: |
| 747 case URX_BACKSLASH_V: |
| 685 // types with an integer operand field. | 748 // types with an integer operand field. |
| 686 printf("%d", val); | 749 printf("%d", val); |
| 687 break; | 750 break; |
| 688 | 751 |
| 689 case URX_ONECHAR: | 752 case URX_ONECHAR: |
| 690 case URX_ONECHAR_I: | 753 case URX_ONECHAR_I: |
| 691 printf("%c", val<256?val:'?'); | 754 printf("%c", val<256?val:'?'); |
| 692 break; | 755 break; |
| 693 | 756 |
| 694 case URX_STRING: | 757 case URX_STRING: |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 739 printf("??????"); | 802 printf("??????"); |
| 740 break; | 803 break; |
| 741 } | 804 } |
| 742 printf("\n"); | 805 printf("\n"); |
| 743 #endif | 806 #endif |
| 744 } | 807 } |
| 745 | 808 |
| 746 | 809 |
| 747 void RegexPattern::dumpPattern() const { | 810 void RegexPattern::dumpPattern() const { |
| 748 #if defined(REGEX_DEBUG) | 811 #if defined(REGEX_DEBUG) |
| 812 // TODO: This function assumes an ASCII based charset. |
| 749 int index; | 813 int index; |
| 750 int i; | 814 int i; |
| 751 | 815 |
| 752 printf("Original Pattern: "); | 816 printf("Original Pattern: "); |
| 753 UChar32 c = utext_next32From(fPattern, 0); | 817 UChar32 c = utext_next32From(fPattern, 0); |
| 754 while (c != U_SENTINEL) { | 818 while (c != U_SENTINEL) { |
| 755 if (c<32 || c>256) { | 819 if (c<32 || c>256) { |
| 756 c = '.'; | 820 c = '.'; |
| 757 } | 821 } |
| 758 printf("%c", c); | 822 printf("%c", c); |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 790 | 854 |
| 791 } else if (fStartType == START_CHAR) { | 855 } else if (fStartType == START_CHAR) { |
| 792 printf(" First char of Match : "); | 856 printf(" First char of Match : "); |
| 793 if (0x20 < fInitialChar && fInitialChar<0x7e) { | 857 if (0x20 < fInitialChar && fInitialChar<0x7e) { |
| 794 printf("%c\n", fInitialChar); | 858 printf("%c\n", fInitialChar); |
| 795 } else { | 859 } else { |
| 796 printf("%#x\n", fInitialChar); | 860 printf("%#x\n", fInitialChar); |
| 797 } | 861 } |
| 798 } | 862 } |
| 799 | 863 |
| 864 printf("Named Capture Groups:\n"); |
| 865 if (uhash_count(fNamedCaptureMap) == 0) { |
| 866 printf(" None\n"); |
| 867 } else { |
| 868 int32_t pos = UHASH_FIRST; |
| 869 const UHashElement *el = NULL; |
| 870 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) { |
| 871 const UnicodeString *name = (const UnicodeString *)el->key.pointer; |
| 872 char s[100]; |
| 873 name->extract(0, 99, s, sizeof(s), US_INV); // capture group names
are invariant. |
| 874 int32_t number = el->value.integer; |
| 875 printf(" %d\t%s\n", number, s); |
| 876 } |
| 877 } |
| 878 |
| 800 printf("\nIndex Binary Type Operand\n" \ | 879 printf("\nIndex Binary Type Operand\n" \ |
| 801 "-------------------------------------------\n"); | 880 "-------------------------------------------\n"); |
| 802 for (index = 0; index<fCompiledPat->size(); index++) { | 881 for (index = 0; index<fCompiledPat->size(); index++) { |
| 803 dumpOp(index); | 882 dumpOp(index); |
| 804 } | 883 } |
| 805 printf("\n\n"); | 884 printf("\n\n"); |
| 806 #endif | 885 #endif |
| 807 } | 886 } |
| 808 | 887 |
| 809 | 888 |
| 810 | 889 |
| 811 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) | 890 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) |
| 812 | 891 |
| 813 U_NAMESPACE_END | 892 U_NAMESPACE_END |
| 814 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS | 893 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS |
| OLD | NEW |