OLD | NEW |
1 // | 1 // |
2 // file: repattrn.cpp | 2 // file: repattrn.cpp |
3 // | 3 // |
4 /* | 4 /* |
5 *************************************************************************** | 5 *************************************************************************** |
6 * Copyright (C) 2002-2013 International Business Machines Corporation * | 6 * Copyright (C) 2002-2015 International Business Machines Corporation * |
7 * and others. All rights reserved. * | 7 * and others. All rights reserved. * |
8 *************************************************************************** | 8 *************************************************************************** |
9 */ | 9 */ |
10 | 10 |
11 #include "unicode/utypes.h" | 11 #include "unicode/utypes.h" |
12 | 12 |
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
14 | 14 |
15 #include "unicode/regex.h" | 15 #include "unicode/regex.h" |
16 #include "unicode/uclean.h" | 16 #include "unicode/uclean.h" |
17 #include "uassert.h" | 17 #include "uassert.h" |
| 18 #include "uhash.h" |
18 #include "uvector.h" | 19 #include "uvector.h" |
19 #include "uvectr32.h" | 20 #include "uvectr32.h" |
20 #include "uvectr64.h" | 21 #include "uvectr64.h" |
21 #include "regexcmp.h" | 22 #include "regexcmp.h" |
22 #include "regeximp.h" | 23 #include "regeximp.h" |
23 #include "regexst.h" | 24 #include "regexst.h" |
24 | 25 |
25 U_NAMESPACE_BEGIN | 26 U_NAMESPACE_BEGIN |
26 | 27 |
27 //-------------------------------------------------------------------------- | 28 //-------------------------------------------------------------------------- |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
59 return *this; | 60 return *this; |
60 } | 61 } |
61 | 62 |
62 // Clean out any previous contents of object being assigned to. | 63 // Clean out any previous contents of object being assigned to. |
63 zap(); | 64 zap(); |
64 | 65 |
65 // Give target object a default initialization | 66 // Give target object a default initialization |
66 init(); | 67 init(); |
67 | 68 |
68 // Copy simple fields | 69 // Copy simple fields |
69 if ( other.fPatternString == NULL ) { | 70 fDeferredStatus = other.fDeferredStatus; |
| 71 |
| 72 if (U_FAILURE(fDeferredStatus)) { |
| 73 return *this; |
| 74 } |
| 75 |
| 76 if (other.fPatternString == NULL) { |
70 fPatternString = NULL; | 77 fPatternString = NULL; |
71 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDef
erredStatus); | 78 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferred
Status); |
72 } else { | 79 } else { |
73 fPatternString = new UnicodeString(*(other.fPatternString)); | 80 fPatternString = new UnicodeString(*(other.fPatternString)); |
74 UErrorCode status = U_ZERO_ERROR; | 81 if (fPatternString == NULL) { |
75 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &stat
us); | |
76 if (U_FAILURE(status)) { | |
77 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; | 82 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; |
78 return *this; | 83 } else { |
| 84 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDefe
rredStatus); |
79 } | 85 } |
80 } | 86 } |
| 87 if (U_FAILURE(fDeferredStatus)) { |
| 88 return *this; |
| 89 } |
| 90 |
81 fFlags = other.fFlags; | 91 fFlags = other.fFlags; |
82 fLiteralText = other.fLiteralText; | 92 fLiteralText = other.fLiteralText; |
83 fDeferredStatus = other.fDeferredStatus; | |
84 fMinMatchLen = other.fMinMatchLen; | 93 fMinMatchLen = other.fMinMatchLen; |
85 fFrameSize = other.fFrameSize; | 94 fFrameSize = other.fFrameSize; |
86 fDataSize = other.fDataSize; | 95 fDataSize = other.fDataSize; |
87 fMaxCaptureDigits = other.fMaxCaptureDigits; | |
88 fStaticSets = other.fStaticSets; | 96 fStaticSets = other.fStaticSets; |
89 fStaticSets8 = other.fStaticSets8; | 97 fStaticSets8 = other.fStaticSets8; |
90 | 98 |
91 fStartType = other.fStartType; | 99 fStartType = other.fStartType; |
92 fInitialStringIdx = other.fInitialStringIdx; | 100 fInitialStringIdx = other.fInitialStringIdx; |
93 fInitialStringLen = other.fInitialStringLen; | 101 fInitialStringLen = other.fInitialStringLen; |
94 *fInitialChars = *other.fInitialChars; | 102 *fInitialChars = *other.fInitialChars; |
95 fInitialChar = other.fInitialChar; | 103 fInitialChar = other.fInitialChar; |
96 *fInitialChars8 = *other.fInitialChars8; | 104 *fInitialChars8 = *other.fInitialChars8; |
97 fNeedsAltInput = other.fNeedsAltInput; | 105 fNeedsAltInput = other.fNeedsAltInput; |
(...skipping 20 matching lines...) Expand all Loading... |
118 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); | 126 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); |
119 UnicodeSet *newSet = new UnicodeSet(*sourceSet); | 127 UnicodeSet *newSet = new UnicodeSet(*sourceSet); |
120 if (newSet == NULL) { | 128 if (newSet == NULL) { |
121 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; | 129 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; |
122 break; | 130 break; |
123 } | 131 } |
124 fSets->addElement(newSet, fDeferredStatus); | 132 fSets->addElement(newSet, fDeferredStatus); |
125 fSets8[i] = other.fSets8[i]; | 133 fSets8[i] = other.fSets8[i]; |
126 } | 134 } |
127 | 135 |
| 136 // Copy the named capture group hash map. |
| 137 int32_t hashPos = UHASH_FIRST; |
| 138 while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap
, &hashPos)) { |
| 139 if (U_FAILURE(fDeferredStatus)) { |
| 140 break; |
| 141 } |
| 142 const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer; |
| 143 UnicodeString *key = new UnicodeString(*name); |
| 144 int32_t val = hashEl->value.integer; |
| 145 if (key == NULL) { |
| 146 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; |
| 147 } else { |
| 148 uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus); |
| 149 } |
| 150 } |
128 return *this; | 151 return *this; |
129 } | 152 } |
130 | 153 |
131 | 154 |
132 //-------------------------------------------------------------------------- | 155 //-------------------------------------------------------------------------- |
133 // | 156 // |
134 // init Shared initialization for use by constructors. | 157 // init Shared initialization for use by constructors. |
135 // Bring an uninitialized RegexPattern up to a default state. | 158 // Bring an uninitialized RegexPattern up to a default state. |
136 // | 159 // |
137 //-------------------------------------------------------------------------- | 160 //-------------------------------------------------------------------------- |
138 void RegexPattern::init() { | 161 void RegexPattern::init() { |
139 fFlags = 0; | 162 fFlags = 0; |
140 fCompiledPat = 0; | 163 fCompiledPat = 0; |
141 fLiteralText.remove(); | 164 fLiteralText.remove(); |
142 fSets = NULL; | 165 fSets = NULL; |
143 fSets8 = NULL; | 166 fSets8 = NULL; |
144 fDeferredStatus = U_ZERO_ERROR; | 167 fDeferredStatus = U_ZERO_ERROR; |
145 fMinMatchLen = 0; | 168 fMinMatchLen = 0; |
146 fFrameSize = 0; | 169 fFrameSize = 0; |
147 fDataSize = 0; | 170 fDataSize = 0; |
148 fGroupMap = NULL; | 171 fGroupMap = NULL; |
149 fMaxCaptureDigits = 1; | |
150 fStaticSets = NULL; | 172 fStaticSets = NULL; |
151 fStaticSets8 = NULL; | 173 fStaticSets8 = NULL; |
152 fStartType = START_NO_INFO; | 174 fStartType = START_NO_INFO; |
153 fInitialStringIdx = 0; | 175 fInitialStringIdx = 0; |
154 fInitialStringLen = 0; | 176 fInitialStringLen = 0; |
155 fInitialChars = NULL; | 177 fInitialChars = NULL; |
156 fInitialChar = 0; | 178 fInitialChar = 0; |
157 fInitialChars8 = NULL; | 179 fInitialChars8 = NULL; |
158 fNeedsAltInput = FALSE; | 180 fNeedsAltInput = FALSE; |
| 181 fNamedCaptureMap = NULL; |
159 | 182 |
160 fPattern = NULL; // will be set later | 183 fPattern = NULL; // will be set later |
161 fPatternString = NULL; // may be set later | 184 fPatternString = NULL; // may be set later |
162 fCompiledPat = new UVector64(fDeferredStatus); | 185 fCompiledPat = new UVector64(fDeferredStatus); |
163 fGroupMap = new UVector32(fDeferredStatus); | 186 fGroupMap = new UVector32(fDeferredStatus); |
164 fSets = new UVector(fDeferredStatus); | 187 fSets = new UVector(fDeferredStatus); |
165 fInitialChars = new UnicodeSet; | 188 fInitialChars = new UnicodeSet; |
166 fInitialChars8 = new Regex8BitSet; | 189 fInitialChars8 = new Regex8BitSet; |
| 190 fNamedCaptureMap = uhash_open(uhash_hashUnicodeString, // Key hash func
tion |
| 191 uhash_compareUnicodeString, // Key comparato
r function |
| 192 uhash_compareLong, // Value compara
tor function |
| 193 &fDeferredStatus); |
167 if (U_FAILURE(fDeferredStatus)) { | 194 if (U_FAILURE(fDeferredStatus)) { |
168 return; | 195 return; |
169 } | 196 } |
170 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || | 197 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || |
171 fInitialChars == NULL || fInitialChars8 == NULL) { | 198 fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap
== NULL) { |
172 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; | 199 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; |
173 return; | 200 return; |
174 } | 201 } |
175 | 202 |
176 // Slot zero of the vector of sets is reserved. Fill it here. | 203 // Slot zero of the vector of sets is reserved. Fill it here. |
177 fSets->addElement((int32_t)0, fDeferredStatus); | 204 fSets->addElement((int32_t)0, fDeferredStatus); |
| 205 |
| 206 // fNamedCaptureMap owns its key strings, type (UnicodeString *) |
| 207 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject); |
178 } | 208 } |
179 | 209 |
180 | 210 |
181 //-------------------------------------------------------------------------- | 211 //-------------------------------------------------------------------------- |
182 // | 212 // |
183 // zap Delete everything owned by this RegexPattern. | 213 // zap Delete everything owned by this RegexPattern. |
184 // | 214 // |
185 //-------------------------------------------------------------------------- | 215 //-------------------------------------------------------------------------- |
186 void RegexPattern::zap() { | 216 void RegexPattern::zap() { |
187 delete fCompiledPat; | 217 delete fCompiledPat; |
(...skipping 17 matching lines...) Expand all Loading... |
205 delete fInitialChars8; | 235 delete fInitialChars8; |
206 fInitialChars8 = NULL; | 236 fInitialChars8 = NULL; |
207 if (fPattern != NULL) { | 237 if (fPattern != NULL) { |
208 utext_close(fPattern); | 238 utext_close(fPattern); |
209 fPattern = NULL; | 239 fPattern = NULL; |
210 } | 240 } |
211 if (fPatternString != NULL) { | 241 if (fPatternString != NULL) { |
212 delete fPatternString; | 242 delete fPatternString; |
213 fPatternString = NULL; | 243 fPatternString = NULL; |
214 } | 244 } |
| 245 uhash_close(fNamedCaptureMap); |
| 246 fNamedCaptureMap = NULL; |
215 } | 247 } |
216 | 248 |
217 | 249 |
218 //-------------------------------------------------------------------------- | 250 //-------------------------------------------------------------------------- |
219 // | 251 // |
220 // Destructor | 252 // Destructor |
221 // | 253 // |
222 //-------------------------------------------------------------------------- | 254 //-------------------------------------------------------------------------- |
223 RegexPattern::~RegexPattern() { | 255 RegexPattern::~RegexPattern() { |
224 zap(); | 256 zap(); |
(...skipping 337 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
562 | 594 |
563 if (fPattern != NULL) { | 595 if (fPattern != NULL) { |
564 return fPattern; | 596 return fPattern; |
565 } else { | 597 } else { |
566 RegexStaticSets::initGlobals(&status); | 598 RegexStaticSets::initGlobals(&status); |
567 return RegexStaticSets::gStaticSets->fEmptyText; | 599 return RegexStaticSets::gStaticSets->fEmptyText; |
568 } | 600 } |
569 } | 601 } |
570 | 602 |
571 | 603 |
| 604 //------------------------------------------------------------------------------
-- |
| 605 // |
| 606 // groupNumberFromName() |
| 607 // |
| 608 //------------------------------------------------------------------------------
-- |
| 609 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UError
Code &status) const { |
| 610 if (U_FAILURE(status)) { |
| 611 return 0; |
| 612 } |
| 613 |
| 614 // No need to explicitly check for syntactically valid names. |
| 615 // Invalid ones will never be in the map, and the lookup will fail. |
| 616 |
| 617 int32_t number = uhash_geti(fNamedCaptureMap, &groupName); |
| 618 if (number == 0) { |
| 619 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; |
| 620 } |
| 621 return number; |
| 622 } |
| 623 |
| 624 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLen
gth, UErrorCode &status) const { |
| 625 if (U_FAILURE(status)) { |
| 626 return 0; |
| 627 } |
| 628 UnicodeString name(groupName, nameLength, US_INV); |
| 629 return groupNumberFromName(name, status); |
| 630 } |
| 631 |
572 | 632 |
573 //--------------------------------------------------------------------- | 633 //--------------------------------------------------------------------- |
574 // | 634 // |
575 // split | 635 // split |
576 // | 636 // |
577 //--------------------------------------------------------------------- | 637 //--------------------------------------------------------------------- |
578 int32_t RegexPattern::split(const UnicodeString &input, | 638 int32_t RegexPattern::split(const UnicodeString &input, |
579 UnicodeString dest[], | 639 UnicodeString dest[], |
580 int32_t destCapacity, | 640 int32_t destCapacity, |
581 UErrorCode &status) const | 641 UErrorCode &status) const |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
675 case URX_LA_START: | 735 case URX_LA_START: |
676 case URX_LA_END: | 736 case URX_LA_END: |
677 case URX_BACKREF_I: | 737 case URX_BACKREF_I: |
678 case URX_LB_START: | 738 case URX_LB_START: |
679 case URX_LB_CONT: | 739 case URX_LB_CONT: |
680 case URX_LB_END: | 740 case URX_LB_END: |
681 case URX_LBN_CONT: | 741 case URX_LBN_CONT: |
682 case URX_LBN_END: | 742 case URX_LBN_END: |
683 case URX_LOOP_C: | 743 case URX_LOOP_C: |
684 case URX_LOOP_DOT_I: | 744 case URX_LOOP_DOT_I: |
| 745 case URX_BACKSLASH_H: |
| 746 case URX_BACKSLASH_R: |
| 747 case URX_BACKSLASH_V: |
685 // types with an integer operand field. | 748 // types with an integer operand field. |
686 printf("%d", val); | 749 printf("%d", val); |
687 break; | 750 break; |
688 | 751 |
689 case URX_ONECHAR: | 752 case URX_ONECHAR: |
690 case URX_ONECHAR_I: | 753 case URX_ONECHAR_I: |
691 printf("%c", val<256?val:'?'); | 754 printf("%c", val<256?val:'?'); |
692 break; | 755 break; |
693 | 756 |
694 case URX_STRING: | 757 case URX_STRING: |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
739 printf("??????"); | 802 printf("??????"); |
740 break; | 803 break; |
741 } | 804 } |
742 printf("\n"); | 805 printf("\n"); |
743 #endif | 806 #endif |
744 } | 807 } |
745 | 808 |
746 | 809 |
747 void RegexPattern::dumpPattern() const { | 810 void RegexPattern::dumpPattern() const { |
748 #if defined(REGEX_DEBUG) | 811 #if defined(REGEX_DEBUG) |
| 812 // TODO: This function assumes an ASCII based charset. |
749 int index; | 813 int index; |
750 int i; | 814 int i; |
751 | 815 |
752 printf("Original Pattern: "); | 816 printf("Original Pattern: "); |
753 UChar32 c = utext_next32From(fPattern, 0); | 817 UChar32 c = utext_next32From(fPattern, 0); |
754 while (c != U_SENTINEL) { | 818 while (c != U_SENTINEL) { |
755 if (c<32 || c>256) { | 819 if (c<32 || c>256) { |
756 c = '.'; | 820 c = '.'; |
757 } | 821 } |
758 printf("%c", c); | 822 printf("%c", c); |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
790 | 854 |
791 } else if (fStartType == START_CHAR) { | 855 } else if (fStartType == START_CHAR) { |
792 printf(" First char of Match : "); | 856 printf(" First char of Match : "); |
793 if (0x20 < fInitialChar && fInitialChar<0x7e) { | 857 if (0x20 < fInitialChar && fInitialChar<0x7e) { |
794 printf("%c\n", fInitialChar); | 858 printf("%c\n", fInitialChar); |
795 } else { | 859 } else { |
796 printf("%#x\n", fInitialChar); | 860 printf("%#x\n", fInitialChar); |
797 } | 861 } |
798 } | 862 } |
799 | 863 |
| 864 printf("Named Capture Groups:\n"); |
| 865 if (uhash_count(fNamedCaptureMap) == 0) { |
| 866 printf(" None\n"); |
| 867 } else { |
| 868 int32_t pos = UHASH_FIRST; |
| 869 const UHashElement *el = NULL; |
| 870 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) { |
| 871 const UnicodeString *name = (const UnicodeString *)el->key.pointer; |
| 872 char s[100]; |
| 873 name->extract(0, 99, s, sizeof(s), US_INV); // capture group names
are invariant. |
| 874 int32_t number = el->value.integer; |
| 875 printf(" %d\t%s\n", number, s); |
| 876 } |
| 877 } |
| 878 |
800 printf("\nIndex Binary Type Operand\n" \ | 879 printf("\nIndex Binary Type Operand\n" \ |
801 "-------------------------------------------\n"); | 880 "-------------------------------------------\n"); |
802 for (index = 0; index<fCompiledPat->size(); index++) { | 881 for (index = 0; index<fCompiledPat->size(); index++) { |
803 dumpOp(index); | 882 dumpOp(index); |
804 } | 883 } |
805 printf("\n\n"); | 884 printf("\n\n"); |
806 #endif | 885 #endif |
807 } | 886 } |
808 | 887 |
809 | 888 |
810 | 889 |
811 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) | 890 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) |
812 | 891 |
813 U_NAMESPACE_END | 892 U_NAMESPACE_END |
814 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS | 893 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS |
OLD | NEW |