Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(578)

Side by Side Diff: source/i18n/repattrn.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/rematch.cpp ('k') | source/i18n/rulebasedcollator.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // 1 //
2 // file: repattrn.cpp 2 // file: repattrn.cpp
3 // 3 //
4 /* 4 /*
5 *************************************************************************** 5 ***************************************************************************
6 * Copyright (C) 2002-2013 International Business Machines Corporation * 6 * Copyright (C) 2002-2015 International Business Machines Corporation *
7 * and others. All rights reserved. * 7 * and others. All rights reserved. *
8 *************************************************************************** 8 ***************************************************************************
9 */ 9 */
10 10
11 #include "unicode/utypes.h" 11 #include "unicode/utypes.h"
12 12
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
14 14
15 #include "unicode/regex.h" 15 #include "unicode/regex.h"
16 #include "unicode/uclean.h" 16 #include "unicode/uclean.h"
17 #include "uassert.h" 17 #include "uassert.h"
18 #include "uhash.h"
18 #include "uvector.h" 19 #include "uvector.h"
19 #include "uvectr32.h" 20 #include "uvectr32.h"
20 #include "uvectr64.h" 21 #include "uvectr64.h"
21 #include "regexcmp.h" 22 #include "regexcmp.h"
22 #include "regeximp.h" 23 #include "regeximp.h"
23 #include "regexst.h" 24 #include "regexst.h"
24 25
25 U_NAMESPACE_BEGIN 26 U_NAMESPACE_BEGIN
26 27
27 //-------------------------------------------------------------------------- 28 //--------------------------------------------------------------------------
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
59 return *this; 60 return *this;
60 } 61 }
61 62
62 // Clean out any previous contents of object being assigned to. 63 // Clean out any previous contents of object being assigned to.
63 zap(); 64 zap();
64 65
65 // Give target object a default initialization 66 // Give target object a default initialization
66 init(); 67 init();
67 68
68 // Copy simple fields 69 // Copy simple fields
69 if ( other.fPatternString == NULL ) { 70 fDeferredStatus = other.fDeferredStatus;
71
72 if (U_FAILURE(fDeferredStatus)) {
73 return *this;
74 }
75
76 if (other.fPatternString == NULL) {
70 fPatternString = NULL; 77 fPatternString = NULL;
71 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus); 78 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
72 } else { 79 } else {
73 fPatternString = new UnicodeString(*(other.fPatternString)); 80 fPatternString = new UnicodeString(*(other.fPatternString));
74 UErrorCode status = U_ZERO_ERROR; 81 if (fPatternString == NULL) {
75 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &status);
76 if (U_FAILURE(status)) {
77 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 82 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
78 return *this; 83 } else {
84 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
79 } 85 }
80 } 86 }
87 if (U_FAILURE(fDeferredStatus)) {
88 return *this;
89 }
90
81 fFlags = other.fFlags; 91 fFlags = other.fFlags;
82 fLiteralText = other.fLiteralText; 92 fLiteralText = other.fLiteralText;
83 fDeferredStatus = other.fDeferredStatus;
84 fMinMatchLen = other.fMinMatchLen; 93 fMinMatchLen = other.fMinMatchLen;
85 fFrameSize = other.fFrameSize; 94 fFrameSize = other.fFrameSize;
86 fDataSize = other.fDataSize; 95 fDataSize = other.fDataSize;
87 fMaxCaptureDigits = other.fMaxCaptureDigits;
88 fStaticSets = other.fStaticSets; 96 fStaticSets = other.fStaticSets;
89 fStaticSets8 = other.fStaticSets8; 97 fStaticSets8 = other.fStaticSets8;
90 98
91 fStartType = other.fStartType; 99 fStartType = other.fStartType;
92 fInitialStringIdx = other.fInitialStringIdx; 100 fInitialStringIdx = other.fInitialStringIdx;
93 fInitialStringLen = other.fInitialStringLen; 101 fInitialStringLen = other.fInitialStringLen;
94 *fInitialChars = *other.fInitialChars; 102 *fInitialChars = *other.fInitialChars;
95 fInitialChar = other.fInitialChar; 103 fInitialChar = other.fInitialChar;
96 *fInitialChars8 = *other.fInitialChars8; 104 *fInitialChars8 = *other.fInitialChars8;
97 fNeedsAltInput = other.fNeedsAltInput; 105 fNeedsAltInput = other.fNeedsAltInput;
(...skipping 20 matching lines...) Expand all
118 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); 126 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
119 UnicodeSet *newSet = new UnicodeSet(*sourceSet); 127 UnicodeSet *newSet = new UnicodeSet(*sourceSet);
120 if (newSet == NULL) { 128 if (newSet == NULL) {
121 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 129 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
122 break; 130 break;
123 } 131 }
124 fSets->addElement(newSet, fDeferredStatus); 132 fSets->addElement(newSet, fDeferredStatus);
125 fSets8[i] = other.fSets8[i]; 133 fSets8[i] = other.fSets8[i];
126 } 134 }
127 135
136 // Copy the named capture group hash map.
137 int32_t hashPos = UHASH_FIRST;
138 while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
139 if (U_FAILURE(fDeferredStatus)) {
140 break;
141 }
142 const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
143 UnicodeString *key = new UnicodeString(*name);
144 int32_t val = hashEl->value.integer;
145 if (key == NULL) {
146 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
147 } else {
148 uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
149 }
150 }
128 return *this; 151 return *this;
129 } 152 }
130 153
131 154
132 //-------------------------------------------------------------------------- 155 //--------------------------------------------------------------------------
133 // 156 //
134 // init Shared initialization for use by constructors. 157 // init Shared initialization for use by constructors.
135 // Bring an uninitialized RegexPattern up to a default state. 158 // Bring an uninitialized RegexPattern up to a default state.
136 // 159 //
137 //-------------------------------------------------------------------------- 160 //--------------------------------------------------------------------------
138 void RegexPattern::init() { 161 void RegexPattern::init() {
139 fFlags = 0; 162 fFlags = 0;
140 fCompiledPat = 0; 163 fCompiledPat = 0;
141 fLiteralText.remove(); 164 fLiteralText.remove();
142 fSets = NULL; 165 fSets = NULL;
143 fSets8 = NULL; 166 fSets8 = NULL;
144 fDeferredStatus = U_ZERO_ERROR; 167 fDeferredStatus = U_ZERO_ERROR;
145 fMinMatchLen = 0; 168 fMinMatchLen = 0;
146 fFrameSize = 0; 169 fFrameSize = 0;
147 fDataSize = 0; 170 fDataSize = 0;
148 fGroupMap = NULL; 171 fGroupMap = NULL;
149 fMaxCaptureDigits = 1;
150 fStaticSets = NULL; 172 fStaticSets = NULL;
151 fStaticSets8 = NULL; 173 fStaticSets8 = NULL;
152 fStartType = START_NO_INFO; 174 fStartType = START_NO_INFO;
153 fInitialStringIdx = 0; 175 fInitialStringIdx = 0;
154 fInitialStringLen = 0; 176 fInitialStringLen = 0;
155 fInitialChars = NULL; 177 fInitialChars = NULL;
156 fInitialChar = 0; 178 fInitialChar = 0;
157 fInitialChars8 = NULL; 179 fInitialChars8 = NULL;
158 fNeedsAltInput = FALSE; 180 fNeedsAltInput = FALSE;
181 fNamedCaptureMap = NULL;
159 182
160 fPattern = NULL; // will be set later 183 fPattern = NULL; // will be set later
161 fPatternString = NULL; // may be set later 184 fPatternString = NULL; // may be set later
162 fCompiledPat = new UVector64(fDeferredStatus); 185 fCompiledPat = new UVector64(fDeferredStatus);
163 fGroupMap = new UVector32(fDeferredStatus); 186 fGroupMap = new UVector32(fDeferredStatus);
164 fSets = new UVector(fDeferredStatus); 187 fSets = new UVector(fDeferredStatus);
165 fInitialChars = new UnicodeSet; 188 fInitialChars = new UnicodeSet;
166 fInitialChars8 = new Regex8BitSet; 189 fInitialChars8 = new Regex8BitSet;
190 fNamedCaptureMap = uhash_open(uhash_hashUnicodeString, // Key hash function
191 uhash_compareUnicodeString, // Key comparator function
192 uhash_compareLong, // Value comparator function
193 &fDeferredStatus);
167 if (U_FAILURE(fDeferredStatus)) { 194 if (U_FAILURE(fDeferredStatus)) {
168 return; 195 return;
169 } 196 }
170 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || 197 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL ||
171 fInitialChars == NULL || fInitialChars8 == NULL) { 198 fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) {
172 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 199 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
173 return; 200 return;
174 } 201 }
175 202
176 // Slot zero of the vector of sets is reserved. Fill it here. 203 // Slot zero of the vector of sets is reserved. Fill it here.
177 fSets->addElement((int32_t)0, fDeferredStatus); 204 fSets->addElement((int32_t)0, fDeferredStatus);
205
206 // fNamedCaptureMap owns its key strings, type (UnicodeString *)
207 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
178 } 208 }
179 209
180 210
181 //-------------------------------------------------------------------------- 211 //--------------------------------------------------------------------------
182 // 212 //
183 // zap Delete everything owned by this RegexPattern. 213 // zap Delete everything owned by this RegexPattern.
184 // 214 //
185 //-------------------------------------------------------------------------- 215 //--------------------------------------------------------------------------
186 void RegexPattern::zap() { 216 void RegexPattern::zap() {
187 delete fCompiledPat; 217 delete fCompiledPat;
(...skipping 17 matching lines...) Expand all
205 delete fInitialChars8; 235 delete fInitialChars8;
206 fInitialChars8 = NULL; 236 fInitialChars8 = NULL;
207 if (fPattern != NULL) { 237 if (fPattern != NULL) {
208 utext_close(fPattern); 238 utext_close(fPattern);
209 fPattern = NULL; 239 fPattern = NULL;
210 } 240 }
211 if (fPatternString != NULL) { 241 if (fPatternString != NULL) {
212 delete fPatternString; 242 delete fPatternString;
213 fPatternString = NULL; 243 fPatternString = NULL;
214 } 244 }
245 uhash_close(fNamedCaptureMap);
246 fNamedCaptureMap = NULL;
215 } 247 }
216 248
217 249
218 //-------------------------------------------------------------------------- 250 //--------------------------------------------------------------------------
219 // 251 //
220 // Destructor 252 // Destructor
221 // 253 //
222 //-------------------------------------------------------------------------- 254 //--------------------------------------------------------------------------
223 RegexPattern::~RegexPattern() { 255 RegexPattern::~RegexPattern() {
224 zap(); 256 zap();
(...skipping 337 matching lines...) Expand 10 before | Expand all | Expand 10 after
562 594
563 if (fPattern != NULL) { 595 if (fPattern != NULL) {
564 return fPattern; 596 return fPattern;
565 } else { 597 } else {
566 RegexStaticSets::initGlobals(&status); 598 RegexStaticSets::initGlobals(&status);
567 return RegexStaticSets::gStaticSets->fEmptyText; 599 return RegexStaticSets::gStaticSets->fEmptyText;
568 } 600 }
569 } 601 }
570 602
571 603
604 //--------------------------------------------------------------------------------
605 //
606 // groupNumberFromName()
607 //
608 //--------------------------------------------------------------------------------
609 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
610 if (U_FAILURE(status)) {
611 return 0;
612 }
613
614 // No need to explicitly check for syntactically valid names.
615 // Invalid ones will never be in the map, and the lookup will fail.
616
617 int32_t number = uhash_geti(fNamedCaptureMap, &groupName);
618 if (number == 0) {
619 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
620 }
621 return number;
622 }
623
624 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
625 if (U_FAILURE(status)) {
626 return 0;
627 }
628 UnicodeString name(groupName, nameLength, US_INV);
629 return groupNumberFromName(name, status);
630 }
631
572 632
573 //--------------------------------------------------------------------- 633 //---------------------------------------------------------------------
574 // 634 //
575 // split 635 // split
576 // 636 //
577 //--------------------------------------------------------------------- 637 //---------------------------------------------------------------------
578 int32_t RegexPattern::split(const UnicodeString &input, 638 int32_t RegexPattern::split(const UnicodeString &input,
579 UnicodeString dest[], 639 UnicodeString dest[],
580 int32_t destCapacity, 640 int32_t destCapacity,
581 UErrorCode &status) const 641 UErrorCode &status) const
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
675 case URX_LA_START: 735 case URX_LA_START:
676 case URX_LA_END: 736 case URX_LA_END:
677 case URX_BACKREF_I: 737 case URX_BACKREF_I:
678 case URX_LB_START: 738 case URX_LB_START:
679 case URX_LB_CONT: 739 case URX_LB_CONT:
680 case URX_LB_END: 740 case URX_LB_END:
681 case URX_LBN_CONT: 741 case URX_LBN_CONT:
682 case URX_LBN_END: 742 case URX_LBN_END:
683 case URX_LOOP_C: 743 case URX_LOOP_C:
684 case URX_LOOP_DOT_I: 744 case URX_LOOP_DOT_I:
745 case URX_BACKSLASH_H:
746 case URX_BACKSLASH_R:
747 case URX_BACKSLASH_V:
685 // types with an integer operand field. 748 // types with an integer operand field.
686 printf("%d", val); 749 printf("%d", val);
687 break; 750 break;
688 751
689 case URX_ONECHAR: 752 case URX_ONECHAR:
690 case URX_ONECHAR_I: 753 case URX_ONECHAR_I:
691 printf("%c", val<256?val:'?'); 754 printf("%c", val<256?val:'?');
692 break; 755 break;
693 756
694 case URX_STRING: 757 case URX_STRING:
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
739 printf("??????"); 802 printf("??????");
740 break; 803 break;
741 } 804 }
742 printf("\n"); 805 printf("\n");
743 #endif 806 #endif
744 } 807 }
745 808
746 809
747 void RegexPattern::dumpPattern() const { 810 void RegexPattern::dumpPattern() const {
748 #if defined(REGEX_DEBUG) 811 #if defined(REGEX_DEBUG)
812 // TODO: This function assumes an ASCII based charset.
749 int index; 813 int index;
750 int i; 814 int i;
751 815
752 printf("Original Pattern: "); 816 printf("Original Pattern: ");
753 UChar32 c = utext_next32From(fPattern, 0); 817 UChar32 c = utext_next32From(fPattern, 0);
754 while (c != U_SENTINEL) { 818 while (c != U_SENTINEL) {
755 if (c<32 || c>256) { 819 if (c<32 || c>256) {
756 c = '.'; 820 c = '.';
757 } 821 }
758 printf("%c", c); 822 printf("%c", c);
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
790 854
791 } else if (fStartType == START_CHAR) { 855 } else if (fStartType == START_CHAR) {
792 printf(" First char of Match : "); 856 printf(" First char of Match : ");
793 if (0x20 < fInitialChar && fInitialChar<0x7e) { 857 if (0x20 < fInitialChar && fInitialChar<0x7e) {
794 printf("%c\n", fInitialChar); 858 printf("%c\n", fInitialChar);
795 } else { 859 } else {
796 printf("%#x\n", fInitialChar); 860 printf("%#x\n", fInitialChar);
797 } 861 }
798 } 862 }
799 863
864 printf("Named Capture Groups:\n");
865 if (uhash_count(fNamedCaptureMap) == 0) {
866 printf(" None\n");
867 } else {
868 int32_t pos = UHASH_FIRST;
869 const UHashElement *el = NULL;
870 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
871 const UnicodeString *name = (const UnicodeString *)el->key.pointer;
872 char s[100];
873 name->extract(0, 99, s, sizeof(s), US_INV); // capture group names are invariant.
874 int32_t number = el->value.integer;
875 printf(" %d\t%s\n", number, s);
876 }
877 }
878
800 printf("\nIndex Binary Type Operand\n" \ 879 printf("\nIndex Binary Type Operand\n" \
801 "-------------------------------------------\n"); 880 "-------------------------------------------\n");
802 for (index = 0; index<fCompiledPat->size(); index++) { 881 for (index = 0; index<fCompiledPat->size(); index++) {
803 dumpOp(index); 882 dumpOp(index);
804 } 883 }
805 printf("\n\n"); 884 printf("\n\n");
806 #endif 885 #endif
807 } 886 }
808 887
809 888
810 889
811 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) 890 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
812 891
813 U_NAMESPACE_END 892 U_NAMESPACE_END
814 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 893 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
OLDNEW
« no previous file with comments | « source/i18n/rematch.cpp ('k') | source/i18n/rulebasedcollator.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698