Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(199)

Side by Side Diff: third_party/icu38/source/i18n/rematch.cpp

Issue 40038: Apply a security patch for ICU regex. (http://bugs.icu-project.org/trac/ticke... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 11 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 ************************************************************************** 2 **************************************************************************
3 * Copyright (C) 2002-2007 International Business Machines Corporation * 3 * Copyright (C) 2002-2008 International Business Machines Corporation *
4 * and others. All rights reserved. * 4 * and others. All rights reserved. *
5 ************************************************************************** 5 **************************************************************************
6 */ 6 */
7 // 7 //
8 // file: rematch.cpp 8 // file: rematch.cpp
9 // 9 //
10 // Contains the implementation of class RegexMatcher, 10 // Contains the implementation of class RegexMatcher,
11 // which is one of the main API classes for the ICU regular expression p ackage. 11 // which is one of the main API classes for the ICU regular expression p ackage.
12 // 12 //
13 13
14 #include "unicode/utypes.h" 14 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
16 16
17 #include "unicode/regex.h" 17 #include "unicode/regex.h"
18 #include "unicode/uniset.h" 18 #include "unicode/uniset.h"
19 #include "unicode/uchar.h" 19 #include "unicode/uchar.h"
20 #include "unicode/ustring.h" 20 #include "unicode/ustring.h"
21 #include "unicode/rbbi.h" 21 #include "unicode/rbbi.h"
22 #include "uassert.h" 22 #include "uassert.h"
23 #include "cmemory.h" 23 #include "cmemory.h"
24 #include "uvector.h" 24 #include "uvector.h"
25 #include "uvectr32.h" 25 #include "uvectr32.h"
26 #include "regeximp.h" 26 #include "regeximp.h"
27 #include "regexst.h" 27 #include "regexst.h"
28 28
29 // #include <malloc.h> // Needed for heapcheck testing 29 // #include <malloc.h> // Needed for heapcheck testing
30 30
31 U_NAMESPACE_BEGIN 31 U_NAMESPACE_BEGIN
32 32
33 // Limit the size of the back track stack, to avoid system failures caused
34 // by heap exhaustion. Units are in 32 bit words, not bytes.
35 // This value puts ICU's limits higher than most other regexp implementations,
36 // which use recursion rather than the heap, and take more storage per
37 // backtrack point.
38 // This constant is _temporary_. Proper API to control the value will added.
39 //
40 static const int32_t BACKTRACK_STACK_CAPACITY = 8000000;
41
33 //----------------------------------------------------------------------------- 42 //-----------------------------------------------------------------------------
34 // 43 //
35 // Constructor and Destructor 44 // Constructor and Destructor
36 // 45 //
37 //----------------------------------------------------------------------------- 46 //-----------------------------------------------------------------------------
38 RegexMatcher::RegexMatcher(const RegexPattern *pat) { 47 RegexMatcher::RegexMatcher(const RegexPattern *pat) {
39 fPattern = pat; 48 fPattern = pat;
40 fPatternOwned = NULL; 49 fPatternOwned = NULL;
41 fInput = NULL; 50 fInput = NULL;
42 fTraceDebug = FALSE; 51 fTraceDebug = FALSE;
43 fDeferredStatus = U_ZERO_ERROR; 52 fDeferredStatus = U_ZERO_ERROR;
44 fStack = new UVector32(fDeferredStatus); 53 fStack = new UVector32(fDeferredStatus);
45 fData = fSmallData; 54 fData = fSmallData;
46 fWordBreakItr = NULL; 55 fWordBreakItr = NULL;
47 if (pat==NULL) { 56 if (pat==NULL) {
48 fDeferredStatus = U_ILLEGAL_ARGUMENT_ERROR; 57 fDeferredStatus = U_ILLEGAL_ARGUMENT_ERROR;
49 return; 58 return;
50 } 59 }
51 if (pat->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(int32_t))) { 60 if (pat->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(int32_t))) {
52 fData = (int32_t *)uprv_malloc(pat->fDataSize * sizeof(int32_t)); 61 fData = (int32_t *)uprv_malloc(pat->fDataSize * sizeof(int32_t));
53 } 62 }
54 if (fStack == NULL || fData == NULL) { 63 if (fStack == NULL || fData == NULL) {
55 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 64 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
65 } else {
66 fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
56 } 67 }
57
58 reset(RegexStaticSets::gStaticSets->fEmptyString); 68 reset(RegexStaticSets::gStaticSets->fEmptyString);
59 } 69 }
60 70
61 71
62 72
63 RegexMatcher::RegexMatcher(const UnicodeString &regexp, const UnicodeString &inp ut, 73 RegexMatcher::RegexMatcher(const UnicodeString &regexp, const UnicodeString &inp ut,
64 uint32_t flags, UErrorCode &status) { 74 uint32_t flags, UErrorCode &status) {
65 UParseError pe; 75 UParseError pe;
66 fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 76 fPatternOwned = RegexPattern::compile(regexp, flags, pe, status);
67 fPattern = fPatternOwned; 77 fPattern = fPatternOwned;
68 fTraceDebug = FALSE; 78 fTraceDebug = FALSE;
69 fDeferredStatus = U_ZERO_ERROR; 79 fDeferredStatus = U_ZERO_ERROR;
70 fStack = new UVector32(status); 80 fStack = new UVector32(status);
71 fData = fSmallData; 81 fData = fSmallData;
72 fWordBreakItr = NULL; 82 fWordBreakItr = NULL;
73 if (U_FAILURE(status)) { 83 if (U_FAILURE(status)) {
74 return; 84 return;
75 } 85 }
76 if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(int32_t))) { 86 if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(int32_t))) {
77 fData = (int32_t *)uprv_malloc(fPattern->fDataSize * sizeof(int32_t)); 87 fData = (int32_t *)uprv_malloc(fPattern->fDataSize * sizeof(int32_t));
78 } 88 }
79 if (fStack == NULL || fData == NULL) { 89 if (fStack == NULL || fData == NULL) {
80 status = U_MEMORY_ALLOCATION_ERROR; 90 status = U_MEMORY_ALLOCATION_ERROR;
91 } else {
92 fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
81 } 93 }
82 reset(input); 94 reset(input);
83 } 95 }
84 96
85 97
86 RegexMatcher::RegexMatcher(const UnicodeString &regexp, 98 RegexMatcher::RegexMatcher(const UnicodeString &regexp,
87 uint32_t flags, UErrorCode &status) { 99 uint32_t flags, UErrorCode &status) {
88 UParseError pe; 100 UParseError pe;
89 fTraceDebug = FALSE; 101 fTraceDebug = FALSE;
90 fDeferredStatus = U_ZERO_ERROR; 102 fDeferredStatus = U_ZERO_ERROR;
91 fStack = new UVector32(status); 103 fStack = new UVector32(status);
92 fData = fSmallData; 104 fData = fSmallData;
93 fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 105 fPatternOwned = RegexPattern::compile(regexp, flags, pe, status);
94 fPattern = fPatternOwned; 106 fPattern = fPatternOwned;
95 fWordBreakItr = NULL; 107 fWordBreakItr = NULL;
96 if (U_FAILURE(status)) { 108 if (U_FAILURE(status)) {
97 return; 109 return;
98 } 110 }
99 111
100 if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(int32_t))) { 112 if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(int32_t))) {
101 fData = (int32_t *)uprv_malloc(fPattern->fDataSize * sizeof(int32_t)); 113 fData = (int32_t *)uprv_malloc(fPattern->fDataSize * sizeof(int32_t));
102 } 114 }
103 if (fStack == NULL || fData == NULL) { 115 if (fStack == NULL || fData == NULL) {
104 status = U_MEMORY_ALLOCATION_ERROR; 116 status = U_MEMORY_ALLOCATION_ERROR;
117 } else {
118 fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
105 } 119 }
106 reset(RegexStaticSets::gStaticSets->fEmptyString); 120 reset(RegexStaticSets::gStaticSets->fEmptyString);
107 } 121 }
108 122
109 123
110 124
111 RegexMatcher::~RegexMatcher() { 125 RegexMatcher::~RegexMatcher() {
112 delete fStack; 126 delete fStack;
113 if (fData != fSmallData) { 127 if (fData != fSmallData) {
114 uprv_free(fData); 128 uprv_free(fData);
(...skipping 892 matching lines...) Expand 10 before | Expand all | Expand 10 after
1007 // in the opcode. Execution of the engine continues with the state in 1021 // in the opcode. Execution of the engine continues with the state in
1008 // the newly created stack frame 1022 // the newly created stack frame
1009 // 1023 //
1010 // Note that reserveBlock() may grow the stack, resulting in the 1024 // Note that reserveBlock() may grow the stack, resulting in the
1011 // whole thing being relocated in memory. 1025 // whole thing being relocated in memory.
1012 // 1026 //
1013 //------------------------------------------------------------------------------ -- 1027 //------------------------------------------------------------------------------ --
1014 inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int32_t savePatId x, int32_t frameSize, UErrorCode &status) { 1028 inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int32_t savePatId x, int32_t frameSize, UErrorCode &status) {
1015 // push storage for a new frame. 1029 // push storage for a new frame.
1016 int32_t *newFP = fStack->reserveBlock(frameSize, status); 1030 int32_t *newFP = fStack->reserveBlock(frameSize, status);
1031 if (newFP == NULL) {
1032 // Heap allocation error on attempted stack expansion.
1033 // We need to return a writable stack frame, so just return the
1034 // previous frame. The match operation will stop quickly
1035 // becuase of the error status, after which the frame will never
1036 // be looked at again.
1037 return fp;
1038 }
1017 fp = (REStackFrame *)(newFP - frameSize); // in case of realloc of stack. 1039 fp = (REStackFrame *)(newFP - frameSize); // in case of realloc of stack.
1018 1040
1019 // New stack frame = copy of old top frame. 1041 // New stack frame = copy of old top frame.
1020 int32_t *source = (int32_t *)fp; 1042 int32_t *source = (int32_t *)fp;
1021 int32_t *dest = newFP; 1043 int32_t *dest = newFP;
1022 for (;;) { 1044 for (;;) {
1023 *dest++ = *source++; 1045 *dest++ = *source++;
1024 if (source == newFP) { 1046 if (source == newFP) {
1025 break; 1047 break;
1026 } 1048 }
1027 } 1049 }
1028 1050
1029 fp->fPatIdx = savePatIdx; 1051 fp->fPatIdx = savePatIdx;
1030 return (REStackFrame *)newFP; 1052 return (REStackFrame *)newFP;
1031 } 1053 }
1032 1054
1033 1055
1034 //------------------------------------------------------------------------------ -- 1056 //------------------------------------------------------------------------------ --
1035 // 1057 //
1036 // MatchAt This is the actual matching engine. 1058 // MatchAt This is the actual matching engine.
1037 // 1059 //
1038 //------------------------------------------------------------------------------ -- 1060 //------------------------------------------------------------------------------ --
1039 void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) { 1061 void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
1040 UBool isMatch = FALSE; // True if the we have a match. 1062 UBool isMatch = FALSE; // True if the we have a match.
1041 1063
1042 int32_t op; // Operation from the compiled pattern, s plit into 1064 int32_t op; // Operation from the compiled pattern, s plit into
1043 int32_t opType; // the opcode 1065 int32_t opType; // the opcode
(...skipping 1210 matching lines...) Expand 10 before | Expand all | Expand 10 after
2254 2276
2255 2277
2256 2278
2257 default: 2279 default:
2258 // Trouble. The compiled pattern contains an entry with an 2280 // Trouble. The compiled pattern contains an entry with an
2259 // unrecognized type tag. 2281 // unrecognized type tag.
2260 U_ASSERT(FALSE); 2282 U_ASSERT(FALSE);
2261 } 2283 }
2262 2284
2263 if (U_FAILURE(status)) { 2285 if (U_FAILURE(status)) {
2286 isMatch = FALSE;
2264 break; 2287 break;
2265 } 2288 }
2266 } 2289 }
2267 2290
2268 breakFromLoop: 2291 breakFromLoop:
2269 fMatch = isMatch; 2292 fMatch = isMatch;
2270 if (isMatch) { 2293 if (isMatch) {
2271 fLastMatchEnd = fMatchEnd; 2294 fLastMatchEnd = fMatchEnd;
2272 fMatchStart = startIdx; 2295 fMatchStart = startIdx;
2273 fMatchEnd = fp->fInputIdx; 2296 fMatchEnd = fp->fInputIdx;
(...skipping 16 matching lines...) Expand all
2290 } 2313 }
2291 2314
2292 2315
2293 2316
2294 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexMatcher) 2317 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexMatcher)
2295 2318
2296 U_NAMESPACE_END 2319 U_NAMESPACE_END
2297 2320
2298 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 2321 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
2299 2322
OLDNEW
« no previous file with comments | « third_party/icu38/source/i18n/regexcmp.cpp ('k') | third_party/icu38/source/test/intltest/regextst.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698