Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1219)

Side by Side Diff: regexp2000/test/cctest/test-regexp.cc

Issue 8871: Experimental RegExp: changed handling of non-standard escape sequences. (Closed)
Patch Set: RegExp escape handling, with review comments. Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « regexp2000/src/parser.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
64 64
65 65
66 #ifdef USE_FUZZ_TEST_DATA 66 #ifdef USE_FUZZ_TEST_DATA
67 #include "regexp-test-data.cc" 67 #include "regexp-test-data.cc"
68 #else 68 #else
69 static const int kCaseCount = 0; 69 static const int kCaseCount = 0;
70 static const RegExpTestCase kCases[1] = { RegExpTestCase() }; 70 static const RegExpTestCase kCases[1] = { RegExpTestCase() };
71 #endif 71 #endif
72 72
73 73
74 static void ExpectParse(const char* input, 74 static SmartPointer<char> Parse(const char* input) {
75 const char* expected) {
76 v8::HandleScope scope; 75 v8::HandleScope scope;
77 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); 76 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
78 ZoneScope zone_scope(DELETE_ON_EXIT); 77 ZoneScope zone_scope(DELETE_ON_EXIT);
79 Handle<String> error; 78 Handle<String> error;
80 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); 79 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error);
81 CHECK(node != NULL); 80 CHECK(node != NULL);
82 CHECK(error.is_null()); 81 CHECK(error.is_null());
83 SmartPointer<char> output = node->ToString(); 82 SmartPointer<char> output = node->ToString();
84 CHECK_EQ(expected, *output); 83 return output;
85 } 84 }
86 85
87 86
87 #define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))
88
89
88 TEST(Parser) { 90 TEST(Parser) {
89 V8::Initialize(NULL); 91 V8::Initialize(NULL);
90 ExpectParse("abc", "'abc'"); 92 CHECK_PARSE_EQ("abc", "'abc'");
91 ExpectParse("", "%"); 93 CHECK_PARSE_EQ("", "%");
92 ExpectParse("abc|def", "(| 'abc' 'def')"); 94 CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
93 ExpectParse("abc|def|ghi", "(| 'abc' 'def' 'ghi')"); 95 CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
94 ExpectParse("\\w\\W\\s\\S\\d\\D", "(: [&w] [&W] [&s] [&S] [&d] [&D])"); 96 CHECK_PARSE_EQ("\\w\\W\\s\\S\\d\\D", "(: [&w] [&W] [&s] [&S] [&d] [&D])");
95 ExpectParse("^xxx$", "(: @^i 'xxx' @$i)"); 97 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
96 ExpectParse("ab\\b\\w\\bcd", "(: 'ab' @b [&w] @b 'cd')"); 98 CHECK_PARSE_EQ("ab\\b\\w\\bcd", "(: 'ab' @b [&w] @b 'cd')");
97 ExpectParse("\\w|\\s|.", "(| [&w] [&s] [&.])"); 99 CHECK_PARSE_EQ("\\w|\\s|.", "(| [&w] [&s] [&.])");
98 ExpectParse("a*", "(# 0 - g 'a')"); 100 CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
99 ExpectParse("a*?", "(# 0 - n 'a')"); 101 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
100 ExpectParse("abc+", "(# 1 - g 'abc')"); 102 CHECK_PARSE_EQ("abc+", "(# 1 - g 'abc')");
101 ExpectParse("abc+?", "(# 1 - n 'abc')"); 103 CHECK_PARSE_EQ("abc+?", "(# 1 - n 'abc')");
102 ExpectParse("xyz?", "(# 0 1 g 'xyz')"); 104 CHECK_PARSE_EQ("xyz?", "(# 0 1 g 'xyz')");
103 ExpectParse("xyz??", "(# 0 1 n 'xyz')"); 105 CHECK_PARSE_EQ("xyz??", "(# 0 1 n 'xyz')");
104 ExpectParse("xyz{0,1}", "(# 0 1 g 'xyz')"); 106 CHECK_PARSE_EQ("xyz{0,1}", "(# 0 1 g 'xyz')");
105 ExpectParse("xyz{0,1}?", "(# 0 1 n 'xyz')"); 107 CHECK_PARSE_EQ("xyz{0,1}?", "(# 0 1 n 'xyz')");
106 ExpectParse("xyz{93}", "(# 93 93 g 'xyz')"); 108 CHECK_PARSE_EQ("xyz{93}", "(# 93 93 g 'xyz')");
107 ExpectParse("xyz{93}?", "(# 93 93 n 'xyz')"); 109 CHECK_PARSE_EQ("xyz{93}?", "(# 93 93 n 'xyz')");
108 ExpectParse("xyz{1,32}", "(# 1 32 g 'xyz')"); 110 CHECK_PARSE_EQ("xyz{1,32}", "(# 1 32 g 'xyz')");
109 ExpectParse("xyz{1,32}?", "(# 1 32 n 'xyz')"); 111 CHECK_PARSE_EQ("xyz{1,32}?", "(# 1 32 n 'xyz')");
110 ExpectParse("xyz{1,}", "(# 1 - g 'xyz')"); 112 CHECK_PARSE_EQ("xyz{1,}", "(# 1 - g 'xyz')");
111 ExpectParse("xyz{1,}?", "(# 1 - n 'xyz')"); 113 CHECK_PARSE_EQ("xyz{1,}?", "(# 1 - n 'xyz')");
112 ExpectParse("a\\fb\\nc\\rd\\te\\vf", "'a\fb\nc\rd\te\vf'"); 114 CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\fb\nc\rd\te\vf'");
113 ExpectParse("a\\nb\\bc", "(: 'a\nb' @b 'c')"); 115 CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\nb' @b 'c')");
114 ExpectParse("(?:foo)", "'foo'"); 116 CHECK_PARSE_EQ("(?:foo)", "'foo'");
115 ExpectParse("(?: foo )", "' foo '"); 117 CHECK_PARSE_EQ("(?: foo )", "' foo '");
116 ExpectParse("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))"); 118 CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
117 ExpectParse("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')"); 119 CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
118 ExpectParse("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')"); 120 CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
119 ExpectParse("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')"); 121 CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
120 ExpectParse("()", "(^ %)"); 122 CHECK_PARSE_EQ("()", "(^ %)");
121 ExpectParse("(?=)", "(-> + %)"); 123 CHECK_PARSE_EQ("(?=)", "(-> + %)");
122 ExpectParse("[]", "%"); 124 CHECK_PARSE_EQ("[]", "%");
123 ExpectParse("[x]", "[x]"); 125 CHECK_PARSE_EQ("[x]", "[x]");
124 ExpectParse("[xyz]", "[x y z]"); 126 CHECK_PARSE_EQ("[xyz]", "[x y z]");
125 ExpectParse("[a-zA-Z0-9]", "[a-z A-Z 0-9]"); 127 CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
126 ExpectParse("[-123]", "[- 1 2 3]"); 128 CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");
127 ExpectParse("[^123]", "^[1 2 3]"); 129 CHECK_PARSE_EQ("[^123]", "^[1 2 3]");
128 ExpectParse("]", "']'"); 130 CHECK_PARSE_EQ("]", "']'");
129 ExpectParse("}", "'}'"); 131 CHECK_PARSE_EQ("}", "'}'");
130 ExpectParse("[a-b-c]", "[a-b - c]"); 132 CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
131 ExpectParse("[\\w]", "[&w]"); 133 CHECK_PARSE_EQ("[\\w]", "[&w]");
132 ExpectParse("[x\\wz]", "[x &w z]"); 134 CHECK_PARSE_EQ("[x\\wz]", "[x &w z]");
133 ExpectParse("[\\w-z]", "[&w - z]"); 135 CHECK_PARSE_EQ("[\\w-z]", "[&w - z]");
134 ExpectParse("[\\w-\\d]", "[&w - &d]"); 136 CHECK_PARSE_EQ("[\\w-\\d]", "[&w - &d]");
135 ExpectParse("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\n\n\t\t\v\v'"); 137 CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\n\n\t\t\v\v'");
136 ExpectParse("[a\\]c]", "[a ] c]"); 138 CHECK_PARSE_EQ("\\c!", "'c!'");
137 ExpectParse("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '"); 139 CHECK_PARSE_EQ("\\c_", "'c_'");
138 ExpectParse("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]"); 140 CHECK_PARSE_EQ("\\c~", "'c~'");
139 ExpectParse("\\0", "'\0'"); 141 CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
140 ExpectParse("\\11", "'\t'"); 142 CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
141 ExpectParse("\\11a", "'\ta'"); 143 CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
142 ExpectParse("\\011", "'\t'"); 144 CHECK_PARSE_EQ("\\0", "'\0'");
143 ExpectParse("\\00011", "'\t'"); 145 CHECK_PARSE_EQ("\\8", "'8'");
144 ExpectParse("\\118", "'\t8'"); 146 CHECK_PARSE_EQ("\\9", "'9'");
145 ExpectParse("\\111", "'I'"); 147 CHECK_PARSE_EQ("\\11", "'\t'");
146 ExpectParse("\\1111", "'I1'"); 148 CHECK_PARSE_EQ("\\11a", "'\ta'");
147 ExpectParse("(.)(.)(.)\\1", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 1))"); 149 CHECK_PARSE_EQ("\\011", "'\t'");
148 ExpectParse("(.)(.)(.)\\2", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 2))"); 150 CHECK_PARSE_EQ("\\00011", "'\00011'");
149 ExpectParse("(.)(.)(.)\\3", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 3))"); 151 CHECK_PARSE_EQ("\\118", "'\t8'");
150 ExpectParse("(.)(.)(.)\\4", "(: (^ [&.]) (^ [&.]) (^ [&.]) '\x04')"); 152 CHECK_PARSE_EQ("\\111", "'I'");
151 ExpectParse("(.)(.)(.)\\1*", "(: (^ [&.]) (^ [&.]) (^ [&.])" 153 CHECK_PARSE_EQ("\\1111", "'I1'");
154 CHECK_PARSE_EQ("(.)(.)(.)\\1", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 1))");
155 CHECK_PARSE_EQ("(.)(.)(.)\\2", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 2))");
156 CHECK_PARSE_EQ("(.)(.)(.)\\3", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 3))");
157 CHECK_PARSE_EQ("(.)(.)(.)\\4", "(: (^ [&.]) (^ [&.]) (^ [&.]) '\x04')");
158 CHECK_PARSE_EQ("(.)(.)(.)\\1*", "(: (^ [&.]) (^ [&.]) (^ [&.])"
152 " (# 0 - g (<- 1)))"); 159 " (# 0 - g (<- 1)))");
153 ExpectParse("(.)(.)(.)\\2*", "(: (^ [&.]) (^ [&.]) (^ [&.])" 160 CHECK_PARSE_EQ("(.)(.)(.)\\2*", "(: (^ [&.]) (^ [&.]) (^ [&.])"
154 " (# 0 - g (<- 2)))"); 161 " (# 0 - g (<- 2)))");
155 ExpectParse("(.)(.)(.)\\3*", "(: (^ [&.]) (^ [&.]) (^ [&.])" 162 CHECK_PARSE_EQ("(.)(.)(.)\\3*", "(: (^ [&.]) (^ [&.]) (^ [&.])"
156 " (# 0 - g (<- 3)))"); 163 " (# 0 - g (<- 3)))");
157 ExpectParse("(.)(.)(.)\\4*", "(: (^ [&.]) (^ [&.]) (^ [&.])" 164 CHECK_PARSE_EQ("(.)(.)(.)\\4*", "(: (^ [&.]) (^ [&.]) (^ [&.])"
158 " (# 0 - g '\x04'))"); 165 " (# 0 - g '\x04'))");
159 ExpectParse("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\10", 166 CHECK_PARSE_EQ("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\10",
160 "(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])" 167 "(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])"
161 " (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (<- 10))"); 168 " (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (<- 10))");
162 ExpectParse("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\11", 169 CHECK_PARSE_EQ("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\11",
163 "(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])" 170 "(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])"
164 " (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) '\x09')"); 171 " (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) '\x09')");
165 ExpectParse("[\\0]", "[\0]"); 172 CHECK_PARSE_EQ("[\\0]", "[\0]");
166 ExpectParse("[\\11]", "[\t]"); 173 CHECK_PARSE_EQ("[\\11]", "[\t]");
167 ExpectParse("[\\11a]", "[\t a]"); 174 CHECK_PARSE_EQ("[\\11a]", "[\t a]");
168 ExpectParse("[\\011]", "[\t]"); 175 CHECK_PARSE_EQ("[\\011]", "[\t]");
169 ExpectParse("[\\00011]", "[\t]"); 176 CHECK_PARSE_EQ("[\\00011]", "[\000 1 1]");
170 ExpectParse("[\\118]", "[\t 8]"); 177 CHECK_PARSE_EQ("[\\118]", "[\t 8]");
171 ExpectParse("[\\111]", "[I]"); 178 CHECK_PARSE_EQ("[\\111]", "[I]");
172 ExpectParse("[\\1111]", "[I 1]"); 179 CHECK_PARSE_EQ("[\\1111]", "[I 1]");
173 ExpectParse("\\x34", "'\x34'"); 180 CHECK_PARSE_EQ("\\x34", "'\x34'");
174 ExpectParse("\\x3z", "'\x03z'"); 181 CHECK_PARSE_EQ("\\x3z", "'x3z'");
182 CHECK_PARSE_EQ("\\u0034", "'\x34'");
183 CHECK_PARSE_EQ("\\u003z", "'u003z'");
175 } 184 }
176 185
177 186
178 static void ExpectError(const char* input, 187 static void ExpectError(const char* input,
179 const char* expected) { 188 const char* expected) {
180 v8::HandleScope scope; 189 v8::HandleScope scope;
181 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); 190 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
182 ZoneScope zone_scope(DELETE_ON_EXIT); 191 ZoneScope zone_scope(DELETE_ON_EXIT);
183 Handle<String> error; 192 Handle<String> error;
184 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); 193 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error);
(...skipping 21 matching lines...) Expand all
206 ExpectError("(foo", kUnterminatedGroup); 215 ExpectError("(foo", kUnterminatedGroup);
207 const char* kInvalidGroup = "Invalid group"; 216 const char* kInvalidGroup = "Invalid group";
208 ExpectError("(?", kInvalidGroup); 217 ExpectError("(?", kInvalidGroup);
209 const char* kUnterminatedCharacterClass = "Unterminated character class"; 218 const char* kUnterminatedCharacterClass = "Unterminated character class";
210 ExpectError("[", kUnterminatedCharacterClass); 219 ExpectError("[", kUnterminatedCharacterClass);
211 ExpectError("[a-", kUnterminatedCharacterClass); 220 ExpectError("[a-", kUnterminatedCharacterClass);
212 const char* kIllegalCharacterClass = "Illegal character class"; 221 const char* kIllegalCharacterClass = "Illegal character class";
213 ExpectError("[a-\\w]", kIllegalCharacterClass); 222 ExpectError("[a-\\w]", kIllegalCharacterClass);
214 const char* kEndControl = "\\c at end of pattern"; 223 const char* kEndControl = "\\c at end of pattern";
215 ExpectError("\\c", kEndControl); 224 ExpectError("\\c", kEndControl);
216 const char* kIllegalControl = "Illegal control letter";
217 ExpectError("\\c!", kIllegalControl);
218 } 225 }
219 226
220 227
221 static void Execute(bool expected, const char* input, const char* str) { 228 static void Execute(bool expected, const char* input, const char* str) {
222 v8::HandleScope scops; 229 v8::HandleScope scops;
223 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); 230 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
224 ZoneScope zone_scope(DELETE_ON_EXIT); 231 ZoneScope zone_scope(DELETE_ON_EXIT);
225 Handle<String> error; 232 Handle<String> error;
226 RegExpTree* tree = v8::internal::ParseRegExp(&buffer, &error); 233 RegExpTree* tree = v8::internal::ParseRegExp(&buffer, &error);
227 CHECK(tree != NULL); 234 CHECK(tree != NULL);
(...skipping 28 matching lines...) Expand all
256 } else { 263 } else {
257 CHECK(node != NULL); 264 CHECK(node != NULL);
258 CHECK(error.is_null()); 265 CHECK(error.is_null());
259 } 266 }
260 } 267 }
261 } 268 }
262 269
263 270
264 // "123456789abcdb".match(/(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(\11)/) 271 // "123456789abcdb".match(/(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(\11)/)
265 // 123456789abcdb,1,2,3,4,5,6,7,8,9,a,b,c,d,b 272 // 123456789abcdb,1,2,3,4,5,6,7,8,9,a,b,c,d,b
OLDNEW
« no previous file with comments | « regexp2000/src/parser.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698