Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(255)

Side by Side Diff: regexp2000/test/cctest/test-regexp.cc

Issue 9110: Experimental: Fixed bug in RegExp Parser. Added feature counting in parser. (Closed)
Patch Set: Merged changes to tip of experimental branch. Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « regexp2000/src/parser.cc ('k') | regexp2000/test/mjsunit/non-ascii-replace.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
64 const char* input_; 64 const char* input_;
65 const char* compile_error_; 65 const char* compile_error_;
66 }; 66 };
67 67
68 68
69 static SmartPointer<char> Parse(const char* input) { 69 static SmartPointer<char> Parse(const char* input) {
70 v8::HandleScope scope; 70 v8::HandleScope scope;
71 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); 71 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
72 ZoneScope zone_scope(DELETE_ON_EXIT); 72 ZoneScope zone_scope(DELETE_ON_EXIT);
73 Handle<String> error; 73 Handle<String> error;
74 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); 74 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error, NULL);
75 CHECK(node != NULL); 75 CHECK(node != NULL);
76 CHECK(error.is_null()); 76 CHECK(error.is_null());
77 SmartPointer<char> output = node->ToString(); 77 SmartPointer<char> output = node->ToString();
78 return output; 78 return output;
79 } 79 }
80 80
81 static bool ParseEscapes(const char* input) {
82 v8::HandleScope scope;
83 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
84 ZoneScope zone_scope(DELETE_ON_EXIT);
85 Handle<String> error;
86 bool has_escapes;
87 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error, &has_escapes);
88 CHECK(node != NULL);
89 CHECK(error.is_null());
90 return has_escapes;
91 }
92
81 93
82 #define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input)) 94 #define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))
83 95 #define CHECK_ESCAPES(input, has_escapes) CHECK_EQ(has_escapes, \
96 ParseEscapes(input));
84 97
85 TEST(Parser) { 98 TEST(Parser) {
86 V8::Initialize(NULL); 99 V8::Initialize(NULL);
87 CHECK_PARSE_EQ("abc", "'abc'"); 100 CHECK_PARSE_EQ("abc", "'abc'");
88 CHECK_PARSE_EQ("", "%"); 101 CHECK_PARSE_EQ("", "%");
89 CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')"); 102 CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
90 CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')"); 103 CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
91 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)"); 104 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
92 CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')"); 105 CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
93 CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])"); 106 CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
94 CHECK_PARSE_EQ("a*", "(# 0 - g 'a')"); 107 CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
95 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')"); 108 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
96 CHECK_PARSE_EQ("abc+", "(# 1 - g 'abc')"); 109 CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))");
97 CHECK_PARSE_EQ("abc+?", "(# 1 - n 'abc')"); 110 CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))");
98 CHECK_PARSE_EQ("xyz?", "(# 0 1 g 'xyz')"); 111 CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))");
99 CHECK_PARSE_EQ("xyz??", "(# 0 1 n 'xyz')"); 112 CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))");
100 CHECK_PARSE_EQ("xyz{0,1}", "(# 0 1 g 'xyz')"); 113 CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
101 CHECK_PARSE_EQ("xyz{0,1}?", "(# 0 1 n 'xyz')"); 114 CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
102 CHECK_PARSE_EQ("xyz{93}", "(# 93 93 g 'xyz')"); 115 CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
103 CHECK_PARSE_EQ("xyz{93}?", "(# 93 93 n 'xyz')"); 116 CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
104 CHECK_PARSE_EQ("xyz{1,32}", "(# 1 32 g 'xyz')"); 117 CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
105 CHECK_PARSE_EQ("xyz{1,32}?", "(# 1 32 n 'xyz')"); 118 CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
106 CHECK_PARSE_EQ("xyz{1,}", "(# 1 - g 'xyz')"); 119 CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
107 CHECK_PARSE_EQ("xyz{1,}?", "(# 1 - n 'xyz')"); 120 CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
108 CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\fb\nc\rd\te\vf'"); 121 CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\fb\nc\rd\te\vf'");
109 CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\nb' @b 'c')"); 122 CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\nb' @b 'c')");
110 CHECK_PARSE_EQ("(?:foo)", "'foo'"); 123 CHECK_PARSE_EQ("(?:foo)", "'foo'");
111 CHECK_PARSE_EQ("(?: foo )", "' foo '"); 124 CHECK_PARSE_EQ("(?: foo )", "' foo '");
112 CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))"); 125 CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
113 CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')"); 126 CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
114 CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')"); 127 CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
115 CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')"); 128 CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
116 CHECK_PARSE_EQ("()", "(^ %)"); 129 CHECK_PARSE_EQ("()", "(^ %)");
117 CHECK_PARSE_EQ("(?=)", "(-> + %)"); 130 CHECK_PARSE_EQ("(?=)", "(-> + %)");
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
156 CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')" 169 CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
157 " (# 0 - g (<- 3)))"); 170 " (# 0 - g (<- 3)))");
158 CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')" 171 CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
159 " (# 0 - g '\x04'))"); 172 " (# 0 - g '\x04'))");
160 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10", 173 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
161 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')" 174 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
162 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))"); 175 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
163 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11", 176 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
164 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')" 177 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
165 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\x09')"); 178 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\x09')");
179 CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
180 CHECK_PARSE_EQ("(a\\1)", "(^ (: 'a' (<- 1)))");
181 CHECK_PARSE_EQ("(\\1a)", "(^ (: (<- 1) 'a'))");
182 CHECK_PARSE_EQ("\\1(a)", "(: '\x01' (^ 'a'))");
166 CHECK_PARSE_EQ("[\\0]", "[\0]"); 183 CHECK_PARSE_EQ("[\\0]", "[\0]");
167 CHECK_PARSE_EQ("[\\11]", "[\t]"); 184 CHECK_PARSE_EQ("[\\11]", "[\t]");
168 CHECK_PARSE_EQ("[\\11a]", "[\t a]"); 185 CHECK_PARSE_EQ("[\\11a]", "[\t a]");
169 CHECK_PARSE_EQ("[\\011]", "[\t]"); 186 CHECK_PARSE_EQ("[\\011]", "[\t]");
170 CHECK_PARSE_EQ("[\\00011]", "[\000 1 1]"); 187 CHECK_PARSE_EQ("[\\00011]", "[\000 1 1]");
171 CHECK_PARSE_EQ("[\\118]", "[\t 8]"); 188 CHECK_PARSE_EQ("[\\118]", "[\t 8]");
172 CHECK_PARSE_EQ("[\\111]", "[I]"); 189 CHECK_PARSE_EQ("[\\111]", "[I]");
173 CHECK_PARSE_EQ("[\\1111]", "[I 1]"); 190 CHECK_PARSE_EQ("[\\1111]", "[I 1]");
174 CHECK_PARSE_EQ("\\x34", "'\x34'"); 191 CHECK_PARSE_EQ("\\x34", "'\x34'");
192 CHECK_PARSE_EQ("\\x60", "'\x60'");
175 CHECK_PARSE_EQ("\\x3z", "'x3z'"); 193 CHECK_PARSE_EQ("\\x3z", "'x3z'");
176 CHECK_PARSE_EQ("\\u0034", "'\x34'"); 194 CHECK_PARSE_EQ("\\u0034", "'\x34'");
177 CHECK_PARSE_EQ("\\u003z", "'u003z'"); 195 CHECK_PARSE_EQ("\\u003z", "'u003z'");
196
197 CHECK_ESCAPES("a", false);
198 CHECK_ESCAPES("a|b", false);
199 CHECK_ESCAPES("a\\n", true);
200 CHECK_ESCAPES("^a", false);
201 CHECK_ESCAPES("a$", false);
202 CHECK_ESCAPES("a\\b!", false);
203 CHECK_ESCAPES("a\\Bb", false);
204 CHECK_ESCAPES("a*", false);
205 CHECK_ESCAPES("a*?", false);
206 CHECK_ESCAPES("a?", false);
207 CHECK_ESCAPES("a??", false);
208 CHECK_ESCAPES("a{0,1}?", false);
209 CHECK_ESCAPES("a{1,1}?", false);
210 CHECK_ESCAPES("a{1,2}?", false);
211 CHECK_ESCAPES("a+?", false);
212 CHECK_ESCAPES("(a)", false);
213 CHECK_ESCAPES("(a)\\1", false);
214 CHECK_ESCAPES("(\\1a)", false);
215 CHECK_ESCAPES("\\1(a)", true);
216 CHECK_ESCAPES("a\\s", false);
217 CHECK_ESCAPES("a\\S", false);
218 CHECK_ESCAPES("a\\d", false);
219 CHECK_ESCAPES("a\\D", false);
220 CHECK_ESCAPES("a\\w", false);
221 CHECK_ESCAPES("a\\W", false);
222 CHECK_ESCAPES("a.", false);
223 CHECK_ESCAPES("a\\q", true);
224 CHECK_ESCAPES("a[a]", false);
225 CHECK_ESCAPES("a[^a]", false);
226 CHECK_ESCAPES("a[a-z]", false);
227 CHECK_ESCAPES("a[\\q]", false);
228 CHECK_ESCAPES("a(?:b)", false);
229 CHECK_ESCAPES("a(?=b)", false);
230 CHECK_ESCAPES("a(?!b)", false);
231 CHECK_ESCAPES("\\x60", true);
232 CHECK_ESCAPES("\\u0060", true);
233 CHECK_ESCAPES("\\cA", true);
234 CHECK_ESCAPES("\\q", true);
235 CHECK_ESCAPES("\\1112", true);
236 CHECK_ESCAPES("\\0", true);
237 CHECK_ESCAPES("(a)\\1", false);
178 } 238 }
179 239
180 240
181 static void ExpectError(const char* input, 241 static void ExpectError(const char* input,
182 const char* expected) { 242 const char* expected) {
183 v8::HandleScope scope; 243 v8::HandleScope scope;
184 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); 244 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
185 ZoneScope zone_scope(DELETE_ON_EXIT); 245 ZoneScope zone_scope(DELETE_ON_EXIT);
186 Handle<String> error; 246 Handle<String> error;
187 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); 247 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error, NULL);
188 CHECK(node == NULL); 248 CHECK(node == NULL);
189 CHECK(!error.is_null()); 249 CHECK(!error.is_null());
190 SmartPointer<char> str = error->ToCString(ALLOW_NULLS); 250 SmartPointer<char> str = error->ToCString(ALLOW_NULLS);
191 CHECK_EQ(expected, *str); 251 CHECK_EQ(expected, *str);
192 } 252 }
193 253
194 254
195 TEST(Errors) { 255 TEST(Errors) {
196 V8::Initialize(NULL); 256 V8::Initialize(NULL);
197 const char* kEndBackslash = "\\ at end of pattern"; 257 const char* kEndBackslash = "\\ at end of pattern";
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
286 TestCharacterClassEscapes('w', IsWord); 346 TestCharacterClassEscapes('w', IsWord);
287 TestCharacterClassEscapes('W', NotWord); 347 TestCharacterClassEscapes('W', NotWord);
288 } 348 }
289 349
290 350
291 static void Execute(bool expected, const char* input, const char* str) { 351 static void Execute(bool expected, const char* input, const char* str) {
292 v8::HandleScope scope; 352 v8::HandleScope scope;
293 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); 353 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
294 ZoneScope zone_scope(DELETE_ON_EXIT); 354 ZoneScope zone_scope(DELETE_ON_EXIT);
295 Handle<String> error; 355 Handle<String> error;
296 RegExpTree* tree = v8::internal::ParseRegExp(&buffer, &error); 356 RegExpTree* tree = v8::internal::ParseRegExp(&buffer, &error, NULL);
297 CHECK(tree != NULL); 357 CHECK(tree != NULL);
298 CHECK(error.is_null()); 358 CHECK(error.is_null());
299 RegExpNode<const char>* node = RegExpEngine::Compile<const char>(tree); 359 RegExpNode<const char>* node = RegExpEngine::Compile<const char>(tree);
300 bool outcome = RegExpEngine::Execute(node, CStrVector(str)); 360 bool outcome = RegExpEngine::Execute(node, CStrVector(str));
301 CHECK_EQ(outcome, expected); 361 CHECK_EQ(outcome, expected);
302 } 362 }
303 363
304 364
305 TEST(Execution) { 365 TEST(Execution) {
306 V8::Initialize(NULL); 366 V8::Initialize(NULL);
307 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbegh"); 367 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbegh");
308 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefh"); 368 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefh");
309 Execute(false, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefd"); 369 Execute(false, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefd");
310 } 370 }
311 371
312 372
313 class TestConfig { 373 class TestConfig {
314 public: 374 public:
315 typedef int Key; 375 typedef int Key;
316 typedef int Value; 376 typedef int Value;
317 static const int kNoKey; 377 static const int kNoKey;
318 static const int kNoValue; 378 static const int kNoValue;
319 static inline int Compare(int a, int b) { 379 static inline int Compare(int a, int b) {
320 if (a < b) return -1; 380 if (a < b)
321 else if (a > b) return 1; 381 return -1;
322 else return 0; 382 else if (a > b)
383 return 1;
384 else
385 return 0;
323 } 386 }
324 }; 387 };
325 388
326 389
327 const int TestConfig::kNoKey = 0; 390 const int TestConfig::kNoKey = 0;
328 const int TestConfig::kNoValue = 0; 391 const int TestConfig::kNoValue = 0;
329 392
330 393
331 static int PseudoRandom(int i, int j) { 394 static int PseudoRandom(int i, int j) {
332 return ~(~((i * 781) ^ (j * 329))); 395 return ~(~((i * 781) ^ (j * 329)));
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
384 } 447 }
385 } 448 }
386 } 449 }
387 } 450 }
388 } 451 }
389 452
390 453
391 static int CompareChars(const void* ap, const void* bp) { 454 static int CompareChars(const void* ap, const void* bp) {
392 uc16 a = *static_cast<const uc16*>(ap); 455 uc16 a = *static_cast<const uc16*>(ap);
393 uc16 b = *static_cast<const uc16*>(bp); 456 uc16 b = *static_cast<const uc16*>(bp);
394 if (a < b) return -1; 457 if (a < b)
395 else if (a > b) return 1; 458 return -1;
396 else return 0; 459 else if (a > b)
460 return 1;
461 else
462 return 0;
397 } 463 }
398 464
399 465
400 TEST(DispatchTableConstruction) { 466 TEST(DispatchTableConstruction) {
401 // Initialize test data. 467 // Initialize test data.
402 static const int kLimit = 1000; 468 static const int kLimit = 1000;
403 static const int kRangeCount = 8; 469 static const int kRangeCount = 8;
404 static const int kRangeSize = 16; 470 static const int kRangeSize = 16;
405 uc16 ranges[kRangeCount][2 * kRangeSize]; 471 uc16 ranges[kRangeCount][2 * kRangeSize];
406 for (int i = 0; i < kRangeCount; i++) { 472 for (int i = 0; i < kRangeCount; i++) {
(...skipping 16 matching lines...) Expand all
423 OutSet outs = table.Get(p); 489 OutSet outs = table.Get(p);
424 for (int j = 0; j < kRangeCount; j++) { 490 for (int j = 0; j < kRangeCount; j++) {
425 uc16* range = ranges[j]; 491 uc16* range = ranges[j];
426 bool is_on = false; 492 bool is_on = false;
427 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2) 493 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
428 is_on = (range[k] <= p && p <= range[k + 1]); 494 is_on = (range[k] <= p && p <= range[k + 1]);
429 CHECK_EQ(is_on, outs.Get(j)); 495 CHECK_EQ(is_on, outs.Get(j));
430 } 496 }
431 } 497 }
432 } 498 }
OLD | NEW
« no previous file with comments | « regexp2000/src/parser.cc ('k') | regexp2000/test/mjsunit/non-ascii-replace.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698