Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2008 the V8 project authors. All rights reserved. | 1 // Copyright 2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 304 matching lines...) | |
| 315 return unibrow::Space::Is(c); | 315 return unibrow::Space::Is(c); |
| 316 } | 316 } |
| 317 } | 317 } |
| 318 | 318 |
| 319 | 319 |
| 320 static bool NotWhiteSpace(uc16 c) { | 320 static bool NotWhiteSpace(uc16 c) { |
| 321 return !IsWhiteSpace(c); | 321 return !IsWhiteSpace(c); |
| 322 } | 322 } |
| 323 | 323 |
| 324 | 324 |
| 325 static bool IsWord(uc16 c) { | |
| 326 return ('a' <= c && c <= 'z') | |
| 327 || ('A' <= c && c <= 'Z') | |
| 328 || ('0' <= c && c <= '9') | |
| 329 || (c == '_'); | |
| 330 } | |
| 331 | |
| 332 | |
| 333 static bool NotWord(uc16 c) { | 325 static bool NotWord(uc16 c) { |
|
Lasse Reichstein
2008/12/01 11:55:47
Symmetry suggests NotRegExpWord.
| |
| 334 return !IsWord(c); | 326 return !IsRegExpWord(c); |
| 335 } | 327 } |
| 336 | 328 |
| 337 | 329 |
| 338 static bool Dot(uc16 c) { | |
| 339 switch (c) { | |
| 340 // CR LF LS PS | |
| 341 case 0x000A: case 0x000D: case 0x2028: case 0x2029: | |
| 342 return false; | |
| 343 default: | |
| 344 return true; | |
| 345 } | |
| 346 } | |
| 347 | |
| 348 | |
| 349 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) { | 330 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) { |
| 350 ZoneScope scope(DELETE_ON_EXIT); | 331 ZoneScope scope(DELETE_ON_EXIT); |
| 351 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | 332 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 352 CharacterRange::AddClassEscape(c, ranges); | 333 CharacterRange::AddClassEscape(c, ranges); |
| 353 for (unsigned i = 0; i < (1 << 16); i++) { | 334 for (unsigned i = 0; i < (1 << 16); i++) { |
| 354 bool in_class = false; | 335 bool in_class = false; |
| 355 for (int j = 0; !in_class && j < ranges->length(); j++) { | 336 for (int j = 0; !in_class && j < ranges->length(); j++) { |
| 356 CharacterRange& range = ranges->at(j); | 337 CharacterRange& range = ranges->at(j); |
| 357 in_class = (range.from() <= i && i <= range.to()); | 338 in_class = (range.from() <= i && i <= range.to()); |
| 358 } | 339 } |
| 359 CHECK_EQ(pred(i), in_class); | 340 CHECK_EQ(pred(i), in_class); |
| 360 } | 341 } |
| 361 } | 342 } |
| 362 | 343 |
| 363 | 344 |
| 364 TEST(CharacterClassEscapes) { | 345 TEST(CharacterClassEscapes) { |
| 365 TestCharacterClassEscapes('.', Dot); | 346 TestCharacterClassEscapes('.', IsRegExpNewline); |
|
Lasse Reichstein
2008/12/01 11:55:47
Shouldn't this be IsNOTRegExpNewline?
| |
| 366 TestCharacterClassEscapes('d', IsDigit); | 347 TestCharacterClassEscapes('d', IsDigit); |
| 367 TestCharacterClassEscapes('D', NotDigit); | 348 TestCharacterClassEscapes('D', NotDigit); |
| 368 TestCharacterClassEscapes('s', IsWhiteSpace); | 349 TestCharacterClassEscapes('s', IsWhiteSpace); |
| 369 TestCharacterClassEscapes('S', NotWhiteSpace); | 350 TestCharacterClassEscapes('S', NotWhiteSpace); |
| 370 TestCharacterClassEscapes('w', IsWord); | 351 TestCharacterClassEscapes('w', IsRegExpWord); |
| 371 TestCharacterClassEscapes('W', NotWord); | 352 TestCharacterClassEscapes('W', NotWord); |
| 372 } | 353 } |
| 373 | 354 |
| 374 | 355 |
| 375 static RegExpNode* Compile(const char* input, bool multiline) { | 356 static RegExpNode* Compile(const char* input, bool multiline) { |
| 376 FlatStringReader reader(CStrVector(input)); | 357 FlatStringReader reader(CStrVector(input)); |
| 377 RegExpParseResult result; | 358 RegExpParseResult result; |
| 378 if (!v8::internal::ParseRegExp(&reader, multiline, &result)) | 359 if (!v8::internal::ParseRegExp(&reader, multiline, &result)) |
| 379 return NULL; | 360 return NULL; |
| 380 RegExpNode* node = NULL; | 361 RegExpNode* node = NULL; |
| 381 RegExpEngine::Compile(&result, &node, false, multiline); | 362 RegExpEngine::Compile(&result, &node, false, multiline); |
| 382 return node; | 363 return node; |
| 383 } | 364 } |
| 384 | 365 |
| 385 | 366 |
| 386 static void Execute(const char* input, | 367 static void Execute(const char* input, |
| 387 bool multiline, | 368 bool multiline, |
| 388 bool dot_output = false) { | 369 bool dot_output = false) { |
| 389 v8::HandleScope scope; | 370 v8::HandleScope scope; |
| 390 ZoneScope zone_scope(DELETE_ON_EXIT); | 371 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 391 RegExpNode* node = Compile(input, multiline); | 372 RegExpNode* node = Compile(input, multiline); |
| 392 USE(node); | 373 USE(node); |
| 393 #ifdef DEBUG | 374 #ifdef DEBUG |
| 394 if (dot_output) { | 375 if (dot_output) { |
| 395 RegExpEngine::DotPrint(input, node); | 376 RegExpEngine::DotPrint(input, node, false); |
| 396 exit(0); | 377 exit(0); |
| 397 } | 378 } |
| 398 #endif // DEBUG | 379 #endif // DEBUG |
| 399 } | 380 } |
| 400 | 381 |
| 401 | 382 |
| 402 class TestConfig { | 383 class TestConfig { |
| 403 public: | 384 public: |
| 404 typedef int Key; | 385 typedef int Key; |
| 405 typedef int Value; | 386 typedef int Value; |
| (...skipping 475 matching lines...) | |
| 881 ZoneScope zone_scope(DELETE_ON_EXIT); | 862 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 882 ZoneList<CharacterRange>* ranges = | 863 ZoneList<CharacterRange>* ranges = |
| 883 new ZoneList<CharacterRange>(kRangeCount); | 864 new ZoneList<CharacterRange>(kRangeCount); |
| 884 for (int i = 0; i < kRangeCount; i++) { | 865 for (int i = 0; i < kRangeCount; i++) { |
| 885 int from = PseudoRandom(t + 87, i + 25) % kLimit; | 866 int from = PseudoRandom(t + 87, i + 25) % kLimit; |
| 886 int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20)); | 867 int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20)); |
| 887 if (to > kLimit) to = kLimit; | 868 if (to > kLimit) to = kLimit; |
| 888 ranges->Add(CharacterRange(from, to)); | 869 ranges->Add(CharacterRange(from, to)); |
| 889 } | 870 } |
| 890 DispatchTable table; | 871 DispatchTable table; |
| 891 DispatchTableConstructor cons(&table); | 872 DispatchTableConstructor cons(&table, false); |
| 892 cons.set_choice_index(0); | 873 cons.set_choice_index(0); |
| 893 cons.AddInverse(ranges); | 874 cons.AddInverse(ranges); |
| 894 for (int i = 0; i < kLimit; i++) { | 875 for (int i = 0; i < kLimit; i++) { |
| 895 bool is_on = false; | 876 bool is_on = false; |
| 896 for (int j = 0; !is_on && j < kRangeCount; j++) | 877 for (int j = 0; !is_on && j < kRangeCount; j++) |
| 897 is_on = ranges->at(j).Contains(i); | 878 is_on = ranges->at(j).Contains(i); |
| 898 OutSet* set = table.Get(i); | 879 OutSet* set = table.Get(i); |
| 899 CHECK_EQ(is_on, set->Get(0) == false); | 880 CHECK_EQ(is_on, set->Get(0) == false); |
| 900 } | 881 } |
| 901 } | 882 } |
| 902 ZoneScope zone_scope(DELETE_ON_EXIT); | 883 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 903 ZoneList<CharacterRange>* ranges = | 884 ZoneList<CharacterRange>* ranges = |
| 904 new ZoneList<CharacterRange>(1); | 885 new ZoneList<CharacterRange>(1); |
| 905 ranges->Add(CharacterRange(0xFFF0, 0xFFFE)); | 886 ranges->Add(CharacterRange(0xFFF0, 0xFFFE)); |
| 906 DispatchTable table; | 887 DispatchTable table; |
| 907 DispatchTableConstructor cons(&table); | 888 DispatchTableConstructor cons(&table, false); |
| 908 cons.set_choice_index(0); | 889 cons.set_choice_index(0); |
| 909 cons.AddInverse(ranges); | 890 cons.AddInverse(ranges); |
| 910 CHECK(!table.Get(0xFFFE)->Get(0)); | 891 CHECK(!table.Get(0xFFFE)->Get(0)); |
| 911 CHECK(table.Get(0xFFFF)->Get(0)); | 892 CHECK(table.Get(0xFFFF)->Get(0)); |
| 912 } | 893 } |
| 913 | 894 |
| 914 | 895 |
| 915 static uc32 canonicalize(uc32 c) { | 896 static uc32 canonicalize(uc32 c) { |
| 916 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth]; | 897 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth]; |
| 917 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL); | 898 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL); |
| (...skipping 161 matching lines...) | |
| 1079 TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1), | 1060 TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1), |
| 1080 CharacterRange('a', 'z')); | 1061 CharacterRange('a', 'z')); |
| 1081 // Here we need to add [l-z] to complete the case independence of | 1062 // Here we need to add [l-z] to complete the case independence of |
| 1082 // [A-Za-z] but we expect [a-z] to be added since we always add a | 1063 // [A-Za-z] but we expect [a-z] to be added since we always add a |
| 1083 // whole block at a time. | 1064 // whole block at a time. |
| 1084 TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'), | 1065 TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'), |
| 1085 CharacterRange('a', 'z')); | 1066 CharacterRange('a', 'z')); |
| 1086 } | 1067 } |
| 1087 | 1068 |
| 1088 | 1069 |
| 1070 static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) { | |
| 1071 if (ranges == NULL) | |
| 1072 return false; | |
| 1073 for (int i = 0; i < ranges->length(); i++) { | |
| 1074 CharacterRange range = ranges->at(i); | |
| 1075 if (range.from() <= c && c <= range.to()) | |
| 1076 return true; | |
| 1077 } | |
| 1078 return false; | |
| 1079 } | |
| 1080 | |
| 1081 | |
| 1082 TEST(CharClassDifference) { | |
| 1083 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 1084 ZoneList<CharacterRange>* base = new ZoneList<CharacterRange>(1); | |
| 1085 base->Add(CharacterRange::Everything()); | |
| 1086 Vector<const uc16> overlay = CharacterRange::GetWordBounds(); | |
| 1087 ZoneList<CharacterRange>* included = NULL; | |
| 1088 ZoneList<CharacterRange>* excluded = NULL; | |
| 1089 CharacterRange::Split(base, overlay, &included, &excluded); | |
| 1090 for (int i = 0; i < (1 << 16); i++) { | |
| 1091 bool in_base = InClass(i, base); | |
| 1092 if (in_base) { | |
| 1093 bool in_overlay = false; | |
| 1094 for (int j = 0; !in_overlay && j < overlay.length(); j += 2) { | |
| 1095 if (overlay[j] <= i && i <= overlay[j+1]) | |
| 1096 in_overlay = true; | |
| 1097 } | |
| 1098 CHECK_EQ(in_overlay, InClass(i, included)); | |
| 1099 CHECK_EQ(!in_overlay, InClass(i, excluded)); | |
| 1100 } else { | |
| 1101 CHECK(!InClass(i, included)); | |
| 1102 CHECK(!InClass(i, excluded)); | |
| 1103 } | |
| 1104 } | |
| 1105 } | |
| 1106 | |
| 1107 | |
| 1089 TEST(Graph) { | 1108 TEST(Graph) { |
| 1090 V8::Initialize(NULL); | 1109 V8::Initialize(NULL); |
| 1091 Execute("foo$(?!bar)", false, true); | 1110 Execute("\\b\\w", false, true); |
| 1092 } | 1111 } |
| OLD | NEW |