| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 64 | 64 |
| 65 | 65 |
| 66 #ifdef USE_FUZZ_TEST_DATA | 66 #ifdef USE_FUZZ_TEST_DATA |
| 67 #include "regexp-test-data.cc" | 67 #include "regexp-test-data.cc" |
| 68 #else | 68 #else |
| 69 static const int kCaseCount = 0; | 69 static const int kCaseCount = 0; |
| 70 static const RegExpTestCase kCases[1] = { RegExpTestCase() }; | 70 static const RegExpTestCase kCases[1] = { RegExpTestCase() }; |
| 71 #endif | 71 #endif |
| 72 | 72 |
| 73 | 73 |
| 74 static void ExpectParse(const char* input, | 74 static SmartPointer<char> Parse(const char* input) { |
| 75 const char* expected) { | |
| 76 v8::HandleScope scope; | 75 v8::HandleScope scope; |
| 77 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); | 76 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); |
| 78 ZoneScope zone_scope(DELETE_ON_EXIT); | 77 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 79 Handle<String> error; | 78 Handle<String> error; |
| 80 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); | 79 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); |
| 81 CHECK(node != NULL); | 80 CHECK(node != NULL); |
| 82 CHECK(error.is_null()); | 81 CHECK(error.is_null()); |
| 83 SmartPointer<char> output = node->ToString(); | 82 SmartPointer<char> output = node->ToString(); |
| 84 CHECK_EQ(expected, *output); | 83 return output; |
| 85 } | 84 } |
| 86 | 85 |
| 87 | 86 |
| 87 #define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input)) |
| 88 |
| 89 |
| 88 TEST(Parser) { | 90 TEST(Parser) { |
| 89 V8::Initialize(NULL); | 91 V8::Initialize(NULL); |
| 90 ExpectParse("abc", "'abc'"); | 92 CHECK_PARSE_EQ("abc", "'abc'"); |
| 91 ExpectParse("", "%"); | 93 CHECK_PARSE_EQ("", "%"); |
| 92 ExpectParse("abc|def", "(| 'abc' 'def')"); | 94 CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')"); |
| 93 ExpectParse("abc|def|ghi", "(| 'abc' 'def' 'ghi')"); | 95 CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')"); |
| 94 ExpectParse("\\w\\W\\s\\S\\d\\D", "(: [&w] [&W] [&s] [&S] [&d] [&D])"); | 96 CHECK_PARSE_EQ("\\w\\W\\s\\S\\d\\D", "(: [&w] [&W] [&s] [&S] [&d] [&D])"); |
| 95 ExpectParse("^xxx$", "(: @^i 'xxx' @$i)"); | 97 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)"); |
| 96 ExpectParse("ab\\b\\w\\bcd", "(: 'ab' @b [&w] @b 'cd')"); | 98 CHECK_PARSE_EQ("ab\\b\\w\\bcd", "(: 'ab' @b [&w] @b 'cd')"); |
| 97 ExpectParse("\\w|\\s|.", "(| [&w] [&s] [&.])"); | 99 CHECK_PARSE_EQ("\\w|\\s|.", "(| [&w] [&s] [&.])"); |
| 98 ExpectParse("a*", "(# 0 - g 'a')"); | 100 CHECK_PARSE_EQ("a*", "(# 0 - g 'a')"); |
| 99 ExpectParse("a*?", "(# 0 - n 'a')"); | 101 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')"); |
| 100 ExpectParse("abc+", "(# 1 - g 'abc')"); | 102 CHECK_PARSE_EQ("abc+", "(# 1 - g 'abc')"); |
| 101 ExpectParse("abc+?", "(# 1 - n 'abc')"); | 103 CHECK_PARSE_EQ("abc+?", "(# 1 - n 'abc')"); |
| 102 ExpectParse("xyz?", "(# 0 1 g 'xyz')"); | 104 CHECK_PARSE_EQ("xyz?", "(# 0 1 g 'xyz')"); |
| 103 ExpectParse("xyz??", "(# 0 1 n 'xyz')"); | 105 CHECK_PARSE_EQ("xyz??", "(# 0 1 n 'xyz')"); |
| 104 ExpectParse("xyz{0,1}", "(# 0 1 g 'xyz')"); | 106 CHECK_PARSE_EQ("xyz{0,1}", "(# 0 1 g 'xyz')"); |
| 105 ExpectParse("xyz{0,1}?", "(# 0 1 n 'xyz')"); | 107 CHECK_PARSE_EQ("xyz{0,1}?", "(# 0 1 n 'xyz')"); |
| 106 ExpectParse("xyz{93}", "(# 93 93 g 'xyz')"); | 108 CHECK_PARSE_EQ("xyz{93}", "(# 93 93 g 'xyz')"); |
| 107 ExpectParse("xyz{93}?", "(# 93 93 n 'xyz')"); | 109 CHECK_PARSE_EQ("xyz{93}?", "(# 93 93 n 'xyz')"); |
| 108 ExpectParse("xyz{1,32}", "(# 1 32 g 'xyz')"); | 110 CHECK_PARSE_EQ("xyz{1,32}", "(# 1 32 g 'xyz')"); |
| 109 ExpectParse("xyz{1,32}?", "(# 1 32 n 'xyz')"); | 111 CHECK_PARSE_EQ("xyz{1,32}?", "(# 1 32 n 'xyz')"); |
| 110 ExpectParse("xyz{1,}", "(# 1 - g 'xyz')"); | 112 CHECK_PARSE_EQ("xyz{1,}", "(# 1 - g 'xyz')"); |
| 111 ExpectParse("xyz{1,}?", "(# 1 - n 'xyz')"); | 113 CHECK_PARSE_EQ("xyz{1,}?", "(# 1 - n 'xyz')"); |
| 112 ExpectParse("a\\fb\\nc\\rd\\te\\vf", "'a\fb\nc\rd\te\vf'"); | 114 CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\fb\nc\rd\te\vf'"); |
| 113 ExpectParse("a\\nb\\bc", "(: 'a\nb' @b 'c')"); | 115 CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\nb' @b 'c')"); |
| 114 ExpectParse("(?:foo)", "'foo'"); | 116 CHECK_PARSE_EQ("(?:foo)", "'foo'"); |
| 115 ExpectParse("(?: foo )", "' foo '"); | 117 CHECK_PARSE_EQ("(?: foo )", "' foo '"); |
| 116 ExpectParse("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))"); | 118 CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))"); |
| 117 ExpectParse("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')"); | 119 CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')"); |
| 118 ExpectParse("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')"); | 120 CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')"); |
| 119 ExpectParse("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')"); | 121 CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')"); |
| 120 ExpectParse("()", "(^ %)"); | 122 CHECK_PARSE_EQ("()", "(^ %)"); |
| 121 ExpectParse("(?=)", "(-> + %)"); | 123 CHECK_PARSE_EQ("(?=)", "(-> + %)"); |
| 122 ExpectParse("[]", "%"); | 124 CHECK_PARSE_EQ("[]", "%"); |
| 123 ExpectParse("[x]", "[x]"); | 125 CHECK_PARSE_EQ("[x]", "[x]"); |
| 124 ExpectParse("[xyz]", "[x y z]"); | 126 CHECK_PARSE_EQ("[xyz]", "[x y z]"); |
| 125 ExpectParse("[a-zA-Z0-9]", "[a-z A-Z 0-9]"); | 127 CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]"); |
| 126 ExpectParse("[-123]", "[- 1 2 3]"); | 128 CHECK_PARSE_EQ("[-123]", "[- 1 2 3]"); |
| 127 ExpectParse("[^123]", "^[1 2 3]"); | 129 CHECK_PARSE_EQ("[^123]", "^[1 2 3]"); |
| 128 ExpectParse("]", "']'"); | 130 CHECK_PARSE_EQ("]", "']'"); |
| 129 ExpectParse("}", "'}'"); | 131 CHECK_PARSE_EQ("}", "'}'"); |
| 130 ExpectParse("[a-b-c]", "[a-b - c]"); | 132 CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]"); |
| 131 ExpectParse("[\\w]", "[&w]"); | 133 CHECK_PARSE_EQ("[\\w]", "[&w]"); |
| 132 ExpectParse("[x\\wz]", "[x &w z]"); | 134 CHECK_PARSE_EQ("[x\\wz]", "[x &w z]"); |
| 133 ExpectParse("[\\w-z]", "[&w - z]"); | 135 CHECK_PARSE_EQ("[\\w-z]", "[&w - z]"); |
| 134 ExpectParse("[\\w-\\d]", "[&w - &d]"); | 136 CHECK_PARSE_EQ("[\\w-\\d]", "[&w - &d]"); |
| 135 ExpectParse("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\n\n\t\t\v\v'"); | 137 CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\n\n\t\t\v\v'"); |
| 136 ExpectParse("[a\\]c]", "[a ] c]"); | 138 CHECK_PARSE_EQ("\\c!", "'c!'"); |
| 137 ExpectParse("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '"); | 139 CHECK_PARSE_EQ("\\c_", "'c_'"); |
| 138 ExpectParse("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]"); | 140 CHECK_PARSE_EQ("\\c~", "'c~'"); |
| 139 ExpectParse("\\0", "'\0'"); | 141 CHECK_PARSE_EQ("[a\\]c]", "[a ] c]"); |
| 140 ExpectParse("\\11", "'\t'"); | 142 CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '"); |
| 141 ExpectParse("\\11a", "'\ta'"); | 143 CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]"); |
| 142 ExpectParse("\\011", "'\t'"); | 144 CHECK_PARSE_EQ("\\0", "'\0'"); |
| 143 ExpectParse("\\00011", "'\t'"); | 145 CHECK_PARSE_EQ("\\8", "'8'"); |
| 144 ExpectParse("\\118", "'\t8'"); | 146 CHECK_PARSE_EQ("\\9", "'9'"); |
| 145 ExpectParse("\\111", "'I'"); | 147 CHECK_PARSE_EQ("\\11", "'\t'"); |
| 146 ExpectParse("\\1111", "'I1'"); | 148 CHECK_PARSE_EQ("\\11a", "'\ta'"); |
| 147 ExpectParse("(.)(.)(.)\\1", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 1))"); | 149 CHECK_PARSE_EQ("\\011", "'\t'"); |
| 148 ExpectParse("(.)(.)(.)\\2", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 2))"); | 150 CHECK_PARSE_EQ("\\00011", "'\00011'"); |
| 149 ExpectParse("(.)(.)(.)\\3", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 3))"); | 151 CHECK_PARSE_EQ("\\118", "'\t8'"); |
| 150 ExpectParse("(.)(.)(.)\\4", "(: (^ [&.]) (^ [&.]) (^ [&.]) '\x04')"); | 152 CHECK_PARSE_EQ("\\111", "'I'"); |
| 151 ExpectParse("(.)(.)(.)\\1*", "(: (^ [&.]) (^ [&.]) (^ [&.])" | 153 CHECK_PARSE_EQ("\\1111", "'I1'"); |
| 154 CHECK_PARSE_EQ("(.)(.)(.)\\1", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 1))"); |
| 155 CHECK_PARSE_EQ("(.)(.)(.)\\2", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 2))"); |
| 156 CHECK_PARSE_EQ("(.)(.)(.)\\3", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 3))"); |
| 157 CHECK_PARSE_EQ("(.)(.)(.)\\4", "(: (^ [&.]) (^ [&.]) (^ [&.]) '\x04')"); |
| 158 CHECK_PARSE_EQ("(.)(.)(.)\\1*", "(: (^ [&.]) (^ [&.]) (^ [&.])" |
| 152 " (# 0 - g (<- 1)))"); | 159 " (# 0 - g (<- 1)))"); |
| 153 ExpectParse("(.)(.)(.)\\2*", "(: (^ [&.]) (^ [&.]) (^ [&.])" | 160 CHECK_PARSE_EQ("(.)(.)(.)\\2*", "(: (^ [&.]) (^ [&.]) (^ [&.])" |
| 154 " (# 0 - g (<- 2)))"); | 161 " (# 0 - g (<- 2)))"); |
| 155 ExpectParse("(.)(.)(.)\\3*", "(: (^ [&.]) (^ [&.]) (^ [&.])" | 162 CHECK_PARSE_EQ("(.)(.)(.)\\3*", "(: (^ [&.]) (^ [&.]) (^ [&.])" |
| 156 " (# 0 - g (<- 3)))"); | 163 " (# 0 - g (<- 3)))"); |
| 157 ExpectParse("(.)(.)(.)\\4*", "(: (^ [&.]) (^ [&.]) (^ [&.])" | 164 CHECK_PARSE_EQ("(.)(.)(.)\\4*", "(: (^ [&.]) (^ [&.]) (^ [&.])" |
| 158 " (# 0 - g '\x04'))"); | 165 " (# 0 - g '\x04'))"); |
| 159 ExpectParse("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\10", | 166 CHECK_PARSE_EQ("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\10", |
| 160 "(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])" | 167 "(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])" |
| 161 " (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (<- 10))"); | 168 " (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (<- 10))"); |
| 162 ExpectParse("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\11", | 169 CHECK_PARSE_EQ("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\11", |
| 163 "(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])" | 170 "(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])" |
| 164 " (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) '\x09')"); | 171 " (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) '\x09')"); |
| 165 ExpectParse("[\\0]", "[\0]"); | 172 CHECK_PARSE_EQ("[\\0]", "[\0]"); |
| 166 ExpectParse("[\\11]", "[\t]"); | 173 CHECK_PARSE_EQ("[\\11]", "[\t]"); |
| 167 ExpectParse("[\\11a]", "[\t a]"); | 174 CHECK_PARSE_EQ("[\\11a]", "[\t a]"); |
| 168 ExpectParse("[\\011]", "[\t]"); | 175 CHECK_PARSE_EQ("[\\011]", "[\t]"); |
| 169 ExpectParse("[\\00011]", "[\t]"); | 176 CHECK_PARSE_EQ("[\\00011]", "[\000 1 1]"); |
| 170 ExpectParse("[\\118]", "[\t 8]"); | 177 CHECK_PARSE_EQ("[\\118]", "[\t 8]"); |
| 171 ExpectParse("[\\111]", "[I]"); | 178 CHECK_PARSE_EQ("[\\111]", "[I]"); |
| 172 ExpectParse("[\\1111]", "[I 1]"); | 179 CHECK_PARSE_EQ("[\\1111]", "[I 1]"); |
| 173 ExpectParse("\\x34", "'\x34'"); | 180 CHECK_PARSE_EQ("\\x34", "'\x34'"); |
| 174 ExpectParse("\\x3z", "'\x03z'"); | 181 CHECK_PARSE_EQ("\\x3z", "'x3z'"); |
| 182 CHECK_PARSE_EQ("\\u0034", "'\x34'"); |
| 183 CHECK_PARSE_EQ("\\u003z", "'u003z'"); |
| 175 } | 184 } |
| 176 | 185 |
| 177 | 186 |
| 178 static void ExpectError(const char* input, | 187 static void ExpectError(const char* input, |
| 179 const char* expected) { | 188 const char* expected) { |
| 180 v8::HandleScope scope; | 189 v8::HandleScope scope; |
| 181 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); | 190 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); |
| 182 ZoneScope zone_scope(DELETE_ON_EXIT); | 191 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 183 Handle<String> error; | 192 Handle<String> error; |
| 184 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); | 193 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); |
| (...skipping 21 matching lines...) Expand all Loading... |
| 206 ExpectError("(foo", kUnterminatedGroup); | 215 ExpectError("(foo", kUnterminatedGroup); |
| 207 const char* kInvalidGroup = "Invalid group"; | 216 const char* kInvalidGroup = "Invalid group"; |
| 208 ExpectError("(?", kInvalidGroup); | 217 ExpectError("(?", kInvalidGroup); |
| 209 const char* kUnterminatedCharacterClass = "Unterminated character class"; | 218 const char* kUnterminatedCharacterClass = "Unterminated character class"; |
| 210 ExpectError("[", kUnterminatedCharacterClass); | 219 ExpectError("[", kUnterminatedCharacterClass); |
| 211 ExpectError("[a-", kUnterminatedCharacterClass); | 220 ExpectError("[a-", kUnterminatedCharacterClass); |
| 212 const char* kIllegalCharacterClass = "Illegal character class"; | 221 const char* kIllegalCharacterClass = "Illegal character class"; |
| 213 ExpectError("[a-\\w]", kIllegalCharacterClass); | 222 ExpectError("[a-\\w]", kIllegalCharacterClass); |
| 214 const char* kEndControl = "\\c at end of pattern"; | 223 const char* kEndControl = "\\c at end of pattern"; |
| 215 ExpectError("\\c", kEndControl); | 224 ExpectError("\\c", kEndControl); |
| 216 const char* kIllegalControl = "Illegal control letter"; | |
| 217 ExpectError("\\c!", kIllegalControl); | |
| 218 } | 225 } |
| 219 | 226 |
| 220 | 227 |
| 221 static void Execute(bool expected, const char* input, const char* str) { | 228 static void Execute(bool expected, const char* input, const char* str) { |
| 222 v8::HandleScope scops; | 229 v8::HandleScope scops; |
| 223 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); | 230 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); |
| 224 ZoneScope zone_scope(DELETE_ON_EXIT); | 231 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 225 Handle<String> error; | 232 Handle<String> error; |
| 226 RegExpTree* tree = v8::internal::ParseRegExp(&buffer, &error); | 233 RegExpTree* tree = v8::internal::ParseRegExp(&buffer, &error); |
| 227 CHECK(tree != NULL); | 234 CHECK(tree != NULL); |
| (...skipping 28 matching lines...) Expand all Loading... |
| 256 } else { | 263 } else { |
| 257 CHECK(node != NULL); | 264 CHECK(node != NULL); |
| 258 CHECK(error.is_null()); | 265 CHECK(error.is_null()); |
| 259 } | 266 } |
| 260 } | 267 } |
| 261 } | 268 } |
| 262 | 269 |
| 263 | 270 |
| 264 // "123456789abcdb".match(/(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(\11)/) | 271 // "123456789abcdb".match(/(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(\11)/) |
| 265 // 123456789abcdb,1,2,3,4,5,6,7,8,9,a,b,c,d,b | 272 // 123456789abcdb,1,2,3,4,5,6,7,8,9,a,b,c,d,b |
| OLD | NEW |