| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| 11 // with the distribution. | 11 // with the distribution. |
| 12 // * Neither the name of Google Inc. nor the names of its | 12 // * Neither the name of Google Inc. nor the names of its |
| 13 // contributors may be used to endorse or promote products derived | 13 // contributors may be used to endorse or promote products derived |
| 14 // from this software without specific prior written permission. | 14 // from this software without specific prior written permission. |
| 15 // | 15 // |
| 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 | 28 |
| 29 #include <stdlib.h> | 29 #include <stdlib.h> |
| 30 #include <set> |
| 30 | 31 |
| 31 #include "v8.h" | 32 #include "v8.h" |
| 32 | 33 |
| 33 #include "cctest.h" | 34 #include "cctest.h" |
| 34 #include "zone-inl.h" | 35 #include "zone-inl.h" |
| 35 #include "parser.h" | 36 #include "parser.h" |
| 36 #include "ast.h" | 37 #include "ast.h" |
| 37 #include "jsregexp-inl.h" | 38 #include "jsregexp-inl.h" |
| 38 | 39 |
| 39 | 40 |
| (...skipping 18 matching lines...) Expand all Loading... |
| 58 const char* pattern() const { return pattern_; } | 59 const char* pattern() const { return pattern_; } |
| 59 bool expect_error() const { return compile_error_ != NULL; } | 60 bool expect_error() const { return compile_error_ != NULL; } |
| 60 private: | 61 private: |
| 61 const char* pattern_; | 62 const char* pattern_; |
| 62 const char* flags_; | 63 const char* flags_; |
| 63 const char* input_; | 64 const char* input_; |
| 64 const char* compile_error_; | 65 const char* compile_error_; |
| 65 }; | 66 }; |
| 66 | 67 |
| 67 | 68 |
| 68 #ifdef USE_FUZZ_TEST_DATA | |
| 69 #include "regexp-test-data.cc" | |
| 70 #else | |
| 71 static const int kCaseCount = 0; | |
| 72 static const RegExpTestCase kCases[1] = { RegExpTestCase() }; | |
| 73 #endif | |
| 74 | |
| 75 | |
| 76 static SmartPointer<char> Parse(const char* input) { | 69 static SmartPointer<char> Parse(const char* input) { |
| 77 v8::HandleScope scope; | 70 v8::HandleScope scope; |
| 78 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); | 71 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); |
| 79 ZoneScope zone_scope(DELETE_ON_EXIT); | 72 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 80 Handle<String> error; | 73 Handle<String> error; |
| 81 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); | 74 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); |
| 82 CHECK(node != NULL); | 75 CHECK(node != NULL); |
| 83 CHECK(error.is_null()); | 76 CHECK(error.is_null()); |
| 84 SmartPointer<char> output = node->ToString(); | 77 SmartPointer<char> output = node->ToString(); |
| 85 return output; | 78 return output; |
| 86 } | 79 } |
| 87 | 80 |
| 88 | 81 |
| 89 #define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input)) | 82 #define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input)) |
| 90 | 83 |
| 91 | 84 |
| 92 TEST(Parser) { | 85 TEST(Parser) { |
| 93 V8::Initialize(NULL); | 86 V8::Initialize(NULL); |
| 94 CHECK_PARSE_EQ("abc", "'abc'"); | 87 CHECK_PARSE_EQ("abc", "'abc'"); |
| 95 CHECK_PARSE_EQ("", "%"); | 88 CHECK_PARSE_EQ("", "%"); |
| 96 CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')"); | 89 CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')"); |
| 97 CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')"); | 90 CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')"); |
| 98 CHECK_PARSE_EQ("\\w\\W\\s\\S\\d\\D", "(: [&w] [&W] [&s] [&S] [&d] [&D])"); | |
| 99 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)"); | 91 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)"); |
| 100 CHECK_PARSE_EQ("ab\\b\\w\\bcd", "(: 'ab' @b [&w] @b 'cd')"); | 92 CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')"); |
| 101 CHECK_PARSE_EQ("\\w|\\s|.", "(| [&w] [&s] [&.])"); | 93 CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])"); |
| 102 CHECK_PARSE_EQ("a*", "(# 0 - g 'a')"); | 94 CHECK_PARSE_EQ("a*", "(# 0 - g 'a')"); |
| 103 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')"); | 95 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')"); |
| 104 CHECK_PARSE_EQ("abc+", "(# 1 - g 'abc')"); | 96 CHECK_PARSE_EQ("abc+", "(# 1 - g 'abc')"); |
| 105 CHECK_PARSE_EQ("abc+?", "(# 1 - n 'abc')"); | 97 CHECK_PARSE_EQ("abc+?", "(# 1 - n 'abc')"); |
| 106 CHECK_PARSE_EQ("xyz?", "(# 0 1 g 'xyz')"); | 98 CHECK_PARSE_EQ("xyz?", "(# 0 1 g 'xyz')"); |
| 107 CHECK_PARSE_EQ("xyz??", "(# 0 1 n 'xyz')"); | 99 CHECK_PARSE_EQ("xyz??", "(# 0 1 n 'xyz')"); |
| 108 CHECK_PARSE_EQ("xyz{0,1}", "(# 0 1 g 'xyz')"); | 100 CHECK_PARSE_EQ("xyz{0,1}", "(# 0 1 g 'xyz')"); |
| 109 CHECK_PARSE_EQ("xyz{0,1}?", "(# 0 1 n 'xyz')"); | 101 CHECK_PARSE_EQ("xyz{0,1}?", "(# 0 1 n 'xyz')"); |
| 110 CHECK_PARSE_EQ("xyz{93}", "(# 93 93 g 'xyz')"); | 102 CHECK_PARSE_EQ("xyz{93}", "(# 93 93 g 'xyz')"); |
| 111 CHECK_PARSE_EQ("xyz{93}?", "(# 93 93 n 'xyz')"); | 103 CHECK_PARSE_EQ("xyz{93}?", "(# 93 93 n 'xyz')"); |
| (...skipping 13 matching lines...) Expand all Loading... |
| 125 CHECK_PARSE_EQ("(?=)", "(-> + %)"); | 117 CHECK_PARSE_EQ("(?=)", "(-> + %)"); |
| 126 CHECK_PARSE_EQ("[]", "%"); | 118 CHECK_PARSE_EQ("[]", "%"); |
| 127 CHECK_PARSE_EQ("[x]", "[x]"); | 119 CHECK_PARSE_EQ("[x]", "[x]"); |
| 128 CHECK_PARSE_EQ("[xyz]", "[x y z]"); | 120 CHECK_PARSE_EQ("[xyz]", "[x y z]"); |
| 129 CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]"); | 121 CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]"); |
| 130 CHECK_PARSE_EQ("[-123]", "[- 1 2 3]"); | 122 CHECK_PARSE_EQ("[-123]", "[- 1 2 3]"); |
| 131 CHECK_PARSE_EQ("[^123]", "^[1 2 3]"); | 123 CHECK_PARSE_EQ("[^123]", "^[1 2 3]"); |
| 132 CHECK_PARSE_EQ("]", "']'"); | 124 CHECK_PARSE_EQ("]", "']'"); |
| 133 CHECK_PARSE_EQ("}", "'}'"); | 125 CHECK_PARSE_EQ("}", "'}'"); |
| 134 CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]"); | 126 CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]"); |
| 135 CHECK_PARSE_EQ("[\\w]", "[&w]"); | 127 CHECK_PARSE_EQ("[\\d]", "[0-9]"); |
| 136 CHECK_PARSE_EQ("[x\\wz]", "[x &w z]"); | 128 CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]"); |
| 137 CHECK_PARSE_EQ("[\\w-z]", "[&w - z]"); | 129 CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]"); |
| 138 CHECK_PARSE_EQ("[\\w-\\d]", "[&w - &d]"); | 130 CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]"); |
| 139 CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\n\n\t\t\v\v'"); | 131 CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\n\n\t\t\v\v'"); |
| 140 CHECK_PARSE_EQ("\\c!", "'c!'"); | 132 CHECK_PARSE_EQ("\\c!", "'c!'"); |
| 141 CHECK_PARSE_EQ("\\c_", "'c_'"); | 133 CHECK_PARSE_EQ("\\c_", "'c_'"); |
| 142 CHECK_PARSE_EQ("\\c~", "'c~'"); | 134 CHECK_PARSE_EQ("\\c~", "'c~'"); |
| 143 CHECK_PARSE_EQ("[a\\]c]", "[a ] c]"); | 135 CHECK_PARSE_EQ("[a\\]c]", "[a ] c]"); |
| 144 CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '"); | 136 CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '"); |
| 145 CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]"); | 137 CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]"); |
| 146 CHECK_PARSE_EQ("\\0", "'\0'"); | 138 CHECK_PARSE_EQ("\\0", "'\0'"); |
| 147 CHECK_PARSE_EQ("\\8", "'8'"); | 139 CHECK_PARSE_EQ("\\8", "'8'"); |
| 148 CHECK_PARSE_EQ("\\9", "'9'"); | 140 CHECK_PARSE_EQ("\\9", "'9'"); |
| 149 CHECK_PARSE_EQ("\\11", "'\t'"); | 141 CHECK_PARSE_EQ("\\11", "'\t'"); |
| 150 CHECK_PARSE_EQ("\\11a", "'\ta'"); | 142 CHECK_PARSE_EQ("\\11a", "'\ta'"); |
| 151 CHECK_PARSE_EQ("\\011", "'\t'"); | 143 CHECK_PARSE_EQ("\\011", "'\t'"); |
| 152 CHECK_PARSE_EQ("\\00011", "'\00011'"); | 144 CHECK_PARSE_EQ("\\00011", "'\00011'"); |
| 153 CHECK_PARSE_EQ("\\118", "'\t8'"); | 145 CHECK_PARSE_EQ("\\118", "'\t8'"); |
| 154 CHECK_PARSE_EQ("\\111", "'I'"); | 146 CHECK_PARSE_EQ("\\111", "'I'"); |
| 155 CHECK_PARSE_EQ("\\1111", "'I1'"); | 147 CHECK_PARSE_EQ("\\1111", "'I1'"); |
| 156 CHECK_PARSE_EQ("(.)(.)(.)\\1", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 1))"); | 148 CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))"); |
| 157 CHECK_PARSE_EQ("(.)(.)(.)\\2", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 2))"); | 149 CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))"); |
| 158 CHECK_PARSE_EQ("(.)(.)(.)\\3", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 3))"); | 150 CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))"); |
| 159 CHECK_PARSE_EQ("(.)(.)(.)\\4", "(: (^ [&.]) (^ [&.]) (^ [&.]) '\x04')"); | 151 CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\x04')"); |
| 160 CHECK_PARSE_EQ("(.)(.)(.)\\1*", "(: (^ [&.]) (^ [&.]) (^ [&.])" | 152 CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')" |
| 161 " (# 0 - g (<- 1)))"); | 153 " (# 0 - g (<- 1)))"); |
| 162 CHECK_PARSE_EQ("(.)(.)(.)\\2*", "(: (^ [&.]) (^ [&.]) (^ [&.])" | 154 CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')" |
| 163 " (# 0 - g (<- 2)))"); | 155 " (# 0 - g (<- 2)))"); |
| 164 CHECK_PARSE_EQ("(.)(.)(.)\\3*", "(: (^ [&.]) (^ [&.]) (^ [&.])" | 156 CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')" |
| 165 " (# 0 - g (<- 3)))"); | 157 " (# 0 - g (<- 3)))"); |
| 166 CHECK_PARSE_EQ("(.)(.)(.)\\4*", "(: (^ [&.]) (^ [&.]) (^ [&.])" | 158 CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')" |
| 167 " (# 0 - g '\x04'))"); | 159 " (# 0 - g '\x04'))"); |
| 168 CHECK_PARSE_EQ("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\10", | 160 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10", |
| 169 "(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])" | 161 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')" |
| 170 " (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (<- 10))"); | 162 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))"); |
| 171 CHECK_PARSE_EQ("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\11", | 163 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11", |
| 172 "(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])" | 164 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')" |
| 173 " (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) '\x09')"); | 165 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\x09')"); |
| 174 CHECK_PARSE_EQ("[\\0]", "[\0]"); | 166 CHECK_PARSE_EQ("[\\0]", "[\0]"); |
| 175 CHECK_PARSE_EQ("[\\11]", "[\t]"); | 167 CHECK_PARSE_EQ("[\\11]", "[\t]"); |
| 176 CHECK_PARSE_EQ("[\\11a]", "[\t a]"); | 168 CHECK_PARSE_EQ("[\\11a]", "[\t a]"); |
| 177 CHECK_PARSE_EQ("[\\011]", "[\t]"); | 169 CHECK_PARSE_EQ("[\\011]", "[\t]"); |
| 178 CHECK_PARSE_EQ("[\\00011]", "[\000 1 1]"); | 170 CHECK_PARSE_EQ("[\\00011]", "[\000 1 1]"); |
| 179 CHECK_PARSE_EQ("[\\118]", "[\t 8]"); | 171 CHECK_PARSE_EQ("[\\118]", "[\t 8]"); |
| 180 CHECK_PARSE_EQ("[\\111]", "[I]"); | 172 CHECK_PARSE_EQ("[\\111]", "[I]"); |
| 181 CHECK_PARSE_EQ("[\\1111]", "[I 1]"); | 173 CHECK_PARSE_EQ("[\\1111]", "[I 1]"); |
| 182 CHECK_PARSE_EQ("\\x34", "'\x34'"); | 174 CHECK_PARSE_EQ("\\x34", "'\x34'"); |
| 183 CHECK_PARSE_EQ("\\x3z", "'x3z'"); | 175 CHECK_PARSE_EQ("\\x3z", "'x3z'"); |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 220 const char* kUnterminatedCharacterClass = "Unterminated character class"; | 212 const char* kUnterminatedCharacterClass = "Unterminated character class"; |
| 221 ExpectError("[", kUnterminatedCharacterClass); | 213 ExpectError("[", kUnterminatedCharacterClass); |
| 222 ExpectError("[a-", kUnterminatedCharacterClass); | 214 ExpectError("[a-", kUnterminatedCharacterClass); |
| 223 const char* kIllegalCharacterClass = "Illegal character class"; | 215 const char* kIllegalCharacterClass = "Illegal character class"; |
| 224 ExpectError("[a-\\w]", kIllegalCharacterClass); | 216 ExpectError("[a-\\w]", kIllegalCharacterClass); |
| 225 const char* kEndControl = "\\c at end of pattern"; | 217 const char* kEndControl = "\\c at end of pattern"; |
| 226 ExpectError("\\c", kEndControl); | 218 ExpectError("\\c", kEndControl); |
| 227 } | 219 } |
| 228 | 220 |
| 229 | 221 |
| 230 static void Execute(bool expected, const char* input, const char* str) { | |
| 231 v8::HandleScope scops; | |
| 232 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); | |
| 233 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 234 Handle<String> error; | |
| 235 RegExpTree* tree = v8::internal::ParseRegExp(&buffer, &error); | |
| 236 CHECK(tree != NULL); | |
| 237 CHECK(error.is_null()); | |
| 238 RegExpNode<const char>* node = RegExpEngine::Compile<const char>(tree); | |
| 239 bool outcome = RegExpEngine::Execute(node, CStrVector(str)); | |
| 240 CHECK_EQ(outcome, expected); | |
| 241 } | |
| 242 | |
| 243 | |
| 244 TEST(Execution) { | |
| 245 V8::Initialize(NULL); | |
| 246 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbegh"); | |
| 247 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefh"); | |
| 248 Execute(false, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefd"); | |
| 249 } | |
| 250 | |
| 251 | |
| 252 TEST(Fuzz) { | |
| 253 V8::Initialize(NULL); | |
| 254 for (int i = 0; i < kCaseCount; i++) { | |
| 255 const RegExpTestCase* c = &kCases[i]; | |
| 256 v8::HandleScope scope; | |
| 257 printf("%s\n", c->pattern()); | |
| 258 unibrow::Utf8InputBuffer<> buffer(c->pattern(), strlen(c->pattern())); | |
| 259 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 260 Handle<String> error; | |
| 261 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); | |
| 262 if (c->expect_error()) { | |
| 263 CHECK(node == NULL); | |
| 264 CHECK(!error.is_null()); | |
| 265 } else { | |
| 266 CHECK(node != NULL); | |
| 267 CHECK(error.is_null()); | |
| 268 } | |
| 269 } | |
| 270 } | |
| 271 | |
| 272 | |
| 273 TEST(SingletonField) { | |
| 274 // Test all bits from 0 to 256 | |
| 275 for (int i = 0; i < 256; i++) { | |
| 276 CharacterClass entry = CharacterClass::SingletonField(i); | |
| 277 for (int j = 0; j < 256; j++) { | |
| 278 CHECK_EQ(i == j, entry.Contains(j)); | |
| 279 } | |
| 280 } | |
| 281 // Test upwards through the data range | |
| 282 static const uint32_t kMax = 1 << 16; | |
| 283 for (uint32_t i = 0; i < kMax; i = 1 + static_cast<uint32_t>(i * 1.2)) { | |
| 284 CharacterClass entry = CharacterClass::SingletonField(i); | |
| 285 for (uint32_t j = 0; j < kMax; j = 1 + static_cast<uint32_t>(j * 1.2)) { | |
| 286 CHECK_EQ(i == j, entry.Contains(j)); | |
| 287 } | |
| 288 } | |
| 289 } | |
| 290 | |
| 291 | |
| 292 TEST(RangeField) { | |
| 293 // Test bitfields containing a single range. | |
| 294 for (int i = 256; i < 320; i++) { | |
| 295 for (int j = i; j < 320; j++) { | |
| 296 CharacterClass::Range range(i, j); | |
| 297 CharacterClass entry = CharacterClass::RangeField(range); | |
| 298 for (int k = 256; k < 320; k++) { | |
| 299 CHECK_EQ(i <= k && k <= j, entry.Contains(k)); | |
| 300 } | |
| 301 } | |
| 302 } | |
| 303 } | |
| 304 | |
| 305 | |
| 306 static void TestBuiltins(CharacterClass klass, bool (pred)(uc16)) { | |
| 307 for (int i = 0; i < (1 << 16); i++) | |
| 308 CHECK_EQ(pred(i), klass.Contains(i)); | |
| 309 } | |
| 310 | |
| 311 | |
| 312 static bool IsDigit(uc16 c) { | 222 static bool IsDigit(uc16 c) { |
| 313 return ('0' <= c && c <= '9'); | 223 return ('0' <= c && c <= '9'); |
| 314 } | 224 } |
| 315 | 225 |
| 316 | 226 |
| 227 static bool NotDigit(uc16 c) { |
| 228 return !IsDigit(c); |
| 229 } |
| 230 |
| 231 |
| 317 static bool IsWhiteSpace(uc16 c) { | 232 static bool IsWhiteSpace(uc16 c) { |
| 318 switch (c) { | 233 switch (c) { |
| 319 case 0x09: case 0x0B: case 0x0C: case 0x20: case 0xA0: | 234 case 0x09: case 0x0B: case 0x0C: case 0x20: case 0xA0: |
| 320 return true; | 235 return true; |
| 321 default: | 236 default: |
| 322 return unibrow::Space::Is(c); | 237 return unibrow::Space::Is(c); |
| 323 } | 238 } |
| 324 } | 239 } |
| 325 | 240 |
| 326 | 241 |
| 242 static bool NotWhiteSpace(uc16 c) { |
| 243 return !IsWhiteSpace(c); |
| 244 } |
| 245 |
| 246 |
| 327 static bool IsWord(uc16 c) { | 247 static bool IsWord(uc16 c) { |
| 328 return ('a' <= c && c <= 'z') | 248 return ('a' <= c && c <= 'z') |
| 329 || ('A' <= c && c <= 'Z') | 249 || ('A' <= c && c <= 'Z') |
| 330 || ('0' <= c && c <= '9') | 250 || ('0' <= c && c <= '9') |
| 331 || (c == '_'); | 251 || (c == '_'); |
| 332 } | 252 } |
| 333 | 253 |
| 334 | 254 |
| 335 TEST(Builtins) { | 255 static bool NotWord(uc16 c) { |
| 336 TestBuiltins(*CharacterClass::GetCharacterClass('d'), IsDigit); | 256 return !IsWord(c); |
| 337 TestBuiltins(*CharacterClass::GetCharacterClass('s'), IsWhiteSpace); | |
| 338 TestBuiltins(*CharacterClass::GetCharacterClass('w'), IsWord); | |
| 339 } | 257 } |
| 340 | 258 |
| 341 | 259 |
| 342 TEST(SimpleRanges) { | 260 static bool Dot(uc16 c) { |
| 343 // Test range classes containing a single range. | 261 return true; |
| 344 for (int i = 365; i < 730; i += 3) { | 262 } |
| 345 for (int j = i; j < 1095; j += 3) { | 263 |
| 346 EmbeddedVector<CharacterClass::Range, 1> entries; | 264 |
| 347 entries[0] = CharacterClass::Range(i, j); | 265 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) { |
| 348 CharacterClass klass = CharacterClass::Ranges(entries, NULL); | 266 ZoneScope scope(DELETE_ON_EXIT); |
| 349 for (int k = 0; k < 1095; k += 3) { | 267 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 350 CHECK_EQ(i <= k && k <= j, klass.Contains(k)); | 268 CharacterRange::AddClassEscape(c, ranges); |
| 269 for (unsigned i = 0; i < (1 << 16); i++) { |
| 270 bool in_class = false; |
| 271 for (int j = 0; !in_class && j < ranges->length(); j++) { |
| 272 CharacterRange& range = ranges->at(j); |
| 273 in_class = (range.from() <= i && i <= range.to()); |
| 274 } |
| 275 CHECK_EQ(pred(i), in_class); |
| 276 } |
| 277 } |
| 278 |
| 279 |
| 280 TEST(CharacterClassEscapes) { |
| 281 TestCharacterClassEscapes('.', Dot); |
| 282 TestCharacterClassEscapes('d', IsDigit); |
| 283 TestCharacterClassEscapes('D', NotDigit); |
| 284 TestCharacterClassEscapes('s', IsWhiteSpace); |
| 285 TestCharacterClassEscapes('S', NotWhiteSpace); |
| 286 TestCharacterClassEscapes('w', IsWord); |
| 287 TestCharacterClassEscapes('W', NotWord); |
| 288 } |
| 289 |
| 290 |
| 291 static void Execute(bool expected, const char* input, const char* str) { |
| 292 v8::HandleScope scope; |
| 293 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); |
| 294 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 295 Handle<String> error; |
| 296 RegExpTree* tree = v8::internal::ParseRegExp(&buffer, &error); |
| 297 CHECK(tree != NULL); |
| 298 CHECK(error.is_null()); |
| 299 RegExpNode<const char>* node = RegExpEngine::Compile<const char>(tree); |
| 300 bool outcome = RegExpEngine::Execute(node, CStrVector(str)); |
| 301 CHECK_EQ(outcome, expected); |
| 302 } |
| 303 |
| 304 |
| 305 TEST(Execution) { |
| 306 V8::Initialize(NULL); |
| 307 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbegh"); |
| 308 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefh"); |
| 309 Execute(false, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefd"); |
| 310 } |
| 311 |
| 312 |
| 313 class TestConfig { |
| 314 public: |
| 315 typedef int Key; |
| 316 typedef int Value; |
| 317 static const int kNoKey; |
| 318 static const int kNoValue; |
| 319 static inline int Compare(int a, int b) { |
| 320 if (a < b) return -1; |
| 321 else if (a > b) return 1; |
| 322 else return 0; |
| 323 } |
| 324 }; |
| 325 |
| 326 |
| 327 const int TestConfig::kNoKey = 0; |
| 328 const int TestConfig::kNoValue = 0; |
| 329 |
| 330 |
| 331 static int PseudoRandom(int i, int j) { |
| 332 return ~(~((i * 781) ^ (j * 329))); |
| 333 } |
| 334 |
| 335 |
| 336 TEST(SplayTreeSimple) { |
| 337 static const int kLimit = 1000; |
| 338 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 339 ZoneSplayTree<TestConfig> tree; |
| 340 std::set<int> seen; |
| 341 #define CHECK_MAPS_EQUAL() do { \ |
| 342 for (int k = 0; k < kLimit; k++) \ |
| 343 CHECK_EQ(seen.find(k) != seen.end(), tree.Find(k, &loc)); \ |
| 344 } while (false) |
| 345 for (int i = 0; i < 50; i++) { |
| 346 for (int j = 0; j < 50; j++) { |
| 347 int next = PseudoRandom(i, j) % kLimit; |
| 348 if (seen.find(next) != seen.end()) { |
| 349 // We've already seen this one. Check the value and remove |
| 350 // it. |
| 351 ZoneSplayTree<TestConfig>::Locator loc; |
| 352 CHECK(tree.Find(next, &loc)); |
| 353 CHECK_EQ(next, loc.key()); |
| 354 CHECK_EQ(3 * next, loc.value()); |
| 355 tree.Remove(next); |
| 356 seen.erase(next); |
| 357 CHECK_MAPS_EQUAL(); |
| 358 } else { |
| 359 // Check that it wasn't there already and then add it. |
| 360 ZoneSplayTree<TestConfig>::Locator loc; |
| 361 CHECK(!tree.Find(next, &loc)); |
| 362 CHECK(tree.Insert(next, &loc)); |
| 363 CHECK_EQ(next, loc.key()); |
| 364 loc.set_value(3 * next); |
| 365 seen.insert(next); |
| 366 CHECK_MAPS_EQUAL(); |
| 367 } |
| 368 int val = PseudoRandom(j, i) % kLimit; |
| 369 for (int k = val; k >= 0; k--) { |
| 370 if (seen.find(val) != seen.end()) { |
| 371 ZoneSplayTree<TestConfig>::Locator loc; |
| 372 CHECK(tree.FindGreatestLessThan(val, &loc)); |
| 373 CHECK_EQ(loc.key(), val); |
| 374 break; |
| 375 } |
| 376 } |
| 377 val = PseudoRandom(i + j, i - j) % kLimit; |
| 378 for (int k = val; k < kLimit; k++) { |
| 379 if (seen.find(val) != seen.end()) { |
| 380 ZoneSplayTree<TestConfig>::Locator loc; |
| 381 CHECK(tree.FindLeastGreaterThan(val, &loc)); |
| 382 CHECK_EQ(loc.key(), val); |
| 383 break; |
| 384 } |
| 351 } | 385 } |
| 352 } | 386 } |
| 353 } | 387 } |
| 354 } | 388 } |
| 355 | 389 |
| 356 | 390 |
| 357 static unsigned PseudoRandom(int i, int j) { | 391 static int CompareChars(const void* ap, const void* bp) { |
| 358 return (~((i * 781) ^ (j * 329))); | 392 uc16 a = *static_cast<const uc16*>(ap); |
| 393 uc16 b = *static_cast<const uc16*>(bp); |
| 394 if (a < b) return -1; |
| 395 else if (a > b) return 1; |
| 396 else return 0; |
| 359 } | 397 } |
| 360 | 398 |
| 361 | 399 |
| 362 // Generates pseudo-random character-class with kCount pseudo-random | 400 TEST(DispatchTableConstruction) { |
| 363 // ranges set. | 401 // Initialize test data. |
| 364 template <int kCount> | 402 static const int kLimit = 1000; |
| 365 class SimpleRangeGenerator { | 403 static const int kRangeCount = 8; |
| 366 public: | 404 static const int kRangeSize = 16; |
| 367 SimpleRangeGenerator() : i_(0) { } | 405 uc16 ranges[kRangeCount][2 * kRangeSize]; |
| 368 | 406 for (int i = 0; i < kRangeCount; i++) { |
| 369 // Returns the next character class and sets the ranges vector. The | 407 uc16* range = ranges[i]; |
| 370 // returned value will not have any ranges that extend beyond the 'max' | 408 for (int j = 0; j < 2 * kRangeSize; j++) { |
| 371 // value. | 409 range[j] = PseudoRandom(i + 25, j + 87) % kLimit; |
| 372 CharacterClass Next(int max) { | |
| 373 for (int j = 0; j < 2 * kCount; j++) { | |
| 374 entries_[j] = PseudoRandom(i_, j) % max; | |
| 375 } | 410 } |
| 376 i_++; | 411 qsort(range, 2 * kRangeSize, sizeof(uc16), CompareChars); |
| 377 qsort(entries_.start(), 2 * kCount, sizeof(uc16), Compare); | |
| 378 for (int j = 0; j < kCount; j++) { | |
| 379 ranges_[j] = CharacterClass::Range(entries_[2 * j], | |
| 380 entries_[2 * j + 1]); | |
| 381 } | |
| 382 return CharacterClass::Ranges(ranges_, NULL); | |
| 383 } | 412 } |
| 384 | 413 // Enter test data into dispatch table. |
| 385 // Returns the ranges of the last range that was returned. Note that | 414 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 386 // the returned vector will be clobbered the next time Next() is called. | 415 DispatchTable table; |
| 387 Vector<CharacterClass::Range> ranges() { return ranges_; } | 416 for (int i = 0; i < kRangeCount; i++) { |
| 388 private: | 417 uc16* range = ranges[i]; |
| 389 | 418 for (int j = 0; j < 2 * kRangeSize; j += 2) |
| 390 static int Compare(const void* a, const void* b) { | 419 table.AddRange(CharacterRange(range[j], range[j + 1]), i); |
| 391 return *static_cast<const uc16*>(a) - *static_cast<const uc16*>(b); | |
| 392 } | 420 } |
| 393 | 421 // Check that the table looks as we would expect |
| 394 int i_; | 422 for (int p = 0; p < kLimit; p++) { |
| 395 EmbeddedVector<uc16, 2 * kCount> entries_; | 423 OutSet outs = table.Get(p); |
| 396 EmbeddedVector<CharacterClass::Range, kCount> ranges_; | 424 for (int j = 0; j < kRangeCount; j++) { |
| 397 }; | 425 uc16* range = ranges[j]; |
| 398 | |
| 399 | |
| 400 TEST(LessSimpleRanges) { | |
| 401 // Tests range character classes containing 3 pseudo-random ranges. | |
| 402 SimpleRangeGenerator<3> gen; | |
| 403 for (int i = 0; i < 1024; i++) { | |
| 404 CharacterClass klass = gen.Next(256); | |
| 405 Vector<CharacterClass::Range> entries = gen.ranges(); | |
| 406 for (int j = 0; j < 256; j++) { | |
| 407 bool is_on = false; | 426 bool is_on = false; |
| 408 for (int k = 0; !is_on && k < 3; k++) | 427 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2) |
| 409 is_on = (entries[k].from() <= j && j <= entries[k].to()); | 428 is_on = (range[k] <= p && p <= range[k + 1]); |
| 410 CHECK_EQ(is_on, klass.Contains(j)); | 429 CHECK_EQ(is_on, outs.Get(j)); |
| 411 } | 430 } |
| 412 } | 431 } |
| 413 } | 432 } |
| 414 | |
| 415 | |
| 416 TEST(Unions) { | |
| 417 SimpleRangeGenerator<3> gen1; | |
| 418 SimpleRangeGenerator<3> gen2; | |
| 419 for (int i = 0; i < 1024; i++) { | |
| 420 CharacterClass klass1 = gen1.Next(256); | |
| 421 CharacterClass klass2 = gen2.Next(256); | |
| 422 CharacterClass uhnion = CharacterClass::Union(&klass1, &klass2); | |
| 423 for (int j = 0; j < 256; j++) | |
| 424 CHECK_EQ(klass1.Contains(j) || klass2.Contains(j), uhnion.Contains(j)); | |
| 425 } | |
| 426 } | |
| OLD | NEW |