| OLD | NEW |
| (Empty) | |
| 1 #include "v8.h" |
| 2 |
| 3 |
| 4 #include "cctest.h" |
| 5 #include "zone-inl.h" |
| 6 #include "parser.h" |
| 7 #include "ast.h" |
| 8 #include "jsregexp.h" |
| 9 |
| 10 |
| 11 using namespace v8::internal; |
| 12 |
| 13 |
// One record of regexp test data: a pattern together with the flags,
// input and compile-error strings it was supplied with.  The record only
// stores the pointers it is given; it never copies or frees the strings.
class RegExpTestCase {
 public:
  // Creates an empty record in which every string pointer is NULL.
  RegExpTestCase()
      : pattern_(NULL), flags_(NULL), input_(NULL), compile_error_(NULL) {
  }

  // Creates a record that refers to the given strings.  A NULL
  // compile_error marks a pattern that is expected to compile.
  RegExpTestCase(const char* pattern,
                 const char* flags,
                 const char* input,
                 const char* compile_error)
      : pattern_(pattern),
        flags_(flags),
        input_(input),
        compile_error_(compile_error) {
  }

  // The regexp source text (NULL for a default-constructed record).
  const char* pattern() const {
    return pattern_;
  }

  // True exactly when a compile-error string was supplied.
  bool expect_error() const {
    if (compile_error_ == NULL) return false;
    return true;
  }

 private:
  const char* pattern_;
  const char* flags_;          // Stored but not read by any visible accessor.
  const char* input_;          // Stored but not read by any visible accessor.
  const char* compile_error_;  // NULL means "no error expected".
};
| 37 |
| 38 |
| 39 #ifdef USE_FUZZ_TEST_DATA |
| 40 #include "regexp-test-data.cc" |
| 41 #else |
| 42 static const int kCaseCount = 0; |
| 43 static const RegExpTestCase kCases[1] = { RegExpTestCase() }; |
| 44 #endif |
| 45 |
| 46 |
| 47 static void ExpectParse(const char* input, |
| 48 const char* expected) { |
| 49 v8::HandleScope scope; |
| 50 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); |
| 51 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 52 Handle<String> error; |
| 53 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); |
| 54 CHECK(node != NULL); |
| 55 CHECK(error.is_null()); |
| 56 SmartPointer<char> output = node->ToString(); |
| 57 CHECK_EQ(expected, *output); |
| 58 } |
| 59 |
| 60 |
| 61 TEST(Parser) { |
| 62 V8::Initialize(NULL); |
| 63 ExpectParse("abc", "'abc'"); |
| 64 ExpectParse("", "%"); |
| 65 ExpectParse("abc|def", "(| 'abc' 'def')"); |
| 66 ExpectParse("abc|def|ghi", "(| 'abc' 'def' 'ghi')"); |
| 67 ExpectParse("\\w\\W\\s\\S\\d\\D", "(: [&w] [&W] [&s] [&S] [&d] [&D])"); |
| 68 ExpectParse("^xxx$", "(: @^ 'xxx' @$)"); |
| 69 ExpectParse("ab\\b\\w\\bcd", "(: 'ab' @b [&w] @b 'cd')"); |
| 70 ExpectParse("\\w|\\s|.", "(| [&w] [&s] [&.])"); |
| 71 ExpectParse("a*", "(# 0 - g 'a')"); |
| 72 ExpectParse("a*?", "(# 0 - n 'a')"); |
| 73 ExpectParse("abc+", "(# 1 - g 'abc')"); |
| 74 ExpectParse("abc+?", "(# 1 - n 'abc')"); |
| 75 ExpectParse("xyz?", "(# 0 1 g 'xyz')"); |
| 76 ExpectParse("xyz??", "(# 0 1 n 'xyz')"); |
| 77 ExpectParse("xyz{0,1}", "(# 0 1 g 'xyz')"); |
| 78 ExpectParse("xyz{0,1}?", "(# 0 1 n 'xyz')"); |
| 79 ExpectParse("xyz{93}", "(# 93 93 g 'xyz')"); |
| 80 ExpectParse("xyz{93}?", "(# 93 93 n 'xyz')"); |
| 81 ExpectParse("xyz{1,32}", "(# 1 32 g 'xyz')"); |
| 82 ExpectParse("xyz{1,32}?", "(# 1 32 n 'xyz')"); |
| 83 ExpectParse("xyz{1,}", "(# 1 - g 'xyz')"); |
| 84 ExpectParse("xyz{1,}?", "(# 1 - n 'xyz')"); |
| 85 ExpectParse("a\\fb\\nc\\rd\\te\\vf", "'a\fb\nc\rd\te\vf'"); |
| 86 ExpectParse("a\\nb\\bc", "(: 'a\nb' @b 'c')"); |
| 87 ExpectParse("(?:foo)", "'foo'"); |
| 88 ExpectParse("(?: foo )", "' foo '"); |
| 89 ExpectParse("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))"); |
| 90 ExpectParse("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')"); |
| 91 ExpectParse("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')"); |
| 92 ExpectParse("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')"); |
| 93 ExpectParse("()", "(^ %)"); |
| 94 ExpectParse("(?=)", "(-> + %)"); |
| 95 ExpectParse("[]", "%"); |
| 96 ExpectParse("[x]", "[x]"); |
| 97 ExpectParse("[xyz]", "[x y z]"); |
| 98 ExpectParse("[a-zA-Z0-9]", "[a-z A-Z 0-9]"); |
| 99 ExpectParse("[-123]", "[- 1 2 3]"); |
| 100 ExpectParse("[^123]", "^[1 2 3]"); |
| 101 ExpectParse("]", "']'"); |
| 102 ExpectParse("}", "'}'"); |
| 103 ExpectParse("[a-b-c]", "[a-b - c]"); |
| 104 ExpectParse("[\\w]", "[&w]"); |
| 105 ExpectParse("[x\\wz]", "[x &w z]"); |
| 106 ExpectParse("[\\w-z]", "[&w - z]"); |
| 107 ExpectParse("[\\w-\\d]", "[&w - &d]"); |
| 108 ExpectParse("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\n\n\t\t\v\v'"); |
| 109 ExpectParse("[a\\]c]", "[a ] c]"); |
| 110 ExpectParse("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '"); |
| 111 ExpectParse("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]"); |
| 112 ExpectParse("\\0", "'\0'"); |
| 113 ExpectParse("\\11", "'\t'"); |
| 114 ExpectParse("\\11a", "'\ta'"); |
| 115 ExpectParse("\\011", "'\t'"); |
| 116 ExpectParse("\\118", "'\t8'"); |
| 117 ExpectParse("\\111", "'I'"); |
| 118 ExpectParse("\\1111", "'I1'"); |
| 119 ExpectParse("[\\0]", "[\0]"); |
| 120 ExpectParse("[\\11]", "[\t]"); |
| 121 ExpectParse("[\\11a]", "[\t a]"); |
| 122 ExpectParse("[\\011]", "[\t]"); |
| 123 ExpectParse("[\\118]", "[\t 8]"); |
| 124 ExpectParse("[\\111]", "[I]"); |
| 125 ExpectParse("[\\1111]", "[I 1]"); |
| 126 ExpectParse("\\x34", "'\x34'"); |
| 127 ExpectParse("\\x3z", "'\x03z'"); |
| 128 } |
| 129 |
| 130 |
| 131 static void ExpectError(const char* input, |
| 132 const char* expected) { |
| 133 v8::HandleScope scope; |
| 134 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); |
| 135 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 136 Handle<String> error; |
| 137 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); |
| 138 CHECK(node == NULL); |
| 139 CHECK(!error.is_null()); |
| 140 SmartPointer<char> str = error->ToCString(ALLOW_NULLS); |
| 141 CHECK_EQ(expected, *str); |
| 142 } |
| 143 |
| 144 |
| 145 TEST(Errors) { |
| 146 V8::Initialize(NULL); |
| 147 const char* kEndBackslash = "\\ at end of pattern"; |
| 148 ExpectError("\\", kEndBackslash); |
| 149 const char* kInvalidQuantifier = "Invalid quantifier"; |
| 150 ExpectError("a{}", kInvalidQuantifier); |
| 151 ExpectError("a{,}", kInvalidQuantifier); |
| 152 ExpectError("a{", kInvalidQuantifier); |
| 153 ExpectError("a{z}", kInvalidQuantifier); |
| 154 ExpectError("a{1z}", kInvalidQuantifier); |
| 155 ExpectError("a{12z}", kInvalidQuantifier); |
| 156 ExpectError("a{12,", kInvalidQuantifier); |
| 157 ExpectError("a{12,3b", kInvalidQuantifier); |
| 158 const char* kUnterminatedGroup = "Unterminated group"; |
| 159 ExpectError("(foo", kUnterminatedGroup); |
| 160 const char* kInvalidGroup = "Invalid group"; |
| 161 ExpectError("(?", kInvalidGroup); |
| 162 const char* kUnterminatedCharacterClass = "Unterminated character class"; |
| 163 ExpectError("[", kUnterminatedCharacterClass); |
| 164 ExpectError("[a-", kUnterminatedCharacterClass); |
| 165 const char* kIllegalCharacterClass = "Illegal character class"; |
| 166 ExpectError("[a-\\w]", kIllegalCharacterClass); |
| 167 const char* kEndControl = "\\c at end of pattern"; |
| 168 ExpectError("\\c", kEndControl); |
| 169 const char* kIllegalControl = "Illegal control letter"; |
| 170 ExpectError("\\c!", kIllegalControl); |
| 171 } |
| 172 |
| 173 |
| 174 static void Execute(bool expected, const char* input, const char* str) { |
| 175 v8::HandleScope scops; |
| 176 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); |
| 177 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 178 Handle<String> error; |
| 179 RegExpTree* tree = v8::internal::ParseRegExp(&buffer, &error); |
| 180 CHECK(tree != NULL); |
| 181 CHECK(error.is_null()); |
| 182 RegExpNode<const char>* node = RegExpEngine::Compile<const char>(tree); |
| 183 bool outcome = RegExpEngine::Execute(node, CStrVector(str)); |
| 184 CHECK_EQ(outcome, expected); |
| 185 } |
| 186 |
| 187 |
| 188 TEST(Execution) { |
| 189 V8::Initialize(NULL); |
| 190 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbegh"); |
| 191 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefh"); |
| 192 Execute(false, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefd"); |
| 193 } |
| 194 |
| 195 |
| 196 TEST(Fuzz) { |
| 197 V8::Initialize(NULL); |
| 198 for (int i = 0; i < kCaseCount; i++) { |
| 199 const RegExpTestCase* c = &kCases[i]; |
| 200 v8::HandleScope scope; |
| 201 printf("%s\n", c->pattern()); |
| 202 unibrow::Utf8InputBuffer<> buffer(c->pattern(), strlen(c->pattern())); |
| 203 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 204 Handle<String> error; |
| 205 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); |
| 206 if (c->expect_error()) { |
| 207 CHECK(node == NULL); |
| 208 CHECK(!error.is_null()); |
| 209 } else { |
| 210 CHECK(node != NULL); |
| 211 CHECK(error.is_null()); |
| 212 } |
| 213 } |
| 214 } |
| 215 |
| 216 |
| 217 // "123456789abcdb".match(/(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(\11)/) |
| 218 // 123456789abcdb,1,2,3,4,5,6,7,8,9,a,b,c,d,b |
| OLD | NEW |