Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(53)

Side by Side Diff: test/cctest/test-regexp.cc

Issue 9104: Dispatch tables (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« src/jsregexp.cc ('K') | « src/parser.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution. 11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its 12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived 13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission. 14 // from this software without specific prior written permission.
15 // 15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 28
29 #include <stdlib.h> 29 #include <stdlib.h>
30 #include <set>
30 31
31 #include "v8.h" 32 #include "v8.h"
32 33
33 #include "cctest.h" 34 #include "cctest.h"
34 #include "zone-inl.h" 35 #include "zone-inl.h"
35 #include "parser.h" 36 #include "parser.h"
36 #include "ast.h" 37 #include "ast.h"
37 #include "jsregexp-inl.h" 38 #include "jsregexp-inl.h"
38 39
39 40
(...skipping 18 matching lines...) Expand all
58 const char* pattern() const { return pattern_; } 59 const char* pattern() const { return pattern_; }
59 bool expect_error() const { return compile_error_ != NULL; } 60 bool expect_error() const { return compile_error_ != NULL; }
60 private: 61 private:
61 const char* pattern_; 62 const char* pattern_;
62 const char* flags_; 63 const char* flags_;
63 const char* input_; 64 const char* input_;
64 const char* compile_error_; 65 const char* compile_error_;
65 }; 66 };
66 67
67 68
68 #ifdef USE_FUZZ_TEST_DATA
69 #include "regexp-test-data.cc"
70 #else
71 static const int kCaseCount = 0;
72 static const RegExpTestCase kCases[1] = { RegExpTestCase() };
73 #endif
74
75
76 static SmartPointer<char> Parse(const char* input) { 69 static SmartPointer<char> Parse(const char* input) {
77 v8::HandleScope scope; 70 v8::HandleScope scope;
78 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); 71 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
79 ZoneScope zone_scope(DELETE_ON_EXIT); 72 ZoneScope zone_scope(DELETE_ON_EXIT);
80 Handle<String> error; 73 Handle<String> error;
81 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error); 74 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error);
82 CHECK(node != NULL); 75 CHECK(node != NULL);
83 CHECK(error.is_null()); 76 CHECK(error.is_null());
84 SmartPointer<char> output = node->ToString(); 77 SmartPointer<char> output = node->ToString();
85 return output; 78 return output;
86 } 79 }
87 80
88 81
89 #define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input)) 82 #define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))
90 83
91 84
92 TEST(Parser) { 85 TEST(Parser) {
93 V8::Initialize(NULL); 86 V8::Initialize(NULL);
94 CHECK_PARSE_EQ("abc", "'abc'"); 87 CHECK_PARSE_EQ("abc", "'abc'");
95 CHECK_PARSE_EQ("", "%"); 88 CHECK_PARSE_EQ("", "%");
96 CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')"); 89 CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
97 CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')"); 90 CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
98 CHECK_PARSE_EQ("\\w\\W\\s\\S\\d\\D", "(: [&w] [&W] [&s] [&S] [&d] [&D])");
99 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)"); 91 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
100 CHECK_PARSE_EQ("ab\\b\\w\\bcd", "(: 'ab' @b [&w] @b 'cd')"); 92 CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
101 CHECK_PARSE_EQ("\\w|\\s|.", "(| [&w] [&s] [&.])"); 93 CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
102 CHECK_PARSE_EQ("a*", "(# 0 - g 'a')"); 94 CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
103 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')"); 95 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
104 CHECK_PARSE_EQ("abc+", "(# 1 - g 'abc')"); 96 CHECK_PARSE_EQ("abc+", "(# 1 - g 'abc')");
105 CHECK_PARSE_EQ("abc+?", "(# 1 - n 'abc')"); 97 CHECK_PARSE_EQ("abc+?", "(# 1 - n 'abc')");
106 CHECK_PARSE_EQ("xyz?", "(# 0 1 g 'xyz')"); 98 CHECK_PARSE_EQ("xyz?", "(# 0 1 g 'xyz')");
107 CHECK_PARSE_EQ("xyz??", "(# 0 1 n 'xyz')"); 99 CHECK_PARSE_EQ("xyz??", "(# 0 1 n 'xyz')");
108 CHECK_PARSE_EQ("xyz{0,1}", "(# 0 1 g 'xyz')"); 100 CHECK_PARSE_EQ("xyz{0,1}", "(# 0 1 g 'xyz')");
109 CHECK_PARSE_EQ("xyz{0,1}?", "(# 0 1 n 'xyz')"); 101 CHECK_PARSE_EQ("xyz{0,1}?", "(# 0 1 n 'xyz')");
110 CHECK_PARSE_EQ("xyz{93}", "(# 93 93 g 'xyz')"); 102 CHECK_PARSE_EQ("xyz{93}", "(# 93 93 g 'xyz')");
111 CHECK_PARSE_EQ("xyz{93}?", "(# 93 93 n 'xyz')"); 103 CHECK_PARSE_EQ("xyz{93}?", "(# 93 93 n 'xyz')");
(...skipping 13 matching lines...) Expand all
125 CHECK_PARSE_EQ("(?=)", "(-> + %)"); 117 CHECK_PARSE_EQ("(?=)", "(-> + %)");
126 CHECK_PARSE_EQ("[]", "%"); 118 CHECK_PARSE_EQ("[]", "%");
127 CHECK_PARSE_EQ("[x]", "[x]"); 119 CHECK_PARSE_EQ("[x]", "[x]");
128 CHECK_PARSE_EQ("[xyz]", "[x y z]"); 120 CHECK_PARSE_EQ("[xyz]", "[x y z]");
129 CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]"); 121 CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
130 CHECK_PARSE_EQ("[-123]", "[- 1 2 3]"); 122 CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");
131 CHECK_PARSE_EQ("[^123]", "^[1 2 3]"); 123 CHECK_PARSE_EQ("[^123]", "^[1 2 3]");
132 CHECK_PARSE_EQ("]", "']'"); 124 CHECK_PARSE_EQ("]", "']'");
133 CHECK_PARSE_EQ("}", "'}'"); 125 CHECK_PARSE_EQ("}", "'}'");
134 CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]"); 126 CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
135 CHECK_PARSE_EQ("[\\w]", "[&w]"); 127 CHECK_PARSE_EQ("[\\d]", "[0-9]");
136 CHECK_PARSE_EQ("[x\\wz]", "[x &w z]"); 128 CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
137 CHECK_PARSE_EQ("[\\w-z]", "[&w - z]"); 129 CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
138 CHECK_PARSE_EQ("[\\w-\\d]", "[&w - &d]"); 130 CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
139 CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\n\n\t\t\v\v'"); 131 CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\n\n\t\t\v\v'");
140 CHECK_PARSE_EQ("\\c!", "'c!'"); 132 CHECK_PARSE_EQ("\\c!", "'c!'");
141 CHECK_PARSE_EQ("\\c_", "'c_'"); 133 CHECK_PARSE_EQ("\\c_", "'c_'");
142 CHECK_PARSE_EQ("\\c~", "'c~'"); 134 CHECK_PARSE_EQ("\\c~", "'c~'");
143 CHECK_PARSE_EQ("[a\\]c]", "[a ] c]"); 135 CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
144 CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '"); 136 CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
145 CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]"); 137 CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
146 CHECK_PARSE_EQ("\\0", "'\0'"); 138 CHECK_PARSE_EQ("\\0", "'\0'");
147 CHECK_PARSE_EQ("\\8", "'8'"); 139 CHECK_PARSE_EQ("\\8", "'8'");
148 CHECK_PARSE_EQ("\\9", "'9'"); 140 CHECK_PARSE_EQ("\\9", "'9'");
149 CHECK_PARSE_EQ("\\11", "'\t'"); 141 CHECK_PARSE_EQ("\\11", "'\t'");
150 CHECK_PARSE_EQ("\\11a", "'\ta'"); 142 CHECK_PARSE_EQ("\\11a", "'\ta'");
151 CHECK_PARSE_EQ("\\011", "'\t'"); 143 CHECK_PARSE_EQ("\\011", "'\t'");
152 CHECK_PARSE_EQ("\\00011", "'\00011'"); 144 CHECK_PARSE_EQ("\\00011", "'\00011'");
153 CHECK_PARSE_EQ("\\118", "'\t8'"); 145 CHECK_PARSE_EQ("\\118", "'\t8'");
154 CHECK_PARSE_EQ("\\111", "'I'"); 146 CHECK_PARSE_EQ("\\111", "'I'");
155 CHECK_PARSE_EQ("\\1111", "'I1'"); 147 CHECK_PARSE_EQ("\\1111", "'I1'");
156 CHECK_PARSE_EQ("(.)(.)(.)\\1", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 1))"); 148 CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
157 CHECK_PARSE_EQ("(.)(.)(.)\\2", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 2))"); 149 CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
158 CHECK_PARSE_EQ("(.)(.)(.)\\3", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 3))"); 150 CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
159 CHECK_PARSE_EQ("(.)(.)(.)\\4", "(: (^ [&.]) (^ [&.]) (^ [&.]) '\x04')"); 151 CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\x04')");
160 CHECK_PARSE_EQ("(.)(.)(.)\\1*", "(: (^ [&.]) (^ [&.]) (^ [&.])" 152 CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"
161 " (# 0 - g (<- 1)))"); 153 " (# 0 - g (<- 1)))");
162 CHECK_PARSE_EQ("(.)(.)(.)\\2*", "(: (^ [&.]) (^ [&.]) (^ [&.])" 154 CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"
163 " (# 0 - g (<- 2)))"); 155 " (# 0 - g (<- 2)))");
164 CHECK_PARSE_EQ("(.)(.)(.)\\3*", "(: (^ [&.]) (^ [&.]) (^ [&.])" 156 CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
165 " (# 0 - g (<- 3)))"); 157 " (# 0 - g (<- 3)))");
166 CHECK_PARSE_EQ("(.)(.)(.)\\4*", "(: (^ [&.]) (^ [&.]) (^ [&.])" 158 CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
167 " (# 0 - g '\x04'))"); 159 " (# 0 - g '\x04'))");
168 CHECK_PARSE_EQ("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\10", 160 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
169 "(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])" 161 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
170 " (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (<- 10))"); 162 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
171 CHECK_PARSE_EQ("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\11", 163 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
172 "(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])" 164 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
173 " (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) '\x09')"); 165 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\x09')");
174 CHECK_PARSE_EQ("[\\0]", "[\0]"); 166 CHECK_PARSE_EQ("[\\0]", "[\0]");
175 CHECK_PARSE_EQ("[\\11]", "[\t]"); 167 CHECK_PARSE_EQ("[\\11]", "[\t]");
176 CHECK_PARSE_EQ("[\\11a]", "[\t a]"); 168 CHECK_PARSE_EQ("[\\11a]", "[\t a]");
177 CHECK_PARSE_EQ("[\\011]", "[\t]"); 169 CHECK_PARSE_EQ("[\\011]", "[\t]");
178 CHECK_PARSE_EQ("[\\00011]", "[\000 1 1]"); 170 CHECK_PARSE_EQ("[\\00011]", "[\000 1 1]");
179 CHECK_PARSE_EQ("[\\118]", "[\t 8]"); 171 CHECK_PARSE_EQ("[\\118]", "[\t 8]");
180 CHECK_PARSE_EQ("[\\111]", "[I]"); 172 CHECK_PARSE_EQ("[\\111]", "[I]");
181 CHECK_PARSE_EQ("[\\1111]", "[I 1]"); 173 CHECK_PARSE_EQ("[\\1111]", "[I 1]");
182 CHECK_PARSE_EQ("\\x34", "'\x34'"); 174 CHECK_PARSE_EQ("\\x34", "'\x34'");
183 CHECK_PARSE_EQ("\\x3z", "'x3z'"); 175 CHECK_PARSE_EQ("\\x3z", "'x3z'");
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
220 const char* kUnterminatedCharacterClass = "Unterminated character class"; 212 const char* kUnterminatedCharacterClass = "Unterminated character class";
221 ExpectError("[", kUnterminatedCharacterClass); 213 ExpectError("[", kUnterminatedCharacterClass);
222 ExpectError("[a-", kUnterminatedCharacterClass); 214 ExpectError("[a-", kUnterminatedCharacterClass);
223 const char* kIllegalCharacterClass = "Illegal character class"; 215 const char* kIllegalCharacterClass = "Illegal character class";
224 ExpectError("[a-\\w]", kIllegalCharacterClass); 216 ExpectError("[a-\\w]", kIllegalCharacterClass);
225 const char* kEndControl = "\\c at end of pattern"; 217 const char* kEndControl = "\\c at end of pattern";
226 ExpectError("\\c", kEndControl); 218 ExpectError("\\c", kEndControl);
227 } 219 }
228 220
229 221
230 static void Execute(bool expected, const char* input, const char* str) {
231 v8::HandleScope scops;
232 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
233 ZoneScope zone_scope(DELETE_ON_EXIT);
234 Handle<String> error;
235 RegExpTree* tree = v8::internal::ParseRegExp(&buffer, &error);
236 CHECK(tree != NULL);
237 CHECK(error.is_null());
238 RegExpNode<const char>* node = RegExpEngine::Compile<const char>(tree);
239 bool outcome = RegExpEngine::Execute(node, CStrVector(str));
240 CHECK_EQ(outcome, expected);
241 }
242
243
244 TEST(Execution) {
245 V8::Initialize(NULL);
246 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbegh");
247 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefh");
248 Execute(false, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefd");
249 }
250
251
252 TEST(Fuzz) {
253 V8::Initialize(NULL);
254 for (int i = 0; i < kCaseCount; i++) {
255 const RegExpTestCase* c = &kCases[i];
256 v8::HandleScope scope;
257 printf("%s\n", c->pattern());
258 unibrow::Utf8InputBuffer<> buffer(c->pattern(), strlen(c->pattern()));
259 ZoneScope zone_scope(DELETE_ON_EXIT);
260 Handle<String> error;
261 RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error);
262 if (c->expect_error()) {
263 CHECK(node == NULL);
264 CHECK(!error.is_null());
265 } else {
266 CHECK(node != NULL);
267 CHECK(error.is_null());
268 }
269 }
270 }
271
272
273 TEST(SingletonField) {
274 // Test all bits from 0 to 256
275 for (int i = 0; i < 256; i++) {
276 CharacterClass entry = CharacterClass::SingletonField(i);
277 for (int j = 0; j < 256; j++) {
278 CHECK_EQ(i == j, entry.Contains(j));
279 }
280 }
281 // Test upwards through the data range
282 static const uint32_t kMax = 1 << 16;
283 for (uint32_t i = 0; i < kMax; i = 1 + static_cast<uint32_t>(i * 1.2)) {
284 CharacterClass entry = CharacterClass::SingletonField(i);
285 for (uint32_t j = 0; j < kMax; j = 1 + static_cast<uint32_t>(j * 1.2)) {
286 CHECK_EQ(i == j, entry.Contains(j));
287 }
288 }
289 }
290
291
292 TEST(RangeField) {
293 // Test bitfields containing a single range.
294 for (int i = 256; i < 320; i++) {
295 for (int j = i; j < 320; j++) {
296 CharacterClass::Range range(i, j);
297 CharacterClass entry = CharacterClass::RangeField(range);
298 for (int k = 256; k < 320; k++) {
299 CHECK_EQ(i <= k && k <= j, entry.Contains(k));
300 }
301 }
302 }
303 }
304
305
306 static void TestBuiltins(CharacterClass klass, bool (pred)(uc16)) {
307 for (int i = 0; i < (1 << 16); i++)
308 CHECK_EQ(pred(i), klass.Contains(i));
309 }
310
311
312 static bool IsDigit(uc16 c) { 222 static bool IsDigit(uc16 c) {
313 return ('0' <= c && c <= '9'); 223 return ('0' <= c && c <= '9');
314 } 224 }
315 225
316 226
227 static bool NotDigit(uc16 c) {
228 return !IsDigit(c);
229 }
230
231
317 static bool IsWhiteSpace(uc16 c) { 232 static bool IsWhiteSpace(uc16 c) {
318 switch (c) { 233 switch (c) {
319 case 0x09: case 0x0B: case 0x0C: case 0x20: case 0xA0: 234 case 0x09: case 0x0B: case 0x0C: case 0x20: case 0xA0:
320 return true; 235 return true;
321 default: 236 default:
322 return unibrow::Space::Is(c); 237 return unibrow::Space::Is(c);
323 } 238 }
324 } 239 }
325 240
326 241
242 static bool NotWhiteSpace(uc16 c) {
243 return !IsWhiteSpace(c);
244 }
245
246
327 static bool IsWord(uc16 c) { 247 static bool IsWord(uc16 c) {
328 return ('a' <= c && c <= 'z') 248 return ('a' <= c && c <= 'z')
329 || ('A' <= c && c <= 'Z') 249 || ('A' <= c && c <= 'Z')
330 || ('0' <= c && c <= '9') 250 || ('0' <= c && c <= '9')
331 || (c == '_'); 251 || (c == '_');
332 } 252 }
333 253
334 254
335 TEST(Builtins) { 255 static bool NotWord(uc16 c) {
336 TestBuiltins(*CharacterClass::GetCharacterClass('d'), IsDigit); 256 return !IsWord(c);
337 TestBuiltins(*CharacterClass::GetCharacterClass('s'), IsWhiteSpace);
338 TestBuiltins(*CharacterClass::GetCharacterClass('w'), IsWord);
339 } 257 }
340 258
341 259
342 TEST(SimpleRanges) { 260 static bool Dot(uc16 c) {
343 // Test range classes containing a single range. 261 return true;
344 for (int i = 365; i < 730; i += 3) { 262 }
345 for (int j = i; j < 1095; j += 3) { 263
346 EmbeddedVector<CharacterClass::Range, 1> entries; 264
347 entries[0] = CharacterClass::Range(i, j); 265 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
348 CharacterClass klass = CharacterClass::Ranges(entries, NULL); 266 ZoneScope scope(DELETE_ON_EXIT);
349 for (int k = 0; k < 1095; k += 3) { 267 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
350 CHECK_EQ(i <= k && k <= j, klass.Contains(k)); 268 CharacterRange::AddClassEscape(c, ranges);
269 for (unsigned i = 0; i < (1 << 16); i++) {
270 bool in_class = false;
271 for (int j = 0; !in_class && j < ranges->length(); j++) {
272 CharacterRange& range = ranges->at(j);
273 in_class = (range.from() <= i && i <= range.to());
274 }
275 CHECK_EQ(pred(i), in_class);
276 }
277 }
278
279
280 TEST(CharacterClassEscapes) {
281 TestCharacterClassEscapes('.', Dot);
282 TestCharacterClassEscapes('d', IsDigit);
283 TestCharacterClassEscapes('D', NotDigit);
284 TestCharacterClassEscapes('s', IsWhiteSpace);
285 TestCharacterClassEscapes('S', NotWhiteSpace);
286 TestCharacterClassEscapes('w', IsWord);
287 TestCharacterClassEscapes('W', NotWord);
288 }
289
290
291 static void Execute(bool expected, const char* input, const char* str) {
292 v8::HandleScope scope;
293 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
294 ZoneScope zone_scope(DELETE_ON_EXIT);
295 Handle<String> error;
296 RegExpTree* tree = v8::internal::ParseRegExp(&buffer, &error);
297 CHECK(tree != NULL);
298 CHECK(error.is_null());
299 RegExpNode<const char>* node = RegExpEngine::Compile<const char>(tree);
300 bool outcome = RegExpEngine::Execute(node, CStrVector(str));
301 CHECK_EQ(outcome, expected);
302 }
303
304
305 TEST(Execution) {
306 V8::Initialize(NULL);
307 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbegh");
308 Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefh");
309 Execute(false, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefd");
310 }
311
312
313 class TestConfig {
314 public:
315 typedef int Key;
316 typedef int Value;
317 static const int kNoKey;
318 static const int kNoValue;
319 static inline int Compare(int a, int b) {
320 if (a < b) return -1;
321 else if (a > b) return 1;
322 else return 0;
323 }
324 };
325
326
327 const int TestConfig::kNoKey = 0;
328 const int TestConfig::kNoValue = 0;
329
330
331 static int PseudoRandom(int i, int j) {
332 return ~(~((i * 781) ^ (j * 329)));
333 }
334
335
336 TEST(SplayTreeSimple) {
337 static const int kLimit = 1000;
338 ZoneScope zone_scope(DELETE_ON_EXIT);
339 ZoneSplayTree<TestConfig> tree;
340 std::set<int> seen;
341 #define CHECK_MAPS_EQUAL() do { \
342 for (int k = 0; k < kLimit; k++) \
343 CHECK_EQ(seen.find(k) != seen.end(), tree.Find(k, &loc)); \
344 } while (false)
345 for (int i = 0; i < 50; i++) {
346 for (int j = 0; j < 50; j++) {
347 int next = PseudoRandom(i, j) % kLimit;
348 if (seen.find(next) != seen.end()) {
349 // We've already seen this one. Check the value and remove
350 // it.
351 ZoneSplayTree<TestConfig>::Locator loc;
352 CHECK(tree.Find(next, &loc));
353 CHECK_EQ(next, loc.key());
354 CHECK_EQ(3 * next, loc.value());
355 tree.Remove(next);
356 seen.erase(next);
357 CHECK_MAPS_EQUAL();
358 } else {
359 // Check that it wasn't there already and then add it.
360 ZoneSplayTree<TestConfig>::Locator loc;
361 CHECK(!tree.Find(next, &loc));
362 CHECK(tree.Insert(next, &loc));
363 CHECK_EQ(next, loc.key());
364 loc.set_value(3 * next);
365 seen.insert(next);
366 CHECK_MAPS_EQUAL();
367 }
368 int val = PseudoRandom(j, i) % kLimit;
369 for (int k = val; k >= 0; k--) {
370 if (seen.find(val) != seen.end()) {
371 ZoneSplayTree<TestConfig>::Locator loc;
372 CHECK(tree.FindGreatestLessThan(val, &loc));
373 CHECK_EQ(loc.key(), val);
374 break;
375 }
376 }
377 val = PseudoRandom(i + j, i - j) % kLimit;
378 for (int k = val; k < kLimit; k++) {
379 if (seen.find(val) != seen.end()) {
380 ZoneSplayTree<TestConfig>::Locator loc;
381 CHECK(tree.FindLeastGreaterThan(val, &loc));
382 CHECK_EQ(loc.key(), val);
383 break;
384 }
351 } 385 }
352 } 386 }
353 } 387 }
354 } 388 }
355 389
356 390
357 static unsigned PseudoRandom(int i, int j) { 391 static int CompareChars(const void* ap, const void* bp) {
358 return (~((i * 781) ^ (j * 329))); 392 uc16 a = *static_cast<const uc16*>(ap);
393 uc16 b = *static_cast<const uc16*>(bp);
394 if (a < b) return -1;
395 else if (a > b) return 1;
396 else return 0;
359 } 397 }
360 398
361 399
362 // Generates pseudo-random character-class with kCount pseudo-random 400 TEST(DispatchTableConstruction) {
363 // ranges set. 401 // Initialize test data.
364 template <int kCount> 402 static const int kLimit = 1000;
365 class SimpleRangeGenerator { 403 static const int kRangeCount = 8;
366 public: 404 static const int kRangeSize = 16;
367 SimpleRangeGenerator() : i_(0) { } 405 uc16 ranges[kRangeCount][2 * kRangeSize];
368 406 for (int i = 0; i < kRangeCount; i++) {
369 // Returns the next character class and sets the ranges vector. The 407 uc16* range = ranges[i];
370 // returned value will not have any ranges that extend beyond the 'max' 408 for (int j = 0; j < 2 * kRangeSize; j++) {
371 // value. 409 range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
372 CharacterClass Next(int max) {
373 for (int j = 0; j < 2 * kCount; j++) {
374 entries_[j] = PseudoRandom(i_, j) % max;
375 } 410 }
376 i_++; 411 qsort(range, 2 * kRangeSize, sizeof(uc16), CompareChars);
377 qsort(entries_.start(), 2 * kCount, sizeof(uc16), Compare);
378 for (int j = 0; j < kCount; j++) {
379 ranges_[j] = CharacterClass::Range(entries_[2 * j],
380 entries_[2 * j + 1]);
381 }
382 return CharacterClass::Ranges(ranges_, NULL);
383 } 412 }
384 413 // Enter test data into dispatch table.
385 // Returns the ranges of the last range that was returned. Note that 414 ZoneScope zone_scope(DELETE_ON_EXIT);
386 // the returned vector will be clobbered the next time Next() is called. 415 DispatchTable table;
387 Vector<CharacterClass::Range> ranges() { return ranges_; } 416 for (int i = 0; i < kRangeCount; i++) {
388 private: 417 uc16* range = ranges[i];
389 418 for (int j = 0; j < 2 * kRangeSize; j += 2)
390 static int Compare(const void* a, const void* b) { 419 table.AddRange(CharacterRange(range[j], range[j + 1]), i);
391 return *static_cast<const uc16*>(a) - *static_cast<const uc16*>(b);
392 } 420 }
393 421 // Check that the table looks as we would expect
394 int i_; 422 for (int p = 0; p < kLimit; p++) {
395 EmbeddedVector<uc16, 2 * kCount> entries_; 423 OutSet outs = table.Get(p);
396 EmbeddedVector<CharacterClass::Range, kCount> ranges_; 424 for (int j = 0; j < kRangeCount; j++) {
397 }; 425 uc16* range = ranges[j];
398
399
400 TEST(LessSimpleRanges) {
401 // Tests range character classes containing 3 pseudo-random ranges.
402 SimpleRangeGenerator<3> gen;
403 for (int i = 0; i < 1024; i++) {
404 CharacterClass klass = gen.Next(256);
405 Vector<CharacterClass::Range> entries = gen.ranges();
406 for (int j = 0; j < 256; j++) {
407 bool is_on = false; 426 bool is_on = false;
408 for (int k = 0; !is_on && k < 3; k++) 427 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
409 is_on = (entries[k].from() <= j && j <= entries[k].to()); 428 is_on = (range[k] <= p && p <= range[k + 1]);
410 CHECK_EQ(is_on, klass.Contains(j)); 429 CHECK_EQ(is_on, outs.Get(j));
411 } 430 }
412 } 431 }
413 } 432 }
414
415
416 TEST(Unions) {
417 SimpleRangeGenerator<3> gen1;
418 SimpleRangeGenerator<3> gen2;
419 for (int i = 0; i < 1024; i++) {
420 CharacterClass klass1 = gen1.Next(256);
421 CharacterClass klass2 = gen2.Next(256);
422 CharacterClass uhnion = CharacterClass::Union(&klass1, &klass2);
423 for (int j = 0; j < 256; j++)
424 CHECK_EQ(klass1.Contains(j) || klass2.Contains(j), uhnion.Contains(j));
425 }
426 }
OLDNEW
« src/jsregexp.cc ('K') | « src/parser.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698