Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | |
|
Erik Corry
2008/11/25 11:03:52
Correct year?
Christian Plesner Hansen
2008/11/26 06:49:56
Fixed
| |
| 2 // Redistribution and use in source and binary forms, with or without | |
| 3 // modification, are permitted provided that the following conditions are | |
| 4 // met: | |
| 5 // | |
| 6 // * Redistributions of source code must retain the above copyright | |
| 7 // notice, this list of conditions and the following disclaimer. | |
| 8 // * Redistributions in binary form must reproduce the above | |
| 9 // copyright notice, this list of conditions and the following | |
| 10 // disclaimer in the documentation and/or other materials provided | |
| 11 // with the distribution. | |
| 12 // * Neither the name of Google Inc. nor the names of its | |
| 13 // contributors may be used to endorse or promote products derived | |
| 14 // from this software without specific prior written permission. | |
| 15 // | |
| 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 27 | |
| 28 | |
| 29 #include <stdlib.h> | |
| 30 #include <set> | |
|
Mads Ager (chromium)
2008/11/25 21:09:41
Please remove. We are not using <set>, right?
Christian Plesner Hansen
2008/11/26 06:49:56
Yes we are.
| |
| 31 | |
| 32 #include "v8.h" | |
| 33 | |
| 34 #include "cctest.h" | |
| 35 #include "zone-inl.h" | |
| 36 #include "parser.h" | |
| 37 #include "ast.h" | |
| 38 #include "jsregexp-inl.h" | |
| 39 #include "assembler-irregexp.h" | |
| 40 #include "regexp-macro-assembler.h" | |
| 41 #include "regexp-macro-assembler-irregexp.h" | |
| 42 #include "regexp-macro-assembler-ia32.h" | |
| 43 #include "interpreter-irregexp.h" | |
| 44 | |
| 45 | |
| 46 using namespace v8::internal; | |
| 47 | |
| 48 | |
| 49 static SmartPointer<const char> Parse(const char* input) { | |
| 50 v8::HandleScope scope; | |
| 51 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 52 FlatStringReader reader(CStrVector(input)); | |
| 53 RegExpParseResult result; | |
| 54 CHECK(v8::internal::ParseRegExp(&reader, &result)); | |
| 55 CHECK(result.tree != NULL); | |
| 56 CHECK(result.error.is_null()); | |
| 57 SmartPointer<const char> output = result.tree->ToString(); | |
| 58 return output; | |
| 59 } | |
| 60 | |
| 61 static bool ParseEscapes(const char* input) { | |
| 62 v8::HandleScope scope; | |
| 63 unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); | |
| 64 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 65 FlatStringReader reader(CStrVector(input)); | |
| 66 RegExpParseResult result; | |
| 67 CHECK(v8::internal::ParseRegExp(&reader, &result)); | |
| 68 CHECK(result.tree != NULL); | |
| 69 CHECK(result.error.is_null()); | |
| 70 return result.has_character_escapes; | |
| 71 } | |
| 72 | |
| 73 | |
| 74 #define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input)) | |
| 75 #define CHECK_ESCAPES(input, has_escapes) CHECK_EQ(has_escapes, \ | |
|
Mads Ager (chromium)
2008/11/25 21:09:41
How about:
#define CHECK_ESCAPES(input, has_escap
| |
| 76 ParseEscapes(input)); | |
| 77 | |
| 78 TEST(Parser) { | |
| 79 V8::Initialize(NULL); | |
| 80 CHECK_PARSE_EQ("abc", "'abc'"); | |
| 81 CHECK_PARSE_EQ("", "%"); | |
| 82 CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')"); | |
| 83 CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')"); | |
| 84 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)"); | |
| 85 CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')"); | |
| 86 CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])"); | |
| 87 CHECK_PARSE_EQ("a*", "(# 0 - g 'a')"); | |
| 88 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')"); | |
| 89 CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))"); | |
| 90 CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))"); | |
| 91 CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))"); | |
| 92 CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))"); | |
| 93 CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))"); | |
| 94 CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))"); | |
| 95 CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))"); | |
| 96 CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))"); | |
| 97 CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))"); | |
| 98 CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))"); | |
| 99 CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))"); | |
| 100 CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))"); | |
| 101 CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'"); | |
| 102 CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')"); | |
| 103 CHECK_PARSE_EQ("(?:foo)", "'foo'"); | |
| 104 CHECK_PARSE_EQ("(?: foo )", "' foo '"); | |
| 105 CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))"); | |
| 106 CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')"); | |
| 107 CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')"); | |
| 108 CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')"); | |
| 109 CHECK_PARSE_EQ("()", "(^ %)"); | |
| 110 CHECK_PARSE_EQ("(?=)", "(-> + %)"); | |
| 111 CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows | |
| 112 CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252 | |
| 113 CHECK_PARSE_EQ("[x]", "[x]"); | |
| 114 CHECK_PARSE_EQ("[xyz]", "[x y z]"); | |
| 115 CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]"); | |
| 116 CHECK_PARSE_EQ("[-123]", "[- 1 2 3]"); | |
| 117 CHECK_PARSE_EQ("[^123]", "^[1 2 3]"); | |
| 118 CHECK_PARSE_EQ("]", "']'"); | |
| 119 CHECK_PARSE_EQ("}", "'}'"); | |
| 120 CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]"); | |
| 121 CHECK_PARSE_EQ("[\\d]", "[0-9]"); | |
| 122 CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]"); | |
| 123 CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]"); | |
| 124 CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]"); | |
| 125 CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK", | |
| 126 "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'"); | |
| 127 CHECK_PARSE_EQ("\\c!", "'c!'"); | |
| 128 CHECK_PARSE_EQ("\\c_", "'c_'"); | |
| 129 CHECK_PARSE_EQ("\\c~", "'c~'"); | |
| 130 CHECK_PARSE_EQ("[a\\]c]", "[a ] c]"); | |
| 131 CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '"); | |
| 132 CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]"); | |
| 133 CHECK_PARSE_EQ("\\0", "'\\x00'"); | |
| 134 CHECK_PARSE_EQ("\\8", "'8'"); | |
| 135 CHECK_PARSE_EQ("\\9", "'9'"); | |
| 136 CHECK_PARSE_EQ("\\11", "'\\x09'"); | |
| 137 CHECK_PARSE_EQ("\\11a", "'\\x09a'"); | |
| 138 CHECK_PARSE_EQ("\\011", "'\\x09'"); | |
| 139 CHECK_PARSE_EQ("\\00011", "'\\x0011'"); | |
| 140 CHECK_PARSE_EQ("\\118", "'\\x098'"); | |
| 141 CHECK_PARSE_EQ("\\111", "'I'"); | |
| 142 CHECK_PARSE_EQ("\\1111", "'I1'"); | |
| 143 CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))"); | |
| 144 CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))"); | |
| 145 CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))"); | |
| 146 CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')"); | |
| 147 CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')" | |
| 148 " (# 0 - g (<- 1)))"); | |
| 149 CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')" | |
| 150 " (# 0 - g (<- 2)))"); | |
| 151 CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')" | |
| 152 " (# 0 - g (<- 3)))"); | |
| 153 CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')" | |
| 154 " (# 0 - g '\\x04'))"); | |
| 155 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10", | |
| 156 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')" | |
| 157 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))"); | |
| 158 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11", | |
| 159 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')" | |
| 160 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')"); | |
| 161 CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))"); | |
| 162 CHECK_PARSE_EQ("(a\\1)", "(^ 'a')"); | |
| 163 CHECK_PARSE_EQ("(\\1a)", "(^ 'a')"); | |
| 164 CHECK_PARSE_EQ("\\1(a)", "(^ 'a')"); | |
| 165 CHECK_PARSE_EQ("(?!(a))\\1", "(-> - (^ 'a'))"); | |
| 166 CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(-> - (: (^ 'a') (<- 1)))"); | |
| 167 CHECK_PARSE_EQ("[\\0]", "[\\x00]"); | |
| 168 CHECK_PARSE_EQ("[\\11]", "[\\x09]"); | |
| 169 CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]"); | |
| 170 CHECK_PARSE_EQ("[\\011]", "[\\x09]"); | |
| 171 CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]"); | |
| 172 CHECK_PARSE_EQ("[\\118]", "[\\x09 8]"); | |
| 173 CHECK_PARSE_EQ("[\\111]", "[I]"); | |
| 174 CHECK_PARSE_EQ("[\\1111]", "[I 1]"); | |
| 175 CHECK_PARSE_EQ("\\x34", "'\x34'"); | |
| 176 CHECK_PARSE_EQ("\\x60", "'\x60'"); | |
| 177 CHECK_PARSE_EQ("\\x3z", "'x3z'"); | |
| 178 CHECK_PARSE_EQ("\\u0034", "'\x34'"); | |
| 179 CHECK_PARSE_EQ("\\u003z", "'u003z'"); | |
| 180 CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))"); | |
| 181 | |
| 182 CHECK_ESCAPES("a", false); | |
| 183 CHECK_ESCAPES("a|b", false); | |
| 184 CHECK_ESCAPES("a\\n", true); | |
| 185 CHECK_ESCAPES("^a", false); | |
| 186 CHECK_ESCAPES("a$", false); | |
| 187 CHECK_ESCAPES("a\\b!", false); | |
| 188 CHECK_ESCAPES("a\\Bb", false); | |
| 189 CHECK_ESCAPES("a*", false); | |
| 190 CHECK_ESCAPES("a*?", false); | |
| 191 CHECK_ESCAPES("a?", false); | |
| 192 CHECK_ESCAPES("a??", false); | |
| 193 CHECK_ESCAPES("a{0,1}?", false); | |
| 194 CHECK_ESCAPES("a{1,1}?", false); | |
| 195 CHECK_ESCAPES("a{1,2}?", false); | |
| 196 CHECK_ESCAPES("a+?", false); | |
| 197 CHECK_ESCAPES("(a)", false); | |
| 198 CHECK_ESCAPES("(a)\\1", false); | |
| 199 CHECK_ESCAPES("(\\1a)", false); | |
| 200 CHECK_ESCAPES("\\1(a)", false); | |
| 201 CHECK_ESCAPES("a\\s", false); | |
| 202 CHECK_ESCAPES("a\\S", false); | |
| 203 CHECK_ESCAPES("a\\d", false); | |
| 204 CHECK_ESCAPES("a\\D", false); | |
| 205 CHECK_ESCAPES("a\\w", false); | |
| 206 CHECK_ESCAPES("a\\W", false); | |
| 207 CHECK_ESCAPES("a.", false); | |
| 208 CHECK_ESCAPES("a\\q", true); | |
| 209 CHECK_ESCAPES("a[a]", false); | |
| 210 CHECK_ESCAPES("a[^a]", false); | |
| 211 CHECK_ESCAPES("a[a-z]", false); | |
| 212 CHECK_ESCAPES("a[\\q]", false); | |
| 213 CHECK_ESCAPES("a(?:b)", false); | |
| 214 CHECK_ESCAPES("a(?=b)", false); | |
| 215 CHECK_ESCAPES("a(?!b)", false); | |
| 216 CHECK_ESCAPES("\\x60", true); | |
| 217 CHECK_ESCAPES("\\u0060", true); | |
| 218 CHECK_ESCAPES("\\cA", true); | |
| 219 CHECK_ESCAPES("\\q", true); | |
| 220 CHECK_ESCAPES("\\1112", true); | |
| 221 CHECK_ESCAPES("\\0", true); | |
| 222 CHECK_ESCAPES("(a)\\1", false); | |
| 223 | |
| 224 CHECK_PARSE_EQ("a{}", "'a{}'"); | |
| 225 CHECK_PARSE_EQ("a{,}", "'a{,}'"); | |
| 226 CHECK_PARSE_EQ("a{", "'a{'"); | |
| 227 CHECK_PARSE_EQ("a{z}", "'a{z}'"); | |
| 228 CHECK_PARSE_EQ("a{1z}", "'a{1z}'"); | |
| 229 CHECK_PARSE_EQ("a{12z}", "'a{12z}'"); | |
| 230 CHECK_PARSE_EQ("a{12,", "'a{12,'"); | |
| 231 CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'"); | |
| 232 CHECK_PARSE_EQ("{}", "'{}'"); | |
| 233 CHECK_PARSE_EQ("{,}", "'{,}'"); | |
| 234 CHECK_PARSE_EQ("{", "'{'"); | |
| 235 CHECK_PARSE_EQ("{z}", "'{z}'"); | |
| 236 CHECK_PARSE_EQ("{1z}", "'{1z}'"); | |
| 237 CHECK_PARSE_EQ("{12z}", "'{12z}'"); | |
| 238 CHECK_PARSE_EQ("{12,", "'{12,'"); | |
| 239 CHECK_PARSE_EQ("{12,3b", "'{12,3b'"); | |
| 240 } | |
| 241 | |
| 242 TEST(ParserRegression) { | |
| 243 CHECK_PARSE_EQ("[A-Z$-][x]", "(! [A-Z $ -] [x])"); | |
| 244 CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')"); | |
| 245 CHECK_PARSE_EQ("{", "'{'"); | |
| 246 CHECK_PARSE_EQ("a|", "(| 'a' %)"); | |
| 247 } | |
| 248 | |
| 249 static void ExpectError(const char* input, | |
| 250 const char* expected) { | |
| 251 v8::HandleScope scope; | |
| 252 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 253 FlatStringReader reader(CStrVector(input)); | |
| 254 RegExpParseResult result; | |
| 255 CHECK_EQ(false, v8::internal::ParseRegExp(&reader, &result)); | |
| 256 CHECK(result.tree == NULL); | |
| 257 CHECK(!result.error.is_null()); | |
| 258 SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS); | |
| 259 CHECK_EQ(expected, *str); | |
| 260 } | |
| 261 | |
| 262 | |
| 263 TEST(Errors) { | |
| 264 V8::Initialize(NULL); | |
| 265 const char* kEndBackslash = "\\ at end of pattern"; | |
| 266 ExpectError("\\", kEndBackslash); | |
| 267 const char* kUnterminatedGroup = "Unterminated group"; | |
| 268 ExpectError("(foo", kUnterminatedGroup); | |
| 269 const char* kInvalidGroup = "Invalid group"; | |
| 270 ExpectError("(?", kInvalidGroup); | |
| 271 const char* kUnterminatedCharacterClass = "Unterminated character class"; | |
| 272 ExpectError("[", kUnterminatedCharacterClass); | |
| 273 ExpectError("[a-", kUnterminatedCharacterClass); | |
| 274 const char* kIllegalCharacterClass = "Illegal character class"; | |
| 275 ExpectError("[a-\\w]", kIllegalCharacterClass); | |
| 276 const char* kEndControl = "\\c at end of pattern"; | |
| 277 ExpectError("\\c", kEndControl); | |
| 278 const char* kNothingToRepeat = "Nothing to repeat"; | |
| 279 ExpectError("*", kNothingToRepeat); | |
| 280 ExpectError("?", kNothingToRepeat); | |
| 281 ExpectError("+", kNothingToRepeat); | |
| 282 ExpectError("{1}", kNothingToRepeat); | |
| 283 ExpectError("{1,2}", kNothingToRepeat); | |
| 284 ExpectError("{1,}", kNothingToRepeat); | |
| 285 } | |
| 286 | |
| 287 | |
| 288 static bool IsDigit(uc16 c) { | |
| 289 return ('0' <= c && c <= '9'); | |
| 290 } | |
| 291 | |
| 292 | |
| 293 static bool NotDigit(uc16 c) { | |
| 294 return !IsDigit(c); | |
| 295 } | |
| 296 | |
| 297 | |
| 298 static bool IsWhiteSpace(uc16 c) { | |
| 299 switch (c) { | |
| 300 case 0x09: | |
| 301 case 0x0A: | |
| 302 case 0x0B: | |
| 303 case 0x0C: | |
| 304 case 0x0d: | |
| 305 case 0x20: | |
| 306 case 0xA0: | |
| 307 case 0x2028: | |
| 308 case 0x2029: | |
| 309 return true; | |
| 310 default: | |
| 311 return unibrow::Space::Is(c); | |
| 312 } | |
| 313 } | |
| 314 | |
| 315 | |
| 316 static bool NotWhiteSpace(uc16 c) { | |
| 317 return !IsWhiteSpace(c); | |
| 318 } | |
| 319 | |
| 320 | |
| 321 static bool IsWord(uc16 c) { | |
| 322 return ('a' <= c && c <= 'z') | |
| 323 || ('A' <= c && c <= 'Z') | |
| 324 || ('0' <= c && c <= '9') | |
| 325 || (c == '_'); | |
| 326 } | |
| 327 | |
| 328 | |
| 329 static bool NotWord(uc16 c) { | |
| 330 return !IsWord(c); | |
| 331 } | |
| 332 | |
| 333 | |
| 334 static bool Dot(uc16 c) { | |
| 335 switch (c) { | |
| 336 // CR LF LS PS | |
| 337 case 0x000A: case 0x000D: case 0x2028: case 0x2029: | |
| 338 return false; | |
| 339 default: | |
| 340 return true; | |
| 341 } | |
| 342 } | |
| 343 | |
| 344 | |
| 345 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) { | |
| 346 ZoneScope scope(DELETE_ON_EXIT); | |
| 347 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | |
| 348 CharacterRange::AddClassEscape(c, ranges); | |
| 349 for (unsigned i = 0; i < (1 << 16); i++) { | |
| 350 bool in_class = false; | |
| 351 for (int j = 0; !in_class && j < ranges->length(); j++) { | |
| 352 CharacterRange& range = ranges->at(j); | |
| 353 in_class = (range.from() <= i && i <= range.to()); | |
| 354 } | |
| 355 CHECK_EQ(pred(i), in_class); | |
| 356 } | |
| 357 } | |
| 358 | |
| 359 | |
| 360 TEST(CharacterClassEscapes) { | |
| 361 TestCharacterClassEscapes('.', Dot); | |
| 362 TestCharacterClassEscapes('d', IsDigit); | |
| 363 TestCharacterClassEscapes('D', NotDigit); | |
| 364 TestCharacterClassEscapes('s', IsWhiteSpace); | |
| 365 TestCharacterClassEscapes('S', NotWhiteSpace); | |
| 366 TestCharacterClassEscapes('w', IsWord); | |
| 367 TestCharacterClassEscapes('W', NotWord); | |
| 368 } | |
| 369 | |
| 370 | |
| 371 static RegExpNode* Compile(const char* input) { | |
| 372 FlatStringReader reader(CStrVector(input)); | |
| 373 RegExpParseResult result; | |
| 374 if (!v8::internal::ParseRegExp(&reader, &result)) | |
| 375 return NULL; | |
| 376 RegExpNode* node = NULL; | |
| 377 RegExpEngine::Compile(&result, &node, false); | |
| 378 return node; | |
| 379 } | |
| 380 | |
| 381 | |
| 382 static void Execute(const char* input, | |
| 383 const char* str, | |
| 384 bool dot_output = false) { | |
| 385 v8::HandleScope scope; | |
| 386 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 387 RegExpNode* node = Compile(input); | |
| 388 USE(node); | |
| 389 #ifdef DEBUG | |
| 390 if (dot_output) { | |
| 391 RegExpEngine::DotPrint(input, node); | |
| 392 exit(0); | |
| 393 } | |
| 394 #endif // DEBUG | |
| 395 } | |
| 396 | |
| 397 | |
| 398 TEST(Execution) { | |
| 399 V8::Initialize(NULL); | |
| 400 Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbegh"); | |
| 401 Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbefh"); | |
| 402 Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbefd"); | |
| 403 } | |
| 404 | |
| 405 | |
| 406 class TestConfig { | |
| 407 public: | |
| 408 typedef int Key; | |
| 409 typedef int Value; | |
| 410 static const int kNoKey; | |
| 411 static const int kNoValue; | |
| 412 static inline int Compare(int a, int b) { | |
| 413 if (a < b) | |
| 414 return -1; | |
| 415 else if (a > b) | |
| 416 return 1; | |
| 417 else | |
| 418 return 0; | |
| 419 } | |
| 420 }; | |
| 421 | |
| 422 | |
| 423 const int TestConfig::kNoKey = 0; | |
| 424 const int TestConfig::kNoValue = 0; | |
| 425 | |
| 426 | |
| 427 static int PseudoRandom(int i, int j) { | |
| 428 return ~(~((i * 781) ^ (j * 329))); | |
| 429 } | |
| 430 | |
| 431 | |
| 432 TEST(SplayTreeSimple) { | |
| 433 static const int kLimit = 1000; | |
| 434 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 435 ZoneSplayTree<TestConfig> tree; | |
| 436 std::set<int> seen; | |
| 437 #define CHECK_MAPS_EQUAL() do { \ | |
| 438 for (int k = 0; k < kLimit; k++) \ | |
| 439 CHECK_EQ(seen.find(k) != seen.end(), tree.Find(k, &loc)); \ | |
| 440 } while (false) | |
| 441 for (int i = 0; i < 50; i++) { | |
| 442 for (int j = 0; j < 50; j++) { | |
| 443 int next = PseudoRandom(i, j) % kLimit; | |
| 444 if (seen.find(next) != seen.end()) { | |
| 445 // We've already seen this one. Check the value and remove | |
| 446 // it. | |
| 447 ZoneSplayTree<TestConfig>::Locator loc; | |
| 448 CHECK(tree.Find(next, &loc)); | |
| 449 CHECK_EQ(next, loc.key()); | |
| 450 CHECK_EQ(3 * next, loc.value()); | |
| 451 tree.Remove(next); | |
| 452 seen.erase(next); | |
| 453 CHECK_MAPS_EQUAL(); | |
| 454 } else { | |
| 455 // Check that it wasn't there already and then add it. | |
| 456 ZoneSplayTree<TestConfig>::Locator loc; | |
| 457 CHECK(!tree.Find(next, &loc)); | |
| 458 CHECK(tree.Insert(next, &loc)); | |
| 459 CHECK_EQ(next, loc.key()); | |
| 460 loc.set_value(3 * next); | |
| 461 seen.insert(next); | |
| 462 CHECK_MAPS_EQUAL(); | |
| 463 } | |
| 464 int val = PseudoRandom(j, i) % kLimit; | |
| 465 for (int k = val; k >= 0; k--) { | |
| 466 if (seen.find(val) != seen.end()) { | |
| 467 ZoneSplayTree<TestConfig>::Locator loc; | |
| 468 CHECK(tree.FindGreatestLessThan(val, &loc)); | |
| 469 CHECK_EQ(loc.key(), val); | |
| 470 break; | |
| 471 } | |
| 472 } | |
| 473 val = PseudoRandom(i + j, i - j) % kLimit; | |
| 474 for (int k = val; k < kLimit; k++) { | |
| 475 if (seen.find(val) != seen.end()) { | |
| 476 ZoneSplayTree<TestConfig>::Locator loc; | |
| 477 CHECK(tree.FindLeastGreaterThan(val, &loc)); | |
| 478 CHECK_EQ(loc.key(), val); | |
| 479 break; | |
| 480 } | |
| 481 } | |
| 482 } | |
| 483 } | |
| 484 } | |
| 485 | |
| 486 | |
| 487 TEST(DispatchTableConstruction) { | |
| 488 // Initialize test data. | |
| 489 static const int kLimit = 1000; | |
| 490 static const int kRangeCount = 8; | |
| 491 static const int kRangeSize = 16; | |
| 492 uc16 ranges[kRangeCount][2 * kRangeSize]; | |
| 493 for (int i = 0; i < kRangeCount; i++) { | |
| 494 Vector<uc16> range(ranges[i], 2 * kRangeSize); | |
| 495 for (int j = 0; j < 2 * kRangeSize; j++) { | |
| 496 range[j] = PseudoRandom(i + 25, j + 87) % kLimit; | |
| 497 } | |
| 498 range.Sort(); | |
| 499 for (int j = 1; j < 2 * kRangeSize; j++) { | |
| 500 CHECK(range[j-1] <= range[j]); | |
| 501 } | |
| 502 } | |
| 503 // Enter test data into dispatch table. | |
| 504 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 505 DispatchTable table; | |
| 506 for (int i = 0; i < kRangeCount; i++) { | |
| 507 uc16* range = ranges[i]; | |
| 508 for (int j = 0; j < 2 * kRangeSize; j += 2) | |
| 509 table.AddRange(CharacterRange(range[j], range[j + 1]), i); | |
| 510 } | |
| 511 // Check that the table looks as we would expect | |
| 512 for (int p = 0; p < kLimit; p++) { | |
| 513 OutSet* outs = table.Get(p); | |
| 514 for (int j = 0; j < kRangeCount; j++) { | |
| 515 uc16* range = ranges[j]; | |
| 516 bool is_on = false; | |
| 517 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2) | |
| 518 is_on = (range[k] <= p && p <= range[k + 1]); | |
| 519 CHECK_EQ(is_on, outs->Get(j)); | |
| 520 } | |
| 521 } | |
| 522 } | |
| 523 | |
| 524 | |
| 525 TEST(Assembler) { | |
| 526 V8::Initialize(NULL); | |
| 527 byte codes[1024]; | |
| 528 IrregexpAssembler assembler(Vector<byte>(codes, 1024)); | |
| 529 #define __ assembler. | |
| 530 Label advance; | |
| 531 Label look_for_foo; | |
| 532 Label fail; | |
| 533 __ GoTo(&look_for_foo); | |
| 534 __ Bind(&advance); | |
| 535 __ AdvanceCP(1); | |
| 536 __ Bind(&look_for_foo); | |
| 537 __ LoadCurrentChar(0, &fail); | |
| 538 __ CheckNotCharacter('f', &advance); | |
| 539 __ LoadCurrentChar(1, &fail); | |
| 540 __ CheckNotCharacter('o', &advance); | |
| 541 __ LoadCurrentChar(2, &fail); | |
| 542 __ CheckNotCharacter('o', &advance); | |
| 543 __ WriteCurrentPositionToRegister(0); | |
| 544 __ WriteCurrentPositionToRegister(1, 2); | |
| 545 __ Succeed(); | |
| 546 __ Bind(&fail); | |
| 547 __ Fail(); | |
| 548 | |
| 549 v8::HandleScope scope; | |
| 550 Handle<ByteArray> array = Factory::NewByteArray(assembler.length()); | |
| 551 assembler.Copy(array->GetDataStartAddress()); | |
| 552 int captures[2]; | |
| 553 | |
| 554 Handle<String> f1 = | |
| 555 Factory::NewStringFromAscii(CStrVector("Now is the time")); | |
| 556 Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1); | |
| 557 CHECK(!IrregexpInterpreter::Match(array, f1_16, captures, 0)); | |
| 558 | |
| 559 Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz")); | |
| 560 Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2); | |
| 561 CHECK(IrregexpInterpreter::Match(array, f2_16, captures, 0)); | |
| 562 CHECK_EQ(0, captures[0]); | |
| 563 CHECK_EQ(2, captures[1]); | |
| 564 | |
| 565 Handle<String> f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery")); | |
| 566 Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3); | |
| 567 CHECK(IrregexpInterpreter::Match(array, f3_16, captures, 0)); | |
| 568 CHECK_EQ(3, captures[0]); | |
| 569 CHECK_EQ(5, captures[1]); | |
| 570 } | |
| 571 | |
| 572 | |
| 573 TEST(Assembler2) { | |
| 574 V8::Initialize(NULL); | |
| 575 byte codes[1024]; | |
| 576 IrregexpAssembler assembler(Vector<byte>(codes, 1024)); | |
| 577 #define __ assembler. | |
| 578 // /^.*foo/ | |
| 579 Label more_dots; | |
| 580 Label unwind_dot; | |
| 581 Label failure; | |
| 582 Label foo; | |
| 583 Label foo_failed; | |
| 584 Label dot_match; | |
| 585 // ^ | |
| 586 __ PushCurrentPosition(); | |
| 587 __ PushRegister(0); | |
| 588 __ WriteCurrentPositionToRegister(0); | |
| 589 __ PushBacktrack(&failure); | |
| 590 __ GoTo(&dot_match); | |
| 591 // .* | |
| 592 __ Bind(&more_dots); | |
| 593 __ AdvanceCP(1); | |
| 594 __ Bind(&dot_match); | |
| 595 __ PushCurrentPosition(); | |
| 596 __ PushBacktrack(&unwind_dot); | |
| 597 __ LoadCurrentChar(0, &foo); | |
| 598 __ CheckNotCharacter('\n', &more_dots); | |
| 599 // foo | |
| 600 __ Bind(&foo); | |
| 601 __ CheckNotCharacter('f', &foo_failed); | |
| 602 __ LoadCurrentChar(1, &foo_failed); | |
| 603 __ CheckNotCharacter('o', &foo_failed); | |
| 604 __ LoadCurrentChar(2, &foo_failed); | |
| 605 __ CheckNotCharacter('o', &foo_failed); | |
| 606 __ WriteCurrentPositionToRegister(1, 2); | |
| 607 __ Succeed(); | |
| 608 __ Break(); | |
| 609 | |
| 610 __ Bind(&foo_failed); | |
| 611 __ PopBacktrack(); | |
| 612 __ Break(); | |
| 613 | |
| 614 __ Bind(&unwind_dot); | |
| 615 __ PopCurrentPosition(); | |
| 616 __ LoadCurrentChar(0, &foo_failed); | |
| 617 __ GoTo(&foo); | |
| 618 | |
| 619 __ Bind(&failure); | |
| 620 __ PopRegister(0); | |
| 621 __ PopCurrentPosition(); | |
| 622 __ Fail(); | |
| 623 | |
| 624 v8::HandleScope scope; | |
| 625 Handle<ByteArray> array = Factory::NewByteArray(assembler.length()); | |
| 626 assembler.Copy(array->GetDataStartAddress()); | |
| 627 int captures[2]; | |
| 628 | |
| 629 Handle<String> f1 = | |
| 630 Factory::NewStringFromAscii(CStrVector("Now is the time")); | |
| 631 Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1); | |
| 632 CHECK(!IrregexpInterpreter::Match(array, f1_16, captures, 0)); | |
| 633 | |
| 634 Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz")); | |
| 635 Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2); | |
| 636 CHECK(IrregexpInterpreter::Match(array, f2_16, captures, 0)); | |
| 637 CHECK_EQ(0, captures[0]); | |
| 638 CHECK_EQ(2, captures[1]); | |
| 639 | |
| 640 Handle<String> f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery")); | |
| 641 Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3); | |
| 642 CHECK(IrregexpInterpreter::Match(array, f3_16, captures, 0)); | |
| 643 CHECK_EQ(0, captures[0]); | |
| 644 CHECK_EQ(5, captures[1]); | |
| 645 | |
| 646 Handle<String> f4 = | |
| 647 Factory::NewStringFromAscii(CStrVector("football buffoonery")); | |
| 648 Handle<String> f4_16 = RegExpImpl::StringToTwoByte(f4); | |
| 649 CHECK(IrregexpInterpreter::Match(array, f4_16, captures, 0)); | |
| 650 CHECK_EQ(0, captures[0]); | |
| 651 CHECK_EQ(14, captures[1]); | |
| 652 | |
| 653 Handle<String> f5 = | |
| 654 Factory::NewStringFromAscii(CStrVector("walking\nbarefoot")); | |
| 655 Handle<String> f5_16 = RegExpImpl::StringToTwoByte(f5); | |
| 656 CHECK(!IrregexpInterpreter::Match(array, f5_16, captures, 0)); | |
| 657 } | |
| 658 | |
| 659 | |
| 660 TEST(MacroAssembler) { | |
| 661 V8::Initialize(NULL); | |
| 662 byte codes[1024]; | |
| 663 IrregexpAssembler assembler(Vector<byte>(codes, 1024)); | |
| 664 RegExpMacroAssemblerIrregexp m(&assembler); | |
| 665 // ^f(o)o. | |
| 666 Label fail, fail2, start; | |
| 667 uc16 foo_chars[3]; | |
| 668 foo_chars[0] = 'f'; | |
| 669 foo_chars[1] = 'o'; | |
| 670 foo_chars[2] = 'o'; | |
| 671 Vector<const uc16> foo(foo_chars, 3); | |
| 672 m.SetRegister(4, 42); | |
| 673 m.PushRegister(4); | |
| 674 m.AdvanceRegister(4, 42); | |
| 675 m.GoTo(&start); | |
| 676 m.Fail(); | |
| 677 m.Bind(&start); | |
| 678 m.PushBacktrack(&fail2); | |
| 679 m.CheckCharacters(foo, 0, &fail); | |
| 680 m.WriteCurrentPositionToRegister(0); | |
| 681 m.PushCurrentPosition(); | |
| 682 m.AdvanceCurrentPosition(3); | |
| 683 m.WriteCurrentPositionToRegister(1); | |
| 684 m.PopCurrentPosition(); | |
| 685 m.AdvanceCurrentPosition(1); | |
| 686 m.WriteCurrentPositionToRegister(2); | |
| 687 m.AdvanceCurrentPosition(1); | |
| 688 m.WriteCurrentPositionToRegister(3); | |
| 689 m.Succeed(); | |
| 690 | |
| 691 m.Bind(&fail); | |
| 692 m.Backtrack(); | |
| 693 m.Succeed(); | |
| 694 | |
| 695 m.Bind(&fail2); | |
| 696 m.PopRegister(0); | |
| 697 m.Fail(); | |
| 698 | |
| 699 v8::HandleScope scope; | |
| 700 | |
| 701 Handle<ByteArray> array = Factory::NewByteArray(assembler.length()); | |
| 702 assembler.Copy(array->GetDataStartAddress()); | |
| 703 int captures[5]; | |
| 704 | |
| 705 Handle<String> f1 = | |
| 706 Factory::NewStringFromAscii(CStrVector("foobar")); | |
| 707 Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1); | |
| 708 CHECK(IrregexpInterpreter::Match(array, f1_16, captures, 0)); | |
| 709 CHECK_EQ(0, captures[0]); | |
| 710 CHECK_EQ(3, captures[1]); | |
| 711 CHECK_EQ(1, captures[2]); | |
| 712 CHECK_EQ(2, captures[3]); | |
| 713 CHECK_EQ(84, captures[4]); | |
| 714 | |
| 715 Handle<String> f2 = | |
| 716 Factory::NewStringFromAscii(CStrVector("barfoo")); | |
| 717 Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2); | |
| 718 CHECK(!IrregexpInterpreter::Match(array, f2_16, captures, 0)); | |
| 719 CHECK_EQ(42, captures[0]); | |
| 720 } | |
| 721 | |
| 722 | |
| 723 TEST(AddInverseToTable) { | |
| 724 static const int kLimit = 1000; | |
| 725 static const int kRangeCount = 16; | |
| 726 for (int t = 0; t < 10; t++) { | |
| 727 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 728 ZoneList<CharacterRange>* ranges = | |
| 729 new ZoneList<CharacterRange>(kRangeCount); | |
| 730 for (int i = 0; i < kRangeCount; i++) { | |
| 731 int from = PseudoRandom(t + 87, i + 25) % kLimit; | |
| 732 int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20)); | |
| 733 if (to > kLimit) to = kLimit; | |
| 734 ranges->Add(CharacterRange(from, to)); | |
| 735 } | |
| 736 DispatchTable table; | |
| 737 DispatchTableConstructor cons(&table); | |
| 738 cons.set_choice_index(0); | |
| 739 cons.AddInverse(ranges); | |
| 740 for (int i = 0; i < kLimit; i++) { | |
| 741 bool is_on = false; | |
| 742 for (int j = 0; !is_on && j < kRangeCount; j++) | |
| 743 is_on = ranges->at(j).Contains(i); | |
| 744 OutSet* set = table.Get(i); | |
| 745 CHECK_EQ(is_on, set->Get(0) == false); | |
| 746 } | |
| 747 } | |
| 748 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 749 ZoneList<CharacterRange>* ranges = | |
| 750 new ZoneList<CharacterRange>(1); | |
| 751 ranges->Add(CharacterRange(0xFFF0, 0xFFFE)); | |
| 752 DispatchTable table; | |
| 753 DispatchTableConstructor cons(&table); | |
| 754 cons.set_choice_index(0); | |
| 755 cons.AddInverse(ranges); | |
| 756 CHECK(!table.Get(0xFFFE)->Get(0)); | |
| 757 CHECK(table.Get(0xFFFF)->Get(0)); | |
| 758 } | |
| 759 | |
| 760 | |
| 761 static uc32 canonicalize(uc32 c) { | |
| 762 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth]; | |
| 763 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL); | |
| 764 if (count == 0) { | |
| 765 return c; | |
| 766 } else { | |
| 767 CHECK_EQ(1, count); | |
| 768 return canon[0]; | |
| 769 } | |
| 770 } | |
| 771 | |
| 772 | |
| 773 TEST(LatinCanonicalize) { | |
| 774 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; | |
| 775 for (char lower = 'a'; lower <= 'z'; lower++) { | |
| 776 char upper = lower + ('A' - 'a'); | |
| 777 CHECK_EQ(canonicalize(lower), canonicalize(upper)); | |
| 778 unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | |
| 779 int length = un_canonicalize.get(lower, '\0', uncanon); | |
| 780 CHECK_EQ(2, length); | |
| 781 CHECK_EQ(upper, uncanon[0]); | |
| 782 CHECK_EQ(lower, uncanon[1]); | |
| 783 } | |
| 784 for (uc32 c = 128; c < (1 << 21); c++) | |
| 785 CHECK_GE(canonicalize(c), 128); | |
| 786 unibrow::Mapping<unibrow::ToUppercase> to_upper; | |
| 787 for (uc32 c = 0; c < (1 << 21); c++) { | |
| 788 unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth]; | |
| 789 int length = to_upper.get(c, '\0', upper); | |
| 790 if (length == 0) { | |
| 791 length = 1; | |
| 792 upper[0] = c; | |
| 793 } | |
| 794 uc32 u = upper[0]; | |
| 795 if (length > 1 || (c >= 128 && u < 128)) | |
| 796 u = c; | |
| 797 CHECK_EQ(u, canonicalize(c)); | |
| 798 } | |
| 799 } | |
| 800 | |
| 801 | |
| 802 TEST(SimplePropagation) { | |
| 803 v8::HandleScope scope; | |
| 804 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 805 RegExpNode* node = Compile("(a|^b|c)"); | |
| 806 CHECK(node->info()->determine_start); | |
| 807 } | |
| 808 | |
| 809 | |
| 810 static uc32 CanonRange(uc32 c) { | |
| 811 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth]; | |
| 812 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL); | |
| 813 if (count == 0) { | |
| 814 return c; | |
| 815 } else { | |
| 816 CHECK_EQ(1, count); | |
| 817 return canon[0]; | |
| 818 } | |
| 819 } | |
| 820 | |
| 821 | |
| 822 TEST(RangeCanonicalization) { | |
| 823 ASSERT((CanonRange(0) & CharacterRange::kStartMarker) != 0); | |
| 824 // Check that we arrive at the same result when using the basic | |
| 825 // range canonicalization primitives as when using immediate | |
| 826 // canonicalization. | |
| 827 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; | |
| 828 for (int i = 0; i < CharacterRange::kRangeCanonicalizeMax; i++) { | |
| 829 int range = CanonRange(i); | |
| 830 int indirect_length = 0; | |
| 831 unibrow::uchar indirect[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | |
| 832 if ((range & CharacterRange::kStartMarker) == 0) { | |
| 833 indirect_length = un_canonicalize.get(i - range, '\0', indirect); | |
| 834 for (int i = 0; i < indirect_length; i++) | |
| 835 indirect[i] += range; | |
| 836 } else { | |
| 837 indirect_length = un_canonicalize.get(i, '\0', indirect); | |
| 838 } | |
| 839 unibrow::uchar direct[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | |
| 840 int direct_length = un_canonicalize.get(i, '\0', direct); | |
| 841 CHECK_EQ(direct_length, indirect_length); | |
| 842 } | |
| 843 // Check that we arrive at the same results when skipping over | |
| 844 // canonicalization ranges. | |
| 845 int next_block = 0; | |
| 846 while (next_block < CharacterRange::kRangeCanonicalizeMax) { | |
| 847 uc32 start = CanonRange(next_block); | |
| 848 CHECK_NE((start & CharacterRange::kStartMarker), 0); | |
| 849 unsigned dist = start & CharacterRange::kPayloadMask; | |
| 850 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | |
| 851 int first_length = un_canonicalize.get(next_block, '\0', first); | |
| 852 for (unsigned i = 1; i < dist; i++) { | |
| 853 CHECK_EQ(i, CanonRange(i)); | |
| 854 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | |
| 855 int succ_length = un_canonicalize.get(next_block + i, '\0', succ); | |
| 856 CHECK_EQ(first_length, succ_length); | |
| 857 for (int j = 0; j < succ_length; j++) { | |
| 858 int calc = first[j] + i; | |
| 859 int found = succ[j]; | |
| 860 CHECK_EQ(calc, found); | |
| 861 } | |
| 862 } | |
| 863 next_block = next_block + dist; | |
| 864 } | |
| 865 } | |
| 866 | |
| 867 | |
| 868 static void TestRangeCaseIndependence(CharacterRange input, | |
| 869 Vector<CharacterRange> expected) { | |
| 870 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 871 int count = expected.length(); | |
| 872 ZoneList<CharacterRange>* list = new ZoneList<CharacterRange>(count); | |
| 873 input.AddCaseEquivalents(list); | |
| 874 CHECK_EQ(count, list->length()); | |
| 875 for (int i = 0; i < list->length(); i++) { | |
| 876 CHECK_EQ(expected[i].from(), list->at(i).from()); | |
| 877 CHECK_EQ(expected[i].to(), list->at(i).to()); | |
| 878 } | |
| 879 } | |
| 880 | |
| 881 | |
| 882 static void TestSimpleRangeCaseIndependence(CharacterRange input, | |
| 883 CharacterRange expected) { | |
| 884 EmbeddedVector<CharacterRange, 1> vector; | |
| 885 vector[0] = expected; | |
| 886 TestRangeCaseIndependence(input, vector); | |
| 887 } | |
| 888 | |
| 889 | |
| 890 TEST(CharacterRangeCaseIndependence) { | |
| 891 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'), | |
| 892 CharacterRange::Singleton('A')); | |
| 893 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'), | |
| 894 CharacterRange::Singleton('Z')); | |
| 895 TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'), | |
| 896 CharacterRange('A', 'Z')); | |
| 897 TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'), | |
| 898 CharacterRange('C', 'F')); | |
| 899 TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'), | |
| 900 CharacterRange('A', 'B')); | |
| 901 TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'), | |
| 902 CharacterRange('Y', 'Z')); | |
| 903 TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1), | |
| 904 CharacterRange('A', 'Z')); | |
| 905 TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'), | |
| 906 CharacterRange('a', 'z')); | |
| 907 TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'), | |
| 908 CharacterRange('c', 'f')); | |
| 909 TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1), | |
| 910 CharacterRange('a', 'z')); | |
| 911 // Here we need to add [l-z] to complete the case independence of | |
| 912 // [A-Za-z] but we expect [a-z] to be added since we always add a | |
| 913 // whole block at a time. | |
| 914 TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'), | |
| 915 CharacterRange('a', 'z')); | |
| 916 } | |
| 917 | |
| 918 | |
| 919 TEST(Graph) { | |
| 920 V8::Initialize(NULL); | |
| 921 Execute("(x)?\\1y", "", true); | |
| 922 } | |
| OLD | NEW |