test/cctest/test-regexp.cc - Issue 12427: Merge regexp2000 back into bleeding_edge

Side by Side Diff: test/cctest/test-regexp.cc

Issue 12427: Merge regexp2000 back into bleeding_edge (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: Created 12 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
	Erik Corry 2008/11/25 11:03:52 Correct year? Correct year? Christian Plesner Hansen 2008/11/26 06:49:56 Fixed Fixed
	2 // Redistribution and use in source and binary forms, with or without

	3 // modification, are permitted provided that the following conditions are

	4 // met:

	5 //

	6 // * Redistributions of source code must retain the above copyright

	7 // notice, this list of conditions and the following disclaimer.

	8 // * Redistributions in binary form must reproduce the above

	9 // copyright notice, this list of conditions and the following

	10 // disclaimer in the documentation and/or other materials provided

	11 // with the distribution.

	12 // * Neither the name of Google Inc. nor the names of its

	13 // contributors may be used to endorse or promote products derived

	14 // from this software without specific prior written permission.

	15 //

	16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

	17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

	18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

	19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

	20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

	21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

	22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

	23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

	24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

	25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

	26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	27

	28

	29 #include <stdlib.h>

	30 #include <set>
	Mads Ager (chromium) 2008/11/25 21:09:41 Please remove. We are not using <set>, right? Please remove. We are not using <set>, right? Christian Plesner Hansen 2008/11/26 06:49:56 Yes we are. Yes we are.
	31

	32 #include "v8.h"

	33

	34 #include "cctest.h"

	35 #include "zone-inl.h"

	36 #include "parser.h"

	37 #include "ast.h"

	38 #include "jsregexp-inl.h"

	39 #include "assembler-irregexp.h"

	40 #include "regexp-macro-assembler.h"

	41 #include "regexp-macro-assembler-irregexp.h"

	42 #include "regexp-macro-assembler-ia32.h"

	43 #include "interpreter-irregexp.h"

	44

	45

	46 using namespace v8::internal;

	47

	48

	49 static SmartPointer<const char> Parse(const char* input) {

	50 v8::HandleScope scope;

	51 ZoneScope zone_scope(DELETE_ON_EXIT);

	52 FlatStringReader reader(CStrVector(input));

	53 RegExpParseResult result;

	54 CHECK(v8::internal::ParseRegExp(&reader, &result));

	55 CHECK(result.tree != NULL);

	56 CHECK(result.error.is_null());

	57 SmartPointer<const char> output = result.tree->ToString();

	58 return output;

	59 }

	60

	61 static bool ParseEscapes(const char* input) {

	62 v8::HandleScope scope;

	63 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));

	64 ZoneScope zone_scope(DELETE_ON_EXIT);

	65 FlatStringReader reader(CStrVector(input));

	66 RegExpParseResult result;

	67 CHECK(v8::internal::ParseRegExp(&reader, &result));

	68 CHECK(result.tree != NULL);

	69 CHECK(result.error.is_null());

	70 return result.has_character_escapes;

	71 }

	72

	73

	74 #define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))

	75 #define CHECK_ESCAPES(input, has_escapes) CHECK_EQ(has_escapes, \
	Mads Ager (chromium) 2008/11/25 21:09:41 How about: #define CHECK_ESCAPES(input, has_escap How about: #define CHECK_ESCAPES(input, has_escapes) \ CHECK_EQ(has_escapes, ParseEscapes(input))
	76 ParseEscapes(input));

	77

	78 TEST(Parser) {

	79 V8::Initialize(NULL);

	80 CHECK_PARSE_EQ("abc", "'abc'");

	81 CHECK_PARSE_EQ("", "%");

	82 CHECK_PARSE_EQ("abc\|def", "(\| 'abc' 'def')");

	83 CHECK_PARSE_EQ("abc\|def\|ghi", "(\| 'abc' 'def' 'ghi')");

	84 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");

	85 CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");

	86 CHECK_PARSE_EQ("\\w\|\\d", "(\| [0-9 A-Z _ a-z] [0-9])");

	87 CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");

	88 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");

	89 CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))");

	90 CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))");

	91 CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))");

	92 CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))");

	93 CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");

	94 CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");

	95 CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");

	96 CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");

	97 CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");

	98 CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");

	99 CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");

	100 CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");

	101 CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");

	102 CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");

	103 CHECK_PARSE_EQ("(?:foo)", "'foo'");

	104 CHECK_PARSE_EQ("(?: foo )", "' foo '");

	105 CHECK_PARSE_EQ("(foo\|bar\|baz)", "(^ (\| 'foo' 'bar' 'baz'))");

	106 CHECK_PARSE_EQ("foo\|(bar\|baz)\|quux", "(\| 'foo' (^ (\| 'bar' 'baz')) 'quux')");

	107 CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");

	108 CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");

	109 CHECK_PARSE_EQ("()", "(^ %)");

	110 CHECK_PARSE_EQ("(?=)", "(-> + %)");

	111 CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows

	112 CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252

	113 CHECK_PARSE_EQ("[x]", "[x]");

	114 CHECK_PARSE_EQ("[xyz]", "[x y z]");

	115 CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");

	116 CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");

	117 CHECK_PARSE_EQ("[^123]", "^[1 2 3]");

	118 CHECK_PARSE_EQ("]", "']'");

	119 CHECK_PARSE_EQ("}", "'}'");

	120 CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");

	121 CHECK_PARSE_EQ("[\\d]", "[0-9]");

	122 CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");

	123 CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");

	124 CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");

	125 CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",

	126 "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");

	127 CHECK_PARSE_EQ("\\c!", "'c!'");

	128 CHECK_PARSE_EQ("\\c_", "'c_'");

	129 CHECK_PARSE_EQ("\\c~", "'c~'");

	130 CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");

	131 CHECK_PARSE_EQ("\\[\\]\\{\\}\$\$\\%\\^\\#\\ ", "'[]{}()%^# '");

	132 CHECK_PARSE_EQ("[\\[\\]\\{\\}\$\$\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");

	133 CHECK_PARSE_EQ("\\0", "'\\x00'");

	134 CHECK_PARSE_EQ("\\8", "'8'");

	135 CHECK_PARSE_EQ("\\9", "'9'");

	136 CHECK_PARSE_EQ("\\11", "'\\x09'");

	137 CHECK_PARSE_EQ("\\11a", "'\\x09a'");

	138 CHECK_PARSE_EQ("\\011", "'\\x09'");

	139 CHECK_PARSE_EQ("\\00011", "'\\x0011'");

	140 CHECK_PARSE_EQ("\\118", "'\\x098'");

	141 CHECK_PARSE_EQ("\\111", "'I'");

	142 CHECK_PARSE_EQ("\\1111", "'I1'");

	143 CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");

	144 CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");

	145 CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");

	146 CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");

	147 CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"

	148 " (# 0 - g (<- 1)))");

	149 CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"

	150 " (# 0 - g (<- 2)))");

	151 CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"

	152 " (# 0 - g (<- 3)))");

	153 CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"

	154 " (# 0 - g '\\x04'))");

	155 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",

	156 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"

	157 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");

	158 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",

	159 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"

	160 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");

	161 CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");

	162 CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");

	163 CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");

	164 CHECK_PARSE_EQ("\\1(a)", "(^ 'a')");

	165 CHECK_PARSE_EQ("(?!(a))\\1", "(-> - (^ 'a'))");

	166 CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(-> - (: (^ 'a') (<- 1)))");

	167 CHECK_PARSE_EQ("[\\0]", "[\\x00]");

	168 CHECK_PARSE_EQ("[\\11]", "[\\x09]");

	169 CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]");

	170 CHECK_PARSE_EQ("[\\011]", "[\\x09]");

	171 CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]");

	172 CHECK_PARSE_EQ("[\\118]", "[\\x09 8]");

	173 CHECK_PARSE_EQ("[\\111]", "[I]");

	174 CHECK_PARSE_EQ("[\\1111]", "[I 1]");

	175 CHECK_PARSE_EQ("\\x34", "'\x34'");

	176 CHECK_PARSE_EQ("\\x60", "'\x60'");

	177 CHECK_PARSE_EQ("\\x3z", "'x3z'");

	178 CHECK_PARSE_EQ("\\u0034", "'\x34'");

	179 CHECK_PARSE_EQ("\\u003z", "'u003z'");

	180 CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");

	181

	182 CHECK_ESCAPES("a", false);

	183 CHECK_ESCAPES("a\|b", false);

	184 CHECK_ESCAPES("a\\n", true);

	185 CHECK_ESCAPES("^a", false);

	186 CHECK_ESCAPES("a$", false);

	187 CHECK_ESCAPES("a\\b!", false);

	188 CHECK_ESCAPES("a\\Bb", false);

	189 CHECK_ESCAPES("a*", false);

	190 CHECK_ESCAPES("a*?", false);

	191 CHECK_ESCAPES("a?", false);

	192 CHECK_ESCAPES("a??", false);

	193 CHECK_ESCAPES("a{0,1}?", false);

	194 CHECK_ESCAPES("a{1,1}?", false);

	195 CHECK_ESCAPES("a{1,2}?", false);

	196 CHECK_ESCAPES("a+?", false);

	197 CHECK_ESCAPES("(a)", false);

	198 CHECK_ESCAPES("(a)\\1", false);

	199 CHECK_ESCAPES("(\\1a)", false);

	200 CHECK_ESCAPES("\\1(a)", false);

	201 CHECK_ESCAPES("a\\s", false);

	202 CHECK_ESCAPES("a\\S", false);

	203 CHECK_ESCAPES("a\\d", false);

	204 CHECK_ESCAPES("a\\D", false);

	205 CHECK_ESCAPES("a\\w", false);

	206 CHECK_ESCAPES("a\\W", false);

	207 CHECK_ESCAPES("a.", false);

	208 CHECK_ESCAPES("a\\q", true);

	209 CHECK_ESCAPES("a[a]", false);

	210 CHECK_ESCAPES("a[^a]", false);

	211 CHECK_ESCAPES("a[a-z]", false);

	212 CHECK_ESCAPES("a[\\q]", false);

	213 CHECK_ESCAPES("a(?:b)", false);

	214 CHECK_ESCAPES("a(?=b)", false);

	215 CHECK_ESCAPES("a(?!b)", false);

	216 CHECK_ESCAPES("\\x60", true);

	217 CHECK_ESCAPES("\\u0060", true);

	218 CHECK_ESCAPES("\\cA", true);

	219 CHECK_ESCAPES("\\q", true);

	220 CHECK_ESCAPES("\\1112", true);

	221 CHECK_ESCAPES("\\0", true);

	222 CHECK_ESCAPES("(a)\\1", false);

	223

	224 CHECK_PARSE_EQ("a{}", "'a{}'");

	225 CHECK_PARSE_EQ("a{,}", "'a{,}'");

	226 CHECK_PARSE_EQ("a{", "'a{'");

	227 CHECK_PARSE_EQ("a{z}", "'a{z}'");

	228 CHECK_PARSE_EQ("a{1z}", "'a{1z}'");

	229 CHECK_PARSE_EQ("a{12z}", "'a{12z}'");

	230 CHECK_PARSE_EQ("a{12,", "'a{12,'");

	231 CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'");

	232 CHECK_PARSE_EQ("{}", "'{}'");

	233 CHECK_PARSE_EQ("{,}", "'{,}'");

	234 CHECK_PARSE_EQ("{", "'{'");

	235 CHECK_PARSE_EQ("{z}", "'{z}'");

	236 CHECK_PARSE_EQ("{1z}", "'{1z}'");

	237 CHECK_PARSE_EQ("{12z}", "'{12z}'");

	238 CHECK_PARSE_EQ("{12,", "'{12,'");

	239 CHECK_PARSE_EQ("{12,3b", "'{12,3b'");

	240 }

	241

	242 TEST(ParserRegression) {

	243 CHECK_PARSE_EQ("[A-Z$-][x]", "(! [A-Z $ -] [x])");

	244 CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");

	245 CHECK_PARSE_EQ("{", "'{'");

	246 CHECK_PARSE_EQ("a\|", "(\| 'a' %)");

	247 }

	248

	249 static void ExpectError(const char* input,

	250 const char* expected) {

	251 v8::HandleScope scope;

	252 ZoneScope zone_scope(DELETE_ON_EXIT);

	253 FlatStringReader reader(CStrVector(input));

	254 RegExpParseResult result;

	255 CHECK_EQ(false, v8::internal::ParseRegExp(&reader, &result));

	256 CHECK(result.tree == NULL);

	257 CHECK(!result.error.is_null());

	258 SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);

	259 CHECK_EQ(expected, *str);

	260 }

	261

	262

	263 TEST(Errors) {

	264 V8::Initialize(NULL);

	265 const char* kEndBackslash = "\\ at end of pattern";

	266 ExpectError("\\", kEndBackslash);

	267 const char* kUnterminatedGroup = "Unterminated group";

	268 ExpectError("(foo", kUnterminatedGroup);

	269 const char* kInvalidGroup = "Invalid group";

	270 ExpectError("(?", kInvalidGroup);

	271 const char* kUnterminatedCharacterClass = "Unterminated character class";

	272 ExpectError("[", kUnterminatedCharacterClass);

	273 ExpectError("[a-", kUnterminatedCharacterClass);

	274 const char* kIllegalCharacterClass = "Illegal character class";

	275 ExpectError("[a-\\w]", kIllegalCharacterClass);

	276 const char* kEndControl = "\\c at end of pattern";

	277 ExpectError("\\c", kEndControl);

	278 const char* kNothingToRepeat = "Nothing to repeat";

	279 ExpectError("*", kNothingToRepeat);

	280 ExpectError("?", kNothingToRepeat);

	281 ExpectError("+", kNothingToRepeat);

	282 ExpectError("{1}", kNothingToRepeat);

	283 ExpectError("{1,2}", kNothingToRepeat);

	284 ExpectError("{1,}", kNothingToRepeat);

	285 }

	286

	287

	288 static bool IsDigit(uc16 c) {

	289 return ('0' <= c && c <= '9');

	290 }

	291

	292

	293 static bool NotDigit(uc16 c) {

	294 return !IsDigit(c);

	295 }

	296

	297

	298 static bool IsWhiteSpace(uc16 c) {

	299 switch (c) {

	300 case 0x09:

	301 case 0x0A:

	302 case 0x0B:

	303 case 0x0C:

	304 case 0x0d:

	305 case 0x20:

	306 case 0xA0:

	307 case 0x2028:

	308 case 0x2029:

	309 return true;

	310 default:

	311 return unibrow::Space::Is(c);

	312 }

	313 }

	314

	315

	316 static bool NotWhiteSpace(uc16 c) {

	317 return !IsWhiteSpace(c);

	318 }

	319

	320

	321 static bool IsWord(uc16 c) {

	322 return ('a' <= c && c <= 'z')

	323 \|\| ('A' <= c && c <= 'Z')

	324 \|\| ('0' <= c && c <= '9')

	325 \|\| (c == '_');

	326 }

	327

	328

	329 static bool NotWord(uc16 c) {

	330 return !IsWord(c);

	331 }

	332

	333

	334 static bool Dot(uc16 c) {

	335 switch (c) {

	336 // CR LF LS PS

	337 case 0x000A: case 0x000D: case 0x2028: case 0x2029:

	338 return false;

	339 default:

	340 return true;

	341 }

	342 }

	343

	344

	345 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {

	346 ZoneScope scope(DELETE_ON_EXIT);

	347 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);

	348 CharacterRange::AddClassEscape(c, ranges);

	349 for (unsigned i = 0; i < (1 << 16); i++) {

	350 bool in_class = false;

	351 for (int j = 0; !in_class && j < ranges->length(); j++) {

	352 CharacterRange& range = ranges->at(j);

	353 in_class = (range.from() <= i && i <= range.to());

	354 }

	355 CHECK_EQ(pred(i), in_class);

	356 }

	357 }

	358

	359

	360 TEST(CharacterClassEscapes) {

	361 TestCharacterClassEscapes('.', Dot);

	362 TestCharacterClassEscapes('d', IsDigit);

	363 TestCharacterClassEscapes('D', NotDigit);

	364 TestCharacterClassEscapes('s', IsWhiteSpace);

	365 TestCharacterClassEscapes('S', NotWhiteSpace);

	366 TestCharacterClassEscapes('w', IsWord);

	367 TestCharacterClassEscapes('W', NotWord);

	368 }

	369

	370

	371 static RegExpNode* Compile(const char* input) {

	372 FlatStringReader reader(CStrVector(input));

	373 RegExpParseResult result;

	374 if (!v8::internal::ParseRegExp(&reader, &result))

	375 return NULL;

	376 RegExpNode* node = NULL;

	377 RegExpEngine::Compile(&result, &node, false);

	378 return node;

	379 }

	380

	381

	382 static void Execute(const char* input,

	383 const char* str,

	384 bool dot_output = false) {

	385 v8::HandleScope scope;

	386 ZoneScope zone_scope(DELETE_ON_EXIT);

	387 RegExpNode* node = Compile(input);

	388 USE(node);

	389 #ifdef DEBUG

	390 if (dot_output) {

	391 RegExpEngine::DotPrint(input, node);

	392 exit(0);

	393 }

	394 #endif // DEBUG

	395 }

	396

	397

	398 TEST(Execution) {

	399 V8::Initialize(NULL);

	400 Execute(".*?(?:a[bc]d\|e[fg]h)", "xxxabbegh");

	401 Execute(".*?(?:a[bc]d\|e[fg]h)", "xxxabbefh");

	402 Execute(".*?(?:a[bc]d\|e[fg]h)", "xxxabbefd");

	403 }

	404

	405

	406 class TestConfig {

	407 public:

	408 typedef int Key;

	409 typedef int Value;

	410 static const int kNoKey;

	411 static const int kNoValue;

	412 static inline int Compare(int a, int b) {

	413 if (a < b)

	414 return -1;

	415 else if (a > b)

	416 return 1;

	417 else

	418 return 0;

	419 }

	420 };

	421

	422

	423 const int TestConfig::kNoKey = 0;

	424 const int TestConfig::kNoValue = 0;

	425

	426

	427 static int PseudoRandom(int i, int j) {

	428 return ~(~((i * 781) ^ (j * 329)));

	429 }

	430

	431

	432 TEST(SplayTreeSimple) {

	433 static const int kLimit = 1000;

	434 ZoneScope zone_scope(DELETE_ON_EXIT);

	435 ZoneSplayTree<TestConfig> tree;

	436 std::set<int> seen;

	437 #define CHECK_MAPS_EQUAL() do { \

	438 for (int k = 0; k < kLimit; k++) \

	439 CHECK_EQ(seen.find(k) != seen.end(), tree.Find(k, &loc)); \

	440 } while (false)

	441 for (int i = 0; i < 50; i++) {

	442 for (int j = 0; j < 50; j++) {

	443 int next = PseudoRandom(i, j) % kLimit;

	444 if (seen.find(next) != seen.end()) {

	445 // We've already seen this one. Check the value and remove

	446 // it.

	447 ZoneSplayTree<TestConfig>::Locator loc;

	448 CHECK(tree.Find(next, &loc));

	449 CHECK_EQ(next, loc.key());

	450 CHECK_EQ(3 * next, loc.value());

	451 tree.Remove(next);

	452 seen.erase(next);

	453 CHECK_MAPS_EQUAL();

	454 } else {

	455 // Check that it wasn't there already and then add it.

	456 ZoneSplayTree<TestConfig>::Locator loc;

	457 CHECK(!tree.Find(next, &loc));

	458 CHECK(tree.Insert(next, &loc));

	459 CHECK_EQ(next, loc.key());

	460 loc.set_value(3 * next);

	461 seen.insert(next);

	462 CHECK_MAPS_EQUAL();

	463 }

	464 int val = PseudoRandom(j, i) % kLimit;

	465 for (int k = val; k >= 0; k--) {

	466 if (seen.find(val) != seen.end()) {

	467 ZoneSplayTree<TestConfig>::Locator loc;

	468 CHECK(tree.FindGreatestLessThan(val, &loc));

	469 CHECK_EQ(loc.key(), val);

	470 break;

	471 }

	472 }

	473 val = PseudoRandom(i + j, i - j) % kLimit;

	474 for (int k = val; k < kLimit; k++) {

	475 if (seen.find(val) != seen.end()) {

	476 ZoneSplayTree<TestConfig>::Locator loc;

	477 CHECK(tree.FindLeastGreaterThan(val, &loc));

	478 CHECK_EQ(loc.key(), val);

	479 break;

	480 }

	481 }

	482 }

	483 }

	484 }

	485

	486

	487 TEST(DispatchTableConstruction) {

	488 // Initialize test data.

	489 static const int kLimit = 1000;

	490 static const int kRangeCount = 8;

	491 static const int kRangeSize = 16;

	492 uc16 ranges[kRangeCount][2 * kRangeSize];

	493 for (int i = 0; i < kRangeCount; i++) {

	494 Vector<uc16> range(ranges[i], 2 * kRangeSize);

	495 for (int j = 0; j < 2 * kRangeSize; j++) {

	496 range[j] = PseudoRandom(i + 25, j + 87) % kLimit;

	497 }

	498 range.Sort();

	499 for (int j = 1; j < 2 * kRangeSize; j++) {

	500 CHECK(range[j-1] <= range[j]);

	501 }

	502 }

	503 // Enter test data into dispatch table.

	504 ZoneScope zone_scope(DELETE_ON_EXIT);

	505 DispatchTable table;

	506 for (int i = 0; i < kRangeCount; i++) {

	507 uc16* range = ranges[i];

	508 for (int j = 0; j < 2 * kRangeSize; j += 2)

	509 table.AddRange(CharacterRange(range[j], range[j + 1]), i);

	510 }

	511 // Check that the table looks as we would expect

	512 for (int p = 0; p < kLimit; p++) {

	513 OutSet* outs = table.Get(p);

	514 for (int j = 0; j < kRangeCount; j++) {

	515 uc16* range = ranges[j];

	516 bool is_on = false;

	517 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)

	518 is_on = (range[k] <= p && p <= range[k + 1]);

	519 CHECK_EQ(is_on, outs->Get(j));

	520 }

	521 }

	522 }

	523

	524

	525 TEST(Assembler) {

	526 V8::Initialize(NULL);

	527 byte codes[1024];

	528 IrregexpAssembler assembler(Vector<byte>(codes, 1024));

	529 #define __ assembler.

	530 Label advance;

	531 Label look_for_foo;

	532 Label fail;

	533 __ GoTo(&look_for_foo);

	534 __ Bind(&advance);

	535 __ AdvanceCP(1);

	536 __ Bind(&look_for_foo);

	537 __ LoadCurrentChar(0, &fail);

	538 __ CheckNotCharacter('f', &advance);

	539 __ LoadCurrentChar(1, &fail);

	540 __ CheckNotCharacter('o', &advance);

	541 __ LoadCurrentChar(2, &fail);

	542 __ CheckNotCharacter('o', &advance);

	543 __ WriteCurrentPositionToRegister(0);

	544 __ WriteCurrentPositionToRegister(1, 2);

	545 __ Succeed();

	546 __ Bind(&fail);

	547 __ Fail();

	548

	549 v8::HandleScope scope;

	550 Handle<ByteArray> array = Factory::NewByteArray(assembler.length());

	551 assembler.Copy(array->GetDataStartAddress());

	552 int captures[2];

	553

	554 Handle<String> f1 =

	555 Factory::NewStringFromAscii(CStrVector("Now is the time"));

	556 Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);

	557 CHECK(!IrregexpInterpreter::Match(array, f1_16, captures, 0));

	558

	559 Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz"));

	560 Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);

	561 CHECK(IrregexpInterpreter::Match(array, f2_16, captures, 0));

	562 CHECK_EQ(0, captures[0]);

	563 CHECK_EQ(2, captures[1]);

	564

	565 Handle<String> f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery"));

	566 Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3);

	567 CHECK(IrregexpInterpreter::Match(array, f3_16, captures, 0));

	568 CHECK_EQ(3, captures[0]);

	569 CHECK_EQ(5, captures[1]);

	570 }

	571

	572

	573 TEST(Assembler2) {

	574 V8::Initialize(NULL);

	575 byte codes[1024];

	576 IrregexpAssembler assembler(Vector<byte>(codes, 1024));

	577 #define __ assembler.

	578 // /^.*foo/

	579 Label more_dots;

	580 Label unwind_dot;

	581 Label failure;

	582 Label foo;

	583 Label foo_failed;

	584 Label dot_match;

	585 // ^

	586 __ PushCurrentPosition();

	587 __ PushRegister(0);

	588 __ WriteCurrentPositionToRegister(0);

	589 __ PushBacktrack(&failure);

	590 __ GoTo(&dot_match);

	591 // .*

	592 __ Bind(&more_dots);

	593 __ AdvanceCP(1);

	594 __ Bind(&dot_match);

	595 __ PushCurrentPosition();

	596 __ PushBacktrack(&unwind_dot);

	597 __ LoadCurrentChar(0, &foo);

	598 __ CheckNotCharacter('\n', &more_dots);

	599 // foo

	600 __ Bind(&foo);

	601 __ CheckNotCharacter('f', &foo_failed);

	602 __ LoadCurrentChar(1, &foo_failed);

	603 __ CheckNotCharacter('o', &foo_failed);

	604 __ LoadCurrentChar(2, &foo_failed);

	605 __ CheckNotCharacter('o', &foo_failed);

	606 __ WriteCurrentPositionToRegister(1, 2);

	607 __ Succeed();

	608 __ Break();

	609

	610 __ Bind(&foo_failed);

	611 __ PopBacktrack();

	612 __ Break();

	613

	614 __ Bind(&unwind_dot);

	615 __ PopCurrentPosition();

	616 __ LoadCurrentChar(0, &foo_failed);

	617 __ GoTo(&foo);

	618

	619 __ Bind(&failure);

	620 __ PopRegister(0);

	621 __ PopCurrentPosition();

	622 __ Fail();

	623

	624 v8::HandleScope scope;

	625 Handle<ByteArray> array = Factory::NewByteArray(assembler.length());

	626 assembler.Copy(array->GetDataStartAddress());

	627 int captures[2];

	628

	629 Handle<String> f1 =

	630 Factory::NewStringFromAscii(CStrVector("Now is the time"));

	631 Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);

	632 CHECK(!IrregexpInterpreter::Match(array, f1_16, captures, 0));

	633

	634 Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz"));

	635 Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);

	636 CHECK(IrregexpInterpreter::Match(array, f2_16, captures, 0));

	637 CHECK_EQ(0, captures[0]);

	638 CHECK_EQ(2, captures[1]);

	639

	640 Handle<String> f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery"));

	641 Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3);

	642 CHECK(IrregexpInterpreter::Match(array, f3_16, captures, 0));

	643 CHECK_EQ(0, captures[0]);

	644 CHECK_EQ(5, captures[1]);

	645

	646 Handle<String> f4 =

	647 Factory::NewStringFromAscii(CStrVector("football buffoonery"));

	648 Handle<String> f4_16 = RegExpImpl::StringToTwoByte(f4);

	649 CHECK(IrregexpInterpreter::Match(array, f4_16, captures, 0));

	650 CHECK_EQ(0, captures[0]);

	651 CHECK_EQ(14, captures[1]);

	652

	653 Handle<String> f5 =

	654 Factory::NewStringFromAscii(CStrVector("walking\nbarefoot"));

	655 Handle<String> f5_16 = RegExpImpl::StringToTwoByte(f5);

	656 CHECK(!IrregexpInterpreter::Match(array, f5_16, captures, 0));

	657 }

	658

	659

	660 TEST(MacroAssembler) {

	661 V8::Initialize(NULL);

	662 byte codes[1024];

	663 IrregexpAssembler assembler(Vector<byte>(codes, 1024));

	664 RegExpMacroAssemblerIrregexp m(&assembler);

	665 // ^f(o)o.

	666 Label fail, fail2, start;

	667 uc16 foo_chars[3];

	668 foo_chars[0] = 'f';

	669 foo_chars[1] = 'o';

	670 foo_chars[2] = 'o';

	671 Vector<const uc16> foo(foo_chars, 3);

	672 m.SetRegister(4, 42);

	673 m.PushRegister(4);

	674 m.AdvanceRegister(4, 42);

	675 m.GoTo(&start);

	676 m.Fail();

	677 m.Bind(&start);

	678 m.PushBacktrack(&fail2);

	679 m.CheckCharacters(foo, 0, &fail);

	680 m.WriteCurrentPositionToRegister(0);

	681 m.PushCurrentPosition();

	682 m.AdvanceCurrentPosition(3);

	683 m.WriteCurrentPositionToRegister(1);

	684 m.PopCurrentPosition();

	685 m.AdvanceCurrentPosition(1);

	686 m.WriteCurrentPositionToRegister(2);

	687 m.AdvanceCurrentPosition(1);

	688 m.WriteCurrentPositionToRegister(3);

	689 m.Succeed();

	690

	691 m.Bind(&fail);

	692 m.Backtrack();

	693 m.Succeed();

	694

	695 m.Bind(&fail2);

	696 m.PopRegister(0);

	697 m.Fail();

	698

	699 v8::HandleScope scope;

	700

	701 Handle<ByteArray> array = Factory::NewByteArray(assembler.length());

	702 assembler.Copy(array->GetDataStartAddress());

	703 int captures[5];

	704

	705 Handle<String> f1 =

	706 Factory::NewStringFromAscii(CStrVector("foobar"));

	707 Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);

	708 CHECK(IrregexpInterpreter::Match(array, f1_16, captures, 0));

	709 CHECK_EQ(0, captures[0]);

	710 CHECK_EQ(3, captures[1]);

	711 CHECK_EQ(1, captures[2]);

	712 CHECK_EQ(2, captures[3]);

	713 CHECK_EQ(84, captures[4]);

	714

	715 Handle<String> f2 =

	716 Factory::NewStringFromAscii(CStrVector("barfoo"));

	717 Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);

	718 CHECK(!IrregexpInterpreter::Match(array, f2_16, captures, 0));

	719 CHECK_EQ(42, captures[0]);

	720 }

	721

	722

	723 TEST(AddInverseToTable) {

	724 static const int kLimit = 1000;

	725 static const int kRangeCount = 16;

	726 for (int t = 0; t < 10; t++) {

	727 ZoneScope zone_scope(DELETE_ON_EXIT);

	728 ZoneList<CharacterRange>* ranges =

	729 new ZoneList<CharacterRange>(kRangeCount);

	730 for (int i = 0; i < kRangeCount; i++) {

	731 int from = PseudoRandom(t + 87, i + 25) % kLimit;

	732 int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));

	733 if (to > kLimit) to = kLimit;

	734 ranges->Add(CharacterRange(from, to));

	735 }

	736 DispatchTable table;

	737 DispatchTableConstructor cons(&table);

	738 cons.set_choice_index(0);

	739 cons.AddInverse(ranges);

	740 for (int i = 0; i < kLimit; i++) {

	741 bool is_on = false;

	742 for (int j = 0; !is_on && j < kRangeCount; j++)

	743 is_on = ranges->at(j).Contains(i);

	744 OutSet* set = table.Get(i);

	745 CHECK_EQ(is_on, set->Get(0) == false);

	746 }

	747 }

	748 ZoneScope zone_scope(DELETE_ON_EXIT);

	749 ZoneList<CharacterRange>* ranges =

	750 new ZoneList<CharacterRange>(1);

	751 ranges->Add(CharacterRange(0xFFF0, 0xFFFE));

	752 DispatchTable table;

	753 DispatchTableConstructor cons(&table);

	754 cons.set_choice_index(0);

	755 cons.AddInverse(ranges);

	756 CHECK(!table.Get(0xFFFE)->Get(0));

	757 CHECK(table.Get(0xFFFF)->Get(0));

	758 }

	759

	760

	761 static uc32 canonicalize(uc32 c) {

	762 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];

	763 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);

	764 if (count == 0) {

	765 return c;

	766 } else {

	767 CHECK_EQ(1, count);

	768 return canon[0];

	769 }

	770 }

	771

	772

	773 TEST(LatinCanonicalize) {

	774 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;

	775 for (char lower = 'a'; lower <= 'z'; lower++) {

	776 char upper = lower + ('A' - 'a');

	777 CHECK_EQ(canonicalize(lower), canonicalize(upper));

	778 unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	779 int length = un_canonicalize.get(lower, '\0', uncanon);

	780 CHECK_EQ(2, length);

	781 CHECK_EQ(upper, uncanon[0]);

	782 CHECK_EQ(lower, uncanon[1]);

	783 }

	784 for (uc32 c = 128; c < (1 << 21); c++)

	785 CHECK_GE(canonicalize(c), 128);

	786 unibrow::Mapping<unibrow::ToUppercase> to_upper;

	787 for (uc32 c = 0; c < (1 << 21); c++) {

	788 unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];

	789 int length = to_upper.get(c, '\0', upper);

	790 if (length == 0) {

	791 length = 1;

	792 upper[0] = c;

	793 }

	794 uc32 u = upper[0];

	795 if (length > 1 \|\| (c >= 128 && u < 128))

	796 u = c;

	797 CHECK_EQ(u, canonicalize(c));

	798 }

	799 }

	800

	801

	802 TEST(SimplePropagation) {

	803 v8::HandleScope scope;

	804 ZoneScope zone_scope(DELETE_ON_EXIT);

	805 RegExpNode* node = Compile("(a\|^b\|c)");

	806 CHECK(node->info()->determine_start);

	807 }

	808

	809

	810 static uc32 CanonRange(uc32 c) {

	811 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];

	812 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);

	813 if (count == 0) {

	814 return c;

	815 } else {

	816 CHECK_EQ(1, count);

	817 return canon[0];

	818 }

	819 }

	820

	821

	822 TEST(RangeCanonicalization) {

	823 ASSERT((CanonRange(0) & CharacterRange::kStartMarker) != 0);

	824 // Check that we arrive at the same result when using the basic

	825 // range canonicalization primitives as when using immediate

	826 // canonicalization.

	827 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;

	828 for (int i = 0; i < CharacterRange::kRangeCanonicalizeMax; i++) {

	829 int range = CanonRange(i);

	830 int indirect_length = 0;

	831 unibrow::uchar indirect[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	832 if ((range & CharacterRange::kStartMarker) == 0) {

	833 indirect_length = un_canonicalize.get(i - range, '\0', indirect);

	834 for (int i = 0; i < indirect_length; i++)

	835 indirect[i] += range;

	836 } else {

	837 indirect_length = un_canonicalize.get(i, '\0', indirect);

	838 }

	839 unibrow::uchar direct[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	840 int direct_length = un_canonicalize.get(i, '\0', direct);

	841 CHECK_EQ(direct_length, indirect_length);

	842 }

	843 // Check that we arrive at the same results when skipping over

	844 // canonicalization ranges.

	845 int next_block = 0;

	846 while (next_block < CharacterRange::kRangeCanonicalizeMax) {

	847 uc32 start = CanonRange(next_block);

	848 CHECK_NE((start & CharacterRange::kStartMarker), 0);

	849 unsigned dist = start & CharacterRange::kPayloadMask;

	850 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	851 int first_length = un_canonicalize.get(next_block, '\0', first);

	852 for (unsigned i = 1; i < dist; i++) {

	853 CHECK_EQ(i, CanonRange(i));

	854 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	855 int succ_length = un_canonicalize.get(next_block + i, '\0', succ);

	856 CHECK_EQ(first_length, succ_length);

	857 for (int j = 0; j < succ_length; j++) {

	858 int calc = first[j] + i;

	859 int found = succ[j];

	860 CHECK_EQ(calc, found);

	861 }

	862 }

	863 next_block = next_block + dist;

	864 }

	865 }

	866

	867

	868 static void TestRangeCaseIndependence(CharacterRange input,

	869 Vector<CharacterRange> expected) {

	870 ZoneScope zone_scope(DELETE_ON_EXIT);

	871 int count = expected.length();

	872 ZoneList<CharacterRange>* list = new ZoneList<CharacterRange>(count);

	873 input.AddCaseEquivalents(list);

	874 CHECK_EQ(count, list->length());

	875 for (int i = 0; i < list->length(); i++) {

	876 CHECK_EQ(expected[i].from(), list->at(i).from());

	877 CHECK_EQ(expected[i].to(), list->at(i).to());

	878 }

	879 }

	880

	881

	882 static void TestSimpleRangeCaseIndependence(CharacterRange input,

	883 CharacterRange expected) {

	884 EmbeddedVector<CharacterRange, 1> vector;

	885 vector[0] = expected;

	886 TestRangeCaseIndependence(input, vector);

	887 }

	888

	889

	890 TEST(CharacterRangeCaseIndependence) {

	891 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'),

	892 CharacterRange::Singleton('A'));

	893 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'),

	894 CharacterRange::Singleton('Z'));

	895 TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'),

	896 CharacterRange('A', 'Z'));

	897 TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'),

	898 CharacterRange('C', 'F'));

	899 TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'),

	900 CharacterRange('A', 'B'));

	901 TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'),

	902 CharacterRange('Y', 'Z'));

	903 TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1),

	904 CharacterRange('A', 'Z'));

	905 TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'),

	906 CharacterRange('a', 'z'));

	907 TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'),

	908 CharacterRange('c', 'f'));

	909 TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1),

	910 CharacterRange('a', 'z'));

	911 // Here we need to add [l-z] to complete the case independence of

	912 // [A-Za-z] but we expect [a-z] to be added since we always add a

	913 // whole block at a time.

	914 TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'),

	915 CharacterRange('a', 'z'));

	916 }

	917

	918

	919 TEST(Graph) {

	920 V8::Initialize(NULL);

	921 Execute("(x)?\\1y", "", true);

	922 }

OLD	NEW

« src/utils.h ('K') | « test/cctest/SConscript ('k') | test/mjsunit/non-ascii-replace.js » ('j') | no next file with comments »