runtime/vm/regexp.cc - Issue 539153002: Port and integrate the irregexp engine from V8

Side by Side Diff: runtime/vm/regexp.cc

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Addressed Ivan's comments. Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4

	5 #include "vm/regexp.h"

	6

	7 #include "vm/dart_entry.h"

	8 #include "vm/regexp_assembler.h"

	9 #include "vm/regexp_ast.h"

	10 #include "vm/unibrow-inl.h"

	11 #include "vm/unicode.h"

	12 #include "vm/symbols.h"

	13

	14 #define I isolate()

	15 #define CI compiler->isolate()

	16

	17 namespace dart {

	18

	19 DECLARE_FLAG(bool, trace_irregexp);

	20

	21 #define DEFINE_ACCEPT(Type) \

	22 void Type##Node::Accept(NodeVisitor* visitor) { \

	23 visitor->Visit##Type(this); \

	24 }

	25 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)

	26 #undef DEFINE_ACCEPT

	27

	28

	29 // Default to generating optimized regexp code.

	30 static const bool kRegexpOptimization = true;

	31

	32 // More makes code generation slower, less makes V8 benchmark score lower.

	33 static const intptr_t kMaxLookaheadForBoyerMoore = 8;

	34

	35 // In a 3-character pattern you can maximally step forwards 3 characters

	36 // at a time, which is not always enough to pay for the extra logic.

	37 static const intptr_t kPatternTooShortForBoyerMoore = 2;

	38

	39 // The '2' variant has inclusive from and exclusive to.

	40 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12,

	41 // which include WhiteSpace (7.2) or LineTerminator (7.3) values.

	42 static const intptr_t kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,

	43 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,

	44 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,

	45 0xFEFF, 0xFF00, 0x10000 };

	46 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);

	47 static const intptr_t kWordRanges[] = {

	48 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };

	49 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges);

	50 static const intptr_t kDigitRanges[] = { '0', '9' + 1, 0x10000 };

	51 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges);

	52 static const intptr_t kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };

	53 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);

	54 static const intptr_t kLineTerminatorRanges[] = {

	55 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, 0x10000 };

	56 static const intptr_t kLineTerminatorRangeCount =

	57 ARRAY_SIZE(kLineTerminatorRanges);

	58

	59

	60 static inline void PrintUtf16(uint16_t c) {

	61 const char* format = (0x20 <= c && c <= 0x7F) ?

	62 "%c" : (c <= 0xff) ? "\\x%02x" : "\\u%04x";

	63 OS::Print(format, c);

	64 }

	65

	66

	67 // We need to check for the following characters: 0x39c 0x3bc 0x178.

	68 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {

	69 // TODO(dcarney): this could be a lot more efficient.

	70 return range.Contains(0x39c) \|\|

	71 range.Contains(0x3bc) \|\| range.Contains(0x178);

	72 }

	73

	74

	75 static bool RangesContainLatin1Equivalents(

	76 ZoneGrowableArray<CharacterRange>* ranges) {

	77 for (intptr_t i = 0; i < ranges->length(); i++) {

	78 // TODO(dcarney): this could be a lot more efficient.

	79 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true;

	80 }

	81 return false;

	82 }

	83

	84 static uint16_t ConvertNonLatin1ToLatin1(uint16_t c) {

	85 ASSERT(c > Symbols::kMaxOneCharCodeSymbol);

	86 switch (c) {

	87 // This are equivalent characters in unicode.

	88 case 0x39c:

	89 case 0x3bc:

	90 return 0xb5;

	91 // This is an uppercase of a Latin-1 character

	92 // outside of Latin-1.

	93 case 0x178:

	94 return 0xff;

	95 }

	96 return 0;

	97 }

	98

	99

	100 class VisitMarker : public ValueObject {

	101 public:

	102 explicit VisitMarker(NodeInfo* info) : info_(info) {

	103 ASSERT(!info->visited);

	104 info->visited = true;

	105 }

	106 ~VisitMarker() {

	107 info_->visited = false;

	108 }

	109 private:

	110 NodeInfo* info_;

	111 };

	112

	113

	114 class FrequencyCollator : public ValueObject {

	115 public:

	116 FrequencyCollator() : total_samples_(0) {

	117 for (intptr_t i = 0; i < RegExpMacroAssembler::kTableSize; i++) {

	118 frequencies_[i] = CharacterFrequency(i);

	119 }

	120 }

	121

	122 void CountCharacter(intptr_t character) {

	123 intptr_t index = (character & RegExpMacroAssembler::kTableMask);

	124 frequencies_[index].Increment();

	125 total_samples_++;

	126 }

	127

	128 // Does not measure in percent, but rather per-128 (the table size from the

	129 // regexp macro assembler).

	130 intptr_t Frequency(intptr_t in_character) {

	131 ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character);

	132 if (total_samples_ < 1) return 1; // Division by zero.

	133 intptr_t freq_in_per128 =

	134 (frequencies_[in_character].counter() * 128) / total_samples_;

	135 return freq_in_per128;

	136 }

	137

	138 private:

	139 class CharacterFrequency {

	140 public:

	141 CharacterFrequency() : counter_(0), character_(-1) { }

	142 explicit CharacterFrequency(intptr_t character)

	143 : counter_(0), character_(character) { }

	144

	145 void Increment() { counter_++; }

	146 intptr_t counter() { return counter_; }

	147 intptr_t character() { return character_; }

	148

	149 private:

	150 intptr_t counter_;

	151 intptr_t character_;

	152

	153 DISALLOW_ALLOCATION();

	154 };

	155

	156

	157 private:

	158 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];

	159 intptr_t total_samples_;

	160 };

	161

	162

	163 class RegExpCompiler : public ValueObject {

	164 public:

	165 RegExpCompiler(intptr_t capture_count,

	166 bool ignore_case,

	167 intptr_t specialization_cid);

	168

	169 intptr_t AllocateRegister() {

	170 return next_register_++;

	171 }

	172

	173 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler,

	174 RegExpNode* start,

	175 intptr_t capture_count,

	176 const String& pattern);

	177

	178 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }

	179

	180 static const intptr_t kImplementationOffset = 0;

	181 static const intptr_t kNumberOfRegistersOffset = 0;

	182 static const intptr_t kCodeOffset = 1;

	183

	184 IRRegExpMacroAssembler* macro_assembler() { return macro_assembler_; }

	185 EndNode* accept() { return accept_; }

	186

	187 static const intptr_t kMaxRecursion = 100;

	188 inline intptr_t recursion_depth() { return recursion_depth_; }

	189 inline void IncrementRecursionDepth() { recursion_depth_++; }

	190 inline void DecrementRecursionDepth() { recursion_depth_--; }

	191

	192 void SetRegExpTooBig() { reg_exp_too_big_ = true; }

	193

	194 inline bool ignore_case() { return ignore_case_; }

	195 inline bool ascii() const {

	196 return (specialization_cid_ == kOneByteStringCid \|\|

	197 specialization_cid_ == kExternalOneByteStringCid);

	198 }

	199 inline intptr_t specialization_cid() { return specialization_cid_; }

	200 FrequencyCollator* frequency_collator() { return &frequency_collator_; }

	201

	202 intptr_t current_expansion_factor() { return current_expansion_factor_; }

	203 void set_current_expansion_factor(intptr_t value) {

	204 current_expansion_factor_ = value;

	205 }

	206

	207 Isolate* isolate() const { return isolate_; }

	208

	209 static const intptr_t kNoRegister = -1;

	210

	211 private:

	212 EndNode* accept_;

	213 intptr_t next_register_;

	214 ZoneGrowableArray<RegExpNode> work_list_;

	215 intptr_t recursion_depth_;

	216 IRRegExpMacroAssembler* macro_assembler_;

	217 bool ignore_case_;

	218 intptr_t specialization_cid_;

	219 bool reg_exp_too_big_;

	220 intptr_t current_expansion_factor_;

	221 FrequencyCollator frequency_collator_;

	222 Isolate* isolate_;

	223 };

	224

	225

	226 class RecursionCheck : public ValueObject {

	227 public:

	228 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {

	229 compiler->IncrementRecursionDepth();

	230 }

	231 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }

	232 private:

	233 RegExpCompiler* compiler_;

	234 };

	235

	236

	237 // Scoped object to keep track of how much we unroll quantifier loops in the

	238 // regexp graph generator.

	239 class RegExpExpansionLimiter : public ValueObject {

	240 public:

	241 static const intptr_t kMaxExpansionFactor = 6;

	242 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor)

	243 : compiler_(compiler),

	244 saved_expansion_factor_(compiler->current_expansion_factor()),

	245 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {

	246 ASSERT(factor > 0);

	247 if (ok_to_expand_) {

	248 if (factor > kMaxExpansionFactor) {

	249 // Avoid integer overflow of the current expansion factor.

	250 ok_to_expand_ = false;

	251 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);

	252 } else {

	253 intptr_t new_factor = saved_expansion_factor_ * factor;

	254 ok_to_expand_ = (new_factor <= kMaxExpansionFactor);

	255 compiler->set_current_expansion_factor(new_factor);

	256 }

	257 }

	258 }

	259

	260 ~RegExpExpansionLimiter() {

	261 compiler_->set_current_expansion_factor(saved_expansion_factor_);

	262 }

	263

	264 bool ok_to_expand() { return ok_to_expand_; }

	265

	266 private:

	267 RegExpCompiler* compiler_;

	268 intptr_t saved_expansion_factor_;

	269 bool ok_to_expand_;

	270

	271 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);

	272 };

	273

	274

	275 // Node generation -------------------------------------------------------------

	276

	277 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,

	278 RegExpNode* on_success) {

	279 ZoneGrowableArray<TextElement>* elms =

	280 new(CI) ZoneGrowableArray<TextElement>(1);

	281 elms->Add(TextElement::Atom(this));

	282 return new(CI) TextNode(elms, on_success);

	283 }

	284

	285

	286 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,

	287 RegExpNode* on_success) {

	288 ZoneGrowableArray<TextElement>* elms =

	289 new(CI) ZoneGrowableArray<TextElement>(1);

	290 for (intptr_t i = 0; i < elements()->length(); i++) {

	291 elms->Add(elements()->At(i));

	292 }

	293 return new(CI) TextNode(elms, on_success);

	294 }

	295

	296

	297 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,

	298 RegExpNode* on_success) {

	299 return new(CI) TextNode(this, on_success);

	300 }

	301

	302

	303 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,

	304 RegExpNode* on_success) {

	305 ZoneGrowableArray<RegExpTree> alternatives = this->alternatives();

	306 intptr_t length = alternatives->length();

	307 ChoiceNode* result =

	308 new(CI) ChoiceNode(length, CI);

	309 for (intptr_t i = 0; i < length; i++) {

	310 GuardedAlternative alternative(alternatives->At(i)->ToNode(compiler,

	311 on_success));

	312 result->AddAlternative(alternative);

	313 }

	314 return result;

	315 }

	316

	317

	318 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,

	319 RegExpNode* on_success) {

	320 return ToNode(min(),

	321 max(),

	322 is_greedy(),

	323 body(),

	324 compiler,

	325 on_success);

	326 }

	327

	328

	329 RegExpNode* RegExpQuantifier::ToNode(intptr_t min,

	330 intptr_t max,

	331 bool is_greedy,

	332 RegExpTree* body,

	333 RegExpCompiler* compiler,

	334 RegExpNode* on_success,

	335 bool not_at_start) {

	336 // x{f, t} becomes this:

	337 //

	338 // (r++)<-.

	339 // \| `

	340 // \| (x)

	341 // v ^

	342 // (r=0)-->(?)---/ [if r < t]

	343 // \|

	344 // [if r >= f] \----> ...

	345 //

	346

	347 // 15.10.2.5 RepeatMatcher algorithm.

	348 // The parser has already eliminated the case where max is 0. In the case

	349 // where max_match is zero the parser has removed the quantifier if min was

	350 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom.

	351

	352 // If we know that we cannot match zero length then things are a little

	353 // simpler since we don't need to make the special zero length match check

	354 // from step 2.1. If the min and max are small we can unroll a little in

	355 // this case.

	356 // Unroll (foo)+ and (foo){3,}

	357 static const intptr_t kMaxUnrolledMinMatches = 3;

	358 // Unroll (foo)? and (foo){x,3}

	359 static const intptr_t kMaxUnrolledMaxMatches = 3;

	360 if (max == 0) return on_success; // This can happen due to recursion.

	361 bool body_can_be_empty = (body->min_match() == 0);

	362 intptr_t body_start_reg = RegExpCompiler::kNoRegister;

	363 Interval capture_registers = body->CaptureRegisters();

	364 bool needs_capture_clearing = !capture_registers.is_empty();

	365 Isolate* isolate = compiler->isolate();

	366

	367 if (body_can_be_empty) {

	368 body_start_reg = compiler->AllocateRegister();

	369 } else if (kRegexpOptimization && !needs_capture_clearing) {

	370 // Only unroll if there are no captures and the body can't be

	371 // empty.

	372 {

	373 RegExpExpansionLimiter limiter(

	374 compiler, min + ((max != min) ? 1 : 0));

	375 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {

	376 intptr_t new_max = (max == kInfinity) ? max : max - min;

	377 // Recurse once to get the loop or optional matches after the fixed

	378 // ones.

	379 RegExpNode* answer = ToNode(

	380 0, new_max, is_greedy, body, compiler, on_success, true);

	381 // Unroll the forced matches from 0 to min. This can cause chains of

	382 // TextNodes (which the parser does not generate). These should be

	383 // combined if it turns out they hinder good code generation.

	384 for (intptr_t i = 0; i < min; i++) {

	385 answer = body->ToNode(compiler, answer);

	386 }

	387 return answer;

	388 }

	389 }

	390 if (max <= kMaxUnrolledMaxMatches && min == 0) {

	391 ASSERT(max > 0); // Due to the 'if' above.

	392 RegExpExpansionLimiter limiter(compiler, max);

	393 if (limiter.ok_to_expand()) {

	394 // Unroll the optional matches up to max.

	395 RegExpNode* answer = on_success;

	396 for (intptr_t i = 0; i < max; i++) {

	397 ChoiceNode* alternation = new(isolate) ChoiceNode(2, isolate);

	398 if (is_greedy) {

	399 alternation->AddAlternative(

	400 GuardedAlternative(body->ToNode(compiler, answer)));

	401 alternation->AddAlternative(GuardedAlternative(on_success));

	402 } else {

	403 alternation->AddAlternative(GuardedAlternative(on_success));

	404 alternation->AddAlternative(

	405 GuardedAlternative(body->ToNode(compiler, answer)));

	406 }

	407 answer = alternation;

	408 if (not_at_start) alternation->set_not_at_start();

	409 }

	410 return answer;

	411 }

	412 }

	413 }

	414 bool has_min = min > 0;

	415 bool has_max = max < RegExpTree::kInfinity;

	416 bool needs_counter = has_min \|\| has_max;

	417 intptr_t reg_ctr = needs_counter

	418 ? compiler->AllocateRegister()

	419 : RegExpCompiler::kNoRegister;

	420 LoopChoiceNode* center = new(isolate) LoopChoiceNode(body->min_match() == 0,

	421 isolate);

	422 if (not_at_start) center->set_not_at_start();

	423 RegExpNode* loop_return = needs_counter

	424 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))

	425 : static_cast<RegExpNode*>(center);

	426 if (body_can_be_empty) {

	427 // If the body can be empty we need to check if it was and then

	428 // backtrack.

	429 loop_return = ActionNode::EmptyMatchCheck(body_start_reg,

	430 reg_ctr,

	431 min,

	432 loop_return);

	433 }

	434 RegExpNode* body_node = body->ToNode(compiler, loop_return);

	435 if (body_can_be_empty) {

	436 // If the body can be empty we need to store the start position

	437 // so we can bail out if it was empty.

	438 body_node = ActionNode::StorePosition(body_start_reg, false, body_node);

	439 }

	440 if (needs_capture_clearing) {

	441 // Before entering the body of this loop we need to clear captures.

	442 body_node = ActionNode::ClearCaptures(capture_registers, body_node);

	443 }

	444 GuardedAlternative body_alt(body_node);

	445 if (has_max) {

	446 Guard* body_guard =

	447 new(isolate) Guard(reg_ctr, Guard::LT, max);

	448 body_alt.AddGuard(body_guard, isolate);

	449 }

	450 GuardedAlternative rest_alt(on_success);

	451 if (has_min) {

	452 Guard* rest_guard = new(isolate) Guard(reg_ctr, Guard::GEQ, min);

	453 rest_alt.AddGuard(rest_guard, isolate);

	454 }

	455 if (is_greedy) {

	456 center->AddLoopAlternative(body_alt);

	457 center->AddContinueAlternative(rest_alt);

	458 } else {

	459 center->AddContinueAlternative(rest_alt);

	460 center->AddLoopAlternative(body_alt);

	461 }

	462 if (needs_counter) {

	463 return ActionNode::SetRegister(reg_ctr, 0, center);

	464 } else {

	465 return center;

	466 }

	467 }

	468

	469

	470 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,

	471 RegExpNode* on_success) {

	472 switch (assertion_type()) {

	473 case START_OF_LINE:

	474 return AssertionNode::AfterNewline(on_success);

	475 case START_OF_INPUT:

	476 return AssertionNode::AtStart(on_success);

	477 case BOUNDARY:

	478 return AssertionNode::AtBoundary(on_success);

	479 case NON_BOUNDARY:

	480 return AssertionNode::AtNonBoundary(on_success);

	481 case END_OF_INPUT:

	482 return AssertionNode::AtEnd(on_success);

	483 case END_OF_LINE: {

	484 // Compile $ in multiline regexps as an alternation with a positive

	485 // lookahead in one side and an end-of-input on the other side.

	486 // We need two registers for the lookahead.

	487 intptr_t stack_pointer_register = compiler->AllocateRegister();

	488 intptr_t position_register = compiler->AllocateRegister();

	489 // The ChoiceNode to distinguish between a newline and end-of-input.

	490 ChoiceNode* result = new ChoiceNode(2, on_success->isolate());

	491 // Create a newline atom.

	492 ZoneGrowableArray<CharacterRange>* newline_ranges =

	493 new ZoneGrowableArray<CharacterRange>(3);

	494 CharacterRange::AddClassEscape('n', newline_ranges);

	495 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n');

	496 TextNode* newline_matcher = new TextNode(

	497 newline_atom,

	498 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,

	499 position_register,

	500 0, // No captures inside.

	501 -1, // Ignored if no captures.

	502 on_success));

	503 // Create an end-of-input matcher.

	504 RegExpNode* end_of_line = ActionNode::BeginSubmatch(

	505 stack_pointer_register,

	506 position_register,

	507 newline_matcher);

	508 // Add the two alternatives to the ChoiceNode.

	509 GuardedAlternative eol_alternative(end_of_line);

	510 result->AddAlternative(eol_alternative);

	511 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));

	512 result->AddAlternative(end_alternative);

	513 return result;

	514 }

	515 default:

	516 UNREACHABLE();

	517 }

	518 return on_success;

	519 }

	520

	521

	522 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,

	523 RegExpNode* on_success) {

	524 return new(CI)

	525 BackReferenceNode(RegExpCapture::StartRegister(index()),

	526 RegExpCapture::EndRegister(index()),

	527 on_success);

	528 }

	529

	530

	531 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,

	532 RegExpNode* on_success) {

	533 return on_success;

	534 }

	535

	536

	537 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,

	538 RegExpNode* on_success) {

	539 intptr_t stack_pointer_register = compiler->AllocateRegister();

	540 intptr_t position_register = compiler->AllocateRegister();

	541

	542 const intptr_t registers_per_capture = 2;

	543 const intptr_t register_of_first_capture = 2;

	544 intptr_t register_count = capture_count_ * registers_per_capture;

	545 intptr_t register_start =

	546 register_of_first_capture + capture_from_ * registers_per_capture;

	547

	548 RegExpNode* success;

	549 if (is_positive()) {

	550 RegExpNode* node = ActionNode::BeginSubmatch(

	551 stack_pointer_register,

	552 position_register,

	553 body()->ToNode(

	554 compiler,

	555 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,

	556 position_register,

	557 register_count,

	558 register_start,

	559 on_success)));

	560 return node;

	561 } else {

	562 // We use a ChoiceNode for a negative lookahead because it has most of

	563 // the characteristics we need. It has the body of the lookahead as its

	564 // first alternative and the expression after the lookahead of the second

	565 // alternative. If the first alternative succeeds then the

	566 // NegativeSubmatchSuccess will unwind the stack including everything the

	567 // choice node set up and backtrack. If the first alternative fails then

	568 // the second alternative is tried, which is exactly the desired result

	569 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special

	570 // ChoiceNode that knows to ignore the first exit when calculating quick

	571 // checks.

	572

	573 GuardedAlternative body_alt(

	574 body()->ToNode(

	575 compiler,

	576 success = new(CI) NegativeSubmatchSuccess(stack_pointer_register,

	577 position_register,

	578 register_count,

	579 register_start,

	580 CI)));

	581 ChoiceNode* choice_node =

	582 new(CI) NegativeLookaheadChoiceNode(body_alt,

	583 GuardedAlternative(on_success),

	584 CI);

	585 return ActionNode::BeginSubmatch(stack_pointer_register,

	586 position_register,

	587 choice_node);

	588 }

	589 }

	590

	591

	592 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,

	593 RegExpNode* on_success) {

	594 return ToNode(body(), index(), compiler, on_success);

	595 }

	596

	597

	598 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,

	599 intptr_t index,

	600 RegExpCompiler* compiler,

	601 RegExpNode* on_success) {

	602 intptr_t start_reg = RegExpCapture::StartRegister(index);

	603 intptr_t end_reg = RegExpCapture::EndRegister(index);

	604 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);

	605 RegExpNode* body_node = body->ToNode(compiler, store_end);

	606 return ActionNode::StorePosition(start_reg, true, body_node);

	607 }

	608

	609

	610 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,

	611 RegExpNode* on_success) {

	612 ZoneGrowableArray<RegExpTree> children = nodes();

	613 RegExpNode* current = on_success;

	614 for (intptr_t i = children->length() - 1; i >= 0; i--) {

	615 current = children->At(i)->ToNode(compiler, current);

	616 }

	617 return current;

	618 }

	619

	620

	621 // ASCII filtering -------------------------------------------------------------

	622

	623

	624 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) {

	625 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case);

	626 if (next == NULL) return set_replacement(NULL);

	627 on_success_ = next;

	628 return set_replacement(this);

	629 }

	630

	631

	632 RegExpNode* SeqRegExpNode::FilterASCII(intptr_t depth, bool ignore_case) {

	633 if (info()->replacement_calculated) return replacement();

	634 if (depth < 0) return this;

	635 ASSERT(!info()->visited);

	636 VisitMarker marker(info());

	637 return FilterSuccessor(depth - 1, ignore_case);

	638 }

	639

	640

	641 RegExpNode* TextNode::FilterASCII(intptr_t depth, bool ignore_case) {

	642 if (info()->replacement_calculated) return replacement();

	643 if (depth < 0) return this;

	644 ASSERT(!info()->visited);

	645 VisitMarker marker(info());

	646 intptr_t element_count = elms_->length();

	647 for (intptr_t i = 0; i < element_count; i++) {

	648 TextElement elm = elms_->At(i);

	649 if (elm.text_type() == TextElement::ATOM) {

	650 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();

	651 for (intptr_t j = 0; j < quarks->length(); j++) {

	652 uint16_t c = quarks->At(j);

	653 if (c <= Symbols::kMaxOneCharCodeSymbol) continue;

	654 if (!ignore_case) return set_replacement(NULL);

	655 // Here, we need to check for characters whose upper and lower cases

	656 // are outside the Latin-1 range.

	657 uint16_t converted = ConvertNonLatin1ToLatin1(c);

	658 // Character is outside Latin-1 completely

	659 if (converted == 0) return set_replacement(NULL);

	660 // Convert quark to Latin-1 in place.

	661 (*quarks)[0] = converted;

	662 }

	663 } else {

	664 ASSERT(elm.text_type() == TextElement::CHAR_CLASS);

	665 RegExpCharacterClass* cc = elm.char_class();

	666 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();

	667 if (!CharacterRange::IsCanonical(ranges)) {

	668 CharacterRange::Canonicalize(ranges);

	669 }

	670 // Now they are in order so we only need to look at the first.

	671 intptr_t range_count = ranges->length();

	672 if (cc->is_negated()) {

	673 if (range_count != 0 &&

	674 ranges->At(0).from() == 0 &&

	675 ranges->At(0).to() >= Symbols::kMaxOneCharCodeSymbol) {

	676 // This will be handled in a later filter.

	677 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;

	678 return set_replacement(NULL);

	679 }

	680 } else {

	681 if (range_count == 0 \|\|

	682 ranges->At(0).from() > Symbols::kMaxOneCharCodeSymbol) {

	683 // This will be handled in a later filter.

	684 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;

	685 return set_replacement(NULL);

	686 }

	687 }

	688 }

	689 }

	690 return FilterSuccessor(depth - 1, ignore_case);

	691 }

	692

	693

	694 RegExpNode* LoopChoiceNode::FilterASCII(intptr_t depth, bool ignore_case) {

	695 if (info()->replacement_calculated) return replacement();

	696 if (depth < 0) return this;

	697 if (info()->visited) return this;

	698 {

	699 VisitMarker marker(info());

	700

	701 RegExpNode* continue_replacement =

	702 continue_node_->FilterASCII(depth - 1, ignore_case);

	703 // If we can't continue after the loop then there is no sense in doing the

	704 // loop.

	705 if (continue_replacement == NULL) return set_replacement(NULL);

	706 }

	707

	708 return ChoiceNode::FilterASCII(depth - 1, ignore_case);

	709 }

	710

	711

	712 RegExpNode* ChoiceNode::FilterASCII(intptr_t depth, bool ignore_case) {

	713 if (info()->replacement_calculated) return replacement();

	714 if (depth < 0) return this;

	715 if (info()->visited) return this;

	716 VisitMarker marker(info());

	717 intptr_t choice_count = alternatives_->length();

	718

	719 for (intptr_t i = 0; i < choice_count; i++) {

	720 GuardedAlternative alternative = alternatives_->At(i);

	721 if (alternative.guards() != NULL && alternative.guards()->length() != 0) {

	722 set_replacement(this);

	723 return this;

	724 }

	725 }

	726

	727 intptr_t surviving = 0;

	728 RegExpNode* survivor = NULL;

	729 for (intptr_t i = 0; i < choice_count; i++) {

	730 GuardedAlternative alternative = alternatives_->At(i);

	731 RegExpNode* replacement =

	732 alternative.node()->FilterASCII(depth - 1, ignore_case);

	733 ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK.

	734 if (replacement != NULL) {

	735 (*alternatives_)[i].set_node(replacement);

	736 surviving++;

	737 survivor = replacement;

	738 }

	739 }

	740 if (surviving < 2) return set_replacement(survivor);

	741

	742 set_replacement(this);

	743 if (surviving == choice_count) {

	744 return this;

	745 }

	746 // Only some of the nodes survived the filtering. We need to rebuild the

	747 // alternatives list.

	748 ZoneGrowableArray<GuardedAlternative>* new_alternatives =

	749 new(I) ZoneGrowableArray<GuardedAlternative>(surviving);

	750 for (intptr_t i = 0; i < choice_count; i++) {

	751 RegExpNode* replacement =

	752 (*alternatives_)[i].node()->FilterASCII(depth - 1, ignore_case);

	753 if (replacement != NULL) {

	754 (*alternatives_)[i].set_node(replacement);

	755 new_alternatives->Add((*alternatives_)[i]);

	756 }

	757 }

	758 alternatives_ = new_alternatives;

	759 return this;

	760 }

	761

	762

	763 RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(intptr_t depth,

	764 bool ignore_case) {

	765 if (info()->replacement_calculated) return replacement();

	766 if (depth < 0) return this;

	767 if (info()->visited) return this;

	768 VisitMarker marker(info());

	769 // Alternative 0 is the negative lookahead, alternative 1 is what comes

	770 // afterwards.

	771 RegExpNode* node = (*alternatives_)[1].node();

	772 RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case);

	773 if (replacement == NULL) return set_replacement(NULL);

	774 (*alternatives_)[1].set_node(replacement);

	775

	776 RegExpNode* neg_node = (*alternatives_)[0].node();

	777 RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case);

	778 // If the negative lookahead is always going to fail then

	779 // we don't need to check it.

	780 if (neg_replacement == NULL) return set_replacement(replacement);

	781 (*alternatives_)[0].set_node(neg_replacement);

	782 return set_replacement(this);

	783 }

	784

	785

	786 // Code emission ---------------------------------------------------------------

	787

	788

	789 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,

	790 Guard* guard,

	791 Trace* trace) {

	792 switch (guard->op()) {

	793 case Guard::LT:

	794 ASSERT(!trace->mentions_reg(guard->reg()));

	795 macro_assembler->IfRegisterGE(guard->reg(),

	796 guard->value(),

	797 trace->backtrack());

	798 break;

	799 case Guard::GEQ:

	800 ASSERT(!trace->mentions_reg(guard->reg()));

	801 macro_assembler->IfRegisterLT(guard->reg(),

	802 guard->value(),

	803 trace->backtrack());

	804 break;

	805 }

	806 }

	807

	808

	809 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {

	810 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	811

	812 // Omit flushing the trace. We discard the entire stack frame anyway.

	813

	814 if (!label()->IsBound()) {

	815 // We are completely independent of the trace, since we ignore it,

	816 // so this code can be used as the generic version.

	817 assembler->BindBlock(label());

	818 }

	819

	820 // Throw away everything on the backtrack stack since the start

	821 // of the negative submatch and restore the character position.

	822 assembler->ReadCurrentPositionFromRegister(current_position_register_);

	823 assembler->ReadStackPointerFromRegister(stack_pointer_register_);

	824 if (clear_capture_count_ > 0) {

	825 // Clear any captures that might have been performed during the success

	826 // of the body of the negative look-ahead.

	827 intptr_t clear_capture_end =

	828 clear_capture_start_ + clear_capture_count_ - 1;

	829 assembler->ClearRegisters(clear_capture_start_, clear_capture_end);

	830 }

	831 // Now that we have unwound the stack we find at the top of the stack the

	832 // backtrack that the BeginSubmatch node got.

	833 assembler->Backtrack();

	834 }

	835

	836

	837 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) {

	838 ASSERT(*cp_offset == 0);

	839 for (DeferredAction* action = actions_;

	840 action != NULL;

	841 action = action->next()) {

	842 if (action->Mentions(reg)) {

	843 if (action->action_type() == ActionNode::STORE_POSITION) {

	844 cp_offset = static_cast<DeferredCapture>(action)->cp_offset();

	845 return true;

	846 } else {

	847 return false;

	848 }

	849 }

	850 }

	851 return false;

	852 }

	853

	854

	855 // This is called as we come into a loop choice node and some other tricky

	856 // nodes. It normalizes the state of the code generator to ensure we can

	857 // generate generic code.

	858 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers,

	859 Isolate* isolate) {

	860 intptr_t max_register = RegExpCompiler::kNoRegister;

	861 for (DeferredAction* action = actions_;

	862 action != NULL;

	863 action = action->next()) {

	864 if (action->action_type() == ActionNode::CLEAR_CAPTURES) {

	865 Interval range = static_cast<DeferredClearCaptures*>(action)->range();

	866 for (intptr_t i = range.from(); i <= range.to(); i++)

	867 affected_registers->Set(i, isolate);

	868 if (range.to() > max_register) max_register = range.to();

	869 } else {

	870 affected_registers->Set(action->reg(), isolate);

	871 if (action->reg() > max_register) max_register = action->reg();

	872 }

	873 }

	874 return max_register;

	875 }

	876

	877

	878 bool Trace::DeferredAction::Mentions(intptr_t that) {

	879 if (action_type() == ActionNode::CLEAR_CAPTURES) {

	880 Interval range = static_cast<DeferredClearCaptures*>(this)->range();

	881 return range.Contains(that);

	882 } else {

	883 return reg() == that;

	884 }

	885 }

	886

	887

	888 bool Trace::mentions_reg(intptr_t reg) {

	889 for (DeferredAction* action = actions_;

	890 action != NULL;

	891 action = action->next()) {

	892 if (action->Mentions(reg))

	893 return true;

	894 }

	895 return false;

	896 }

	897

	898

	899 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,

	900 intptr_t max_register,

	901 const OutSet& registers_to_pop,

	902 const OutSet& registers_to_clear) {

	903 for (intptr_t reg = max_register; reg >= 0; reg--) {

	904 if (registers_to_pop.Get(reg)) {

	905 assembler->PopRegister(reg);

	906 } else if (registers_to_clear.Get(reg)) {

	907 intptr_t clear_to = reg;

	908 while (reg > 0 && registers_to_clear.Get(reg - 1)) {

	909 reg--;

	910 }

	911 assembler->ClearRegisters(reg, clear_to);

	912 }

	913 }

	914 }

	915

	916

	917 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,

	918 intptr_t max_register,

	919 const OutSet& affected_registers,

	920 OutSet* registers_to_pop,

	921 OutSet* registers_to_clear,

	922 Isolate* isolate) {

	923 for (intptr_t reg = 0; reg <= max_register; reg++) {

	924 if (!affected_registers.Get(reg)) {

	925 continue;

	926 }

	927

	928 // The chronologically first deferred action in the trace

	929 // is used to infer the action needed to restore a register

	930 // to its previous state (or not, if it's safe to ignore it).

	931 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };

	932 DeferredActionUndoType undo_action = IGNORE;

	933

	934 intptr_t value = 0;

	935 bool absolute = false;

	936 bool clear = false;

	937 intptr_t store_position = -1;

	938 // This is a little tricky because we are scanning the actions in reverse

	939 // historical order (newest first).

	940 for (DeferredAction* action = actions_;

	941 action != NULL;

	942 action = action->next()) {

	943 if (action->Mentions(reg)) {

	944 switch (action->action_type()) {

	945 case ActionNode::SET_REGISTER: {

	946 Trace::DeferredSetRegister* psr =

	947 static_cast<Trace::DeferredSetRegister*>(action);

	948 if (!absolute) {

	949 value += psr->value();

	950 absolute = true;

	951 }

	952 // SET_REGISTER is currently only used for newly introduced loop

	953 // counters. They can have a significant previous value if they

	954 // occour in a loop. TODO(lrn): Propagate this information, so

	955 // we can set undo_action to IGNORE if we know there is no value to

	956 // restore.

	957 undo_action = RESTORE;

	958 ASSERT(store_position == -1);

	959 ASSERT(!clear);

	960 break;

	961 }

	962 case ActionNode::INCREMENT_REGISTER:

	963 if (!absolute) {

	964 value++;

	965 }

	966 ASSERT(store_position == -1);

	967 ASSERT(!clear);

	968 undo_action = RESTORE;

	969 break;

	970 case ActionNode::STORE_POSITION: {

	971 Trace::DeferredCapture* pc =

	972 static_cast<Trace::DeferredCapture*>(action);

	973 if (!clear && store_position == -1) {

	974 store_position = pc->cp_offset();

	975 }

	976

	977 // For captures we know that stores and clears alternate.

	978 // Other register, are never cleared, and if the occur

	979 // inside a loop, they might be assigned more than once.

	980 if (reg <= 1) {

	981 // Registers zero and one, aka "capture zero", is

	982 // always set correctly if we succeed. There is no

	983 // need to undo a setting on backtrack, because we

	984 // will set it again or fail.

	985 undo_action = IGNORE;

	986 } else {

	987 undo_action = pc->is_capture() ? CLEAR : RESTORE;

	988 }

	989 ASSERT(!absolute);

	990 ASSERT(value == 0);

	991 break;

	992 }

	993 case ActionNode::CLEAR_CAPTURES: {

	994 // Since we're scanning in reverse order, if we've already

	995 // set the position we have to ignore historically earlier

	996 // clearing operations.

	997 if (store_position == -1) {

	998 clear = true;

	999 }

	1000 undo_action = RESTORE;

	1001 ASSERT(!absolute);

	1002 ASSERT(value == 0);

	1003 break;

	1004 }

	1005 default:

	1006 UNREACHABLE();

	1007 break;

	1008 }

	1009 }

	1010 }

	1011 // Prepare for the undo-action (e.g., push if it's going to be popped).

	1012 if (undo_action == RESTORE) {

	1013 assembler->PushRegister(reg);

	1014 registers_to_pop->Set(reg, isolate);

	1015 } else if (undo_action == CLEAR) {

	1016 registers_to_clear->Set(reg, isolate);

	1017 }

	1018 // Perform the chronologically last action (or accumulated increment)

	1019 // for the register.

	1020 if (store_position != -1) {

	1021 assembler->WriteCurrentPositionToRegister(reg, store_position);

	1022 } else if (clear) {

	1023 assembler->ClearRegisters(reg, reg);

	1024 } else if (absolute) {

	1025 assembler->SetRegister(reg, value);

	1026 } else if (value != 0) {

	1027 assembler->AdvanceRegister(reg, value);

	1028 }

	1029 }

	1030 }

	1031

	1032

	1033 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {

	1034 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1035

	1036 ASSERT(!is_trivial());

	1037

	1038 if (actions_ == NULL && backtrack() == NULL) {

	1039 // Here we just have some deferred cp advances to fix and we are back to

	1040 // a normal situation. We may also have to forget some information gained

	1041 // through a quick check that was already performed.

	1042 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);

	1043 // Create a new trivial state and generate the node with that.

	1044 Trace new_state;

	1045 successor->Emit(compiler, &new_state);

	1046 return;

	1047 }

	1048

	1049 // Generate deferred actions here along with code to undo them again.

	1050 OutSet affected_registers;

	1051

	1052 if (backtrack() != NULL) {

	1053 // Here we have a concrete backtrack location. These are set up by choice

	1054 // nodes and so they indicate that we have a deferred save of the current

	1055 // position which we may need to emit here.

	1056 assembler->PushCurrentPosition();

	1057 }

	1058

	1059 intptr_t max_register = FindAffectedRegisters(&affected_registers, CI);

	1060 OutSet registers_to_pop;

	1061 OutSet registers_to_clear;

	1062 PerformDeferredActions(assembler,

	1063 max_register,

	1064 affected_registers,

	1065 &registers_to_pop,

	1066 &registers_to_clear,

	1067 CI);

	1068 if (cp_offset_ != 0) {

	1069 assembler->AdvanceCurrentPosition(cp_offset_);

	1070 }

	1071

	1072 // Create a new trivial state and generate the node with that.

	1073 BlockLabel undo;

	1074 assembler->PushBacktrack(&undo);

	1075 Trace new_state;

	1076 successor->Emit(compiler, &new_state);

	1077

	1078 // On backtrack we need to restore state.

	1079 assembler->BindBlock(&undo);

	1080 RestoreAffectedRegisters(assembler,

	1081 max_register,

	1082 registers_to_pop,

	1083 registers_to_clear);

	1084 if (backtrack() == NULL) {

	1085 assembler->Backtrack();

	1086 } else {

	1087 assembler->PopCurrentPosition();

	1088 assembler->GoTo(backtrack());

	1089 }

	1090 }

	1091

	1092

	1093 void Trace::InvalidateCurrentCharacter() {

	1094 characters_preloaded_ = 0;

	1095 }

	1096

	1097

	1098 void Trace::AdvanceCurrentPositionInTrace(intptr_t by,

	1099 RegExpCompiler* compiler) {

	1100 ASSERT(by > 0);

	1101 // We don't have an instruction for shifting the current character register

	1102 // down or for using a shifted value for anything so lets just forget that

	1103 // we preloaded any characters into it.

	1104 characters_preloaded_ = 0;

	1105 // Adjust the offsets of the quick check performed information. This

	1106 // information is used to find out what we already determined about the

	1107 // characters by means of mask and compare.

	1108 quick_check_performed_.Advance(by, compiler->ascii());

	1109 cp_offset_ += by;

	1110 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {

	1111 compiler->SetRegExpTooBig();

	1112 cp_offset_ = 0;

	1113 }

	1114 bound_checked_up_to_ = Utils::Maximum(static_cast<intptr_t>(0),

	1115 bound_checked_up_to_ - by);

	1116 }

	1117

	1118

	1119 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1120 if (!trace->is_trivial()) {

	1121 trace->Flush(compiler, this);

	1122 return;

	1123 }

	1124 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1125 if (!label()->IsBound()) {

	1126 assembler->BindBlock(label());

	1127 }

	1128 switch (action_) {

	1129 case ACCEPT:

	1130 assembler->Succeed();

	1131 return;

	1132 case BACKTRACK:

	1133 assembler->GoTo(trace->backtrack());

	1134 return;

	1135 case NEGATIVE_SUBMATCH_SUCCESS:

	1136 // This case is handled in a different virtual method.

	1137 UNREACHABLE();

	1138 }

	1139 UNIMPLEMENTED();

	1140 }

	1141

	1142

	1143 bool QuickCheckDetails::Rationalize(bool asc) {

	1144 bool found_useful_op = false;

	1145 uint32_t char_mask;

	1146 if (asc) {

	1147 char_mask = Symbols::kMaxOneCharCodeSymbol;

	1148 } else {

	1149 char_mask = Utf16::kMaxCodeUnit;

	1150 }

	1151 mask_ = 0;

	1152 value_ = 0;

	1153 intptr_t char_shift = 0;

	1154 for (intptr_t i = 0; i < characters_; i++) {

	1155 Position* pos = &positions_[i];

	1156 if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != 0) {

	1157 found_useful_op = true;

	1158 }

	1159 mask_ \|= (pos->mask & char_mask) << char_shift;

	1160 value_ \|= (pos->value & char_mask) << char_shift;

	1161 char_shift += asc ? 8 : 16;

	1162 }

	1163 return found_useful_op;

	1164 }

	1165

	1166

	1167 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,

	1168 Trace* trace,

	1169 bool preload_has_checked_bounds,

	1170 BlockLabel* on_possible_success,

	1171 QuickCheckDetails* details,

	1172 bool fall_through_on_failure) {

	1173 if (details->characters() == 0) return false;

	1174 GetQuickCheckDetails(

	1175 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);

	1176 if (details->cannot_match()) return false;

	1177 if (!details->Rationalize(compiler->ascii())) return false;

	1178 ASSERT(details->characters() == 1 \|\|

	1179 compiler->macro_assembler()->CanReadUnaligned());

	1180 uint32_t mask = details->mask();

	1181 uint32_t value = details->value();

	1182

	1183 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1184

	1185 if (trace->characters_preloaded() != details->characters()) {

	1186 assembler->LoadCurrentCharacter(trace->cp_offset(),

	1187 trace->backtrack(),

	1188 !preload_has_checked_bounds,

	1189 details->characters());

	1190 }

	1191

	1192

	1193 bool need_mask = true;

	1194

	1195 if (details->characters() == 1) {

	1196 // If number of characters preloaded is 1 then we used a byte or 16 bit

	1197 // load so the value is already masked down.

	1198 uint32_t char_mask;

	1199 if (compiler->ascii()) {

	1200 char_mask = Symbols::kMaxOneCharCodeSymbol;

	1201 } else {

	1202 char_mask = Utf16::kMaxCodeUnit;

	1203 }

	1204 if ((mask & char_mask) == char_mask) need_mask = false;

	1205 mask &= char_mask;

	1206 } else {

	1207 // For 2-character preloads in ASCII mode or 1-character preloads in

	1208 // TWO_BYTE mode we also use a 16 bit load with zero extend.

	1209 if (details->characters() == 2 && compiler->ascii()) {

	1210 if ((mask & 0xffff) == 0xffff) need_mask = false;

	1211 } else if (details->characters() == 1 && !compiler->ascii()) {

	1212 if ((mask & 0xffff) == 0xffff) need_mask = false;

	1213 } else {

	1214 if (mask == 0xffffffff) need_mask = false;

	1215 }

	1216 }

	1217

	1218 if (fall_through_on_failure) {

	1219 if (need_mask) {

	1220 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);

	1221 } else {

	1222 assembler->CheckCharacter(value, on_possible_success);

	1223 }

	1224 } else {

	1225 if (need_mask) {

	1226 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());

	1227 } else {

	1228 assembler->CheckNotCharacter(value, trace->backtrack());

	1229 }

	1230 }

	1231 return true;

	1232 }

	1233

	1234

	1235 // Emit the code to check for a ^ in multiline mode (1-character lookbehind

	1236 // that matches newline or the start of input).

	1237 static void EmitHat(RegExpCompiler* compiler,

	1238 RegExpNode* on_success,

	1239 Trace* trace) {

	1240 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1241 // We will be loading the previous character into the current character

	1242 // register.

	1243 Trace new_trace(*trace);

	1244 new_trace.InvalidateCurrentCharacter();

	1245

	1246 BlockLabel ok;

	1247 if (new_trace.cp_offset() == 0) {

	1248 // The start of input counts as a newline in this context, so skip to

	1249 // ok if we are at the start.

	1250 assembler->CheckAtStart(&ok);

	1251 }

	1252 // We already checked that we are not at the start of input so it must be

	1253 // OK to load the previous character.

	1254 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,

	1255 new_trace.backtrack(),

	1256 false);

	1257 if (!assembler->CheckSpecialCharacterClass('n',

	1258 new_trace.backtrack())) {

	1259 // Newline means \n, \r, 0x2028 or 0x2029.

	1260 if (!compiler->ascii()) {

	1261 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);

	1262 }

	1263 assembler->CheckCharacter('\n', &ok);

	1264 assembler->CheckNotCharacter('\r', new_trace.backtrack());

	1265 }

	1266 assembler->BindBlock(&ok);

	1267 on_success->Emit(compiler, &new_trace);

	1268 }

	1269

	1270

	1271 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1272 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1273 switch (assertion_type_) {

	1274 case AT_END: {

	1275 BlockLabel ok;

	1276 assembler->CheckPosition(trace->cp_offset(), &ok);

	1277 assembler->GoTo(trace->backtrack());

	1278 assembler->BindBlock(&ok);

	1279 break;

	1280 }

	1281 case AT_START: {

	1282 if (trace->at_start() == Trace::FALSE_VALUE) {

	1283 assembler->GoTo(trace->backtrack());

	1284 return;

	1285 }

	1286 if (trace->at_start() == Trace::UNKNOWN) {

	1287 assembler->CheckNotAtStart(trace->backtrack());

	1288 Trace at_start_trace = *trace;

	1289 at_start_trace.set_at_start(true);

	1290 on_success()->Emit(compiler, &at_start_trace);

	1291 return;

	1292 }

	1293 }

	1294 break;

	1295 case AFTER_NEWLINE:

	1296 EmitHat(compiler, on_success(), trace);

	1297 return;

	1298 case AT_BOUNDARY:

	1299 case AT_NON_BOUNDARY: {

	1300 EmitBoundaryCheck(compiler, trace);

	1301 return;

	1302 }

	1303 }

	1304 on_success()->Emit(compiler, trace);

	1305 }

	1306

	1307

	1308 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,

	1309 Trace* trace) {

	1310 // If we are generating a greedy loop then don't stop and don't reuse code.

	1311 if (trace->stop_node() != NULL) {

	1312 return CONTINUE;

	1313 }

	1314

	1315 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1316 if (trace->is_trivial()) {

	1317 if (label_.IsBound()) {

	1318 // We are being asked to generate a generic version, but that's already

	1319 // been done so just go to it.

	1320 macro_assembler->GoTo(&label_);

	1321 return DONE;

	1322 }

	1323 if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {

	1324 // To avoid too deep recursion we push the node to the work queue and just

	1325 // generate a goto here.

	1326 compiler->AddWork(this);

	1327 macro_assembler->GoTo(&label_);

	1328 return DONE;

	1329 }

	1330 // Generate generic version of the node and bind the label for later use.

	1331 macro_assembler->BindBlock(&label_);

	1332 return CONTINUE;

	1333 }

	1334

	1335 // We are being asked to make a non-generic version. Keep track of how many

	1336 // non-generic versions we generate so as not to overdo it.

	1337 trace_count_++;

	1338 if (kRegexpOptimization &&

	1339 trace_count_ < kMaxCopiesCodeGenerated &&

	1340 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {

	1341 return CONTINUE;

	1342 }

	1343

	1344 // If we get here code has been generated for this node too many times or

	1345 // recursion is too deep. Time to switch to a generic version. The code for

	1346 // generic versions above can handle deep recursion properly.

	1347 trace->Flush(compiler, this);

	1348 return DONE;

	1349 }

	1350

	1351

	1352 // This generates the code to match a text node. A text node can contain

	1353 // straight character sequences (possibly to be matched in a case-independent

	1354 // way) and character classes. For efficiency we do not do this in a single

	1355 // pass from left to right. Instead we pass over the text node several times,

	1356 // emitting code for some character positions every time. See the comment on

	1357 // TextEmitPass for details.

	1358 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1359 LimitResult limit_result = LimitVersions(compiler, trace);

	1360 if (limit_result == DONE) return;

	1361 ASSERT(limit_result == CONTINUE);

	1362

	1363 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {

	1364 compiler->SetRegExpTooBig();

	1365 return;

	1366 }

	1367

	1368 if (compiler->ascii()) {

	1369 intptr_t dummy = 0;

	1370 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);

	1371 }

	1372

	1373 bool first_elt_done = false;

	1374 intptr_t bound_checked_to = trace->cp_offset() - 1;

	1375 bound_checked_to += trace->bound_checked_up_to();

	1376

	1377 // If a character is preloaded into the current character register then

	1378 // check that now.

	1379 if (trace->characters_preloaded() == 1) {

	1380 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {

	1381 if (!SkipPass(pass, compiler->ignore_case())) {

	1382 TextEmitPass(compiler,

	1383 static_cast<TextEmitPassType>(pass),

	1384 true,

	1385 trace,

	1386 false,

	1387 &bound_checked_to);

	1388 }

	1389 }

	1390 first_elt_done = true;

	1391 }

	1392

	1393 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {

	1394 if (!SkipPass(pass, compiler->ignore_case())) {

	1395 TextEmitPass(compiler,

	1396 static_cast<TextEmitPassType>(pass),

	1397 false,

	1398 trace,

	1399 first_elt_done,

	1400 &bound_checked_to);

	1401 }

	1402 }

	1403

	1404 Trace successor_trace(*trace);

	1405 successor_trace.set_at_start(false);

	1406 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);

	1407 RecursionCheck rc(compiler);

	1408 on_success()->Emit(compiler, &successor_trace);

	1409 }

	1410

	1411

	1412 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1413 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1414 if (trace->stop_node() == this) {

	1415 intptr_t text_length =

	1416 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0]));

	1417 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);

	1418 // Update the counter-based backtracking info on the stack. This is an

	1419 // optimization for greedy loops (see below).

	1420 ASSERT(trace->cp_offset() == text_length);

	1421 macro_assembler->AdvanceCurrentPosition(text_length);

	1422 macro_assembler->GoTo(trace->loop_label());

	1423 return;

	1424 }

	1425 ASSERT(trace->stop_node() == NULL);

	1426 if (!trace->is_trivial()) {

	1427 trace->Flush(compiler, this);

	1428 return;

	1429 }

	1430 ChoiceNode::Emit(compiler, trace);

	1431 }

	1432

	1433

	1434 // This class is used when generating the alternatives in a choice node. It

	1435 // records the way the alternative is being code generated.

	1436 struct AlternativeGeneration {

	1437 AlternativeGeneration()

	1438 : possible_success(),

	1439 expects_preload(false),

	1440 after(),

	1441 quick_check_details() { }

	1442 BlockLabel possible_success;

	1443 bool expects_preload;

	1444 BlockLabel after;

	1445 QuickCheckDetails quick_check_details;

	1446 };

	1447

	1448

	1449 // Creates a list of AlternativeGenerations. If the list has a reasonable

	1450 // size then it is on the stack, otherwise the excess is on the heap.

	1451 class AlternativeGenerationList {

	1452 public:

	1453 explicit AlternativeGenerationList(intptr_t count)

	1454 : alt_gens_(count) {

	1455 for (intptr_t i = 0; i < count && i < kAFew; i++) {

	1456 alt_gens_.Add(a_few_alt_gens_ + i);

	1457 }

	1458 for (intptr_t i = kAFew; i < count; i++) {

	1459 alt_gens_.Add(new AlternativeGeneration());

	1460 }

	1461 }

	1462 ~AlternativeGenerationList() {

	1463 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) {

	1464 delete alt_gens_[i];

	1465 alt_gens_[i] = NULL;

	1466 }

	1467 }

	1468

	1469 AlternativeGeneration* at(intptr_t i) {

	1470 return alt_gens_[i];

	1471 }

	1472

	1473 private:

	1474 static const intptr_t kAFew = 10;

	1475 GrowableArray<AlternativeGeneration*> alt_gens_;

	1476 AlternativeGeneration a_few_alt_gens_[kAFew];

	1477

	1478 DISALLOW_ALLOCATION();

	1479 };

	1480

	1481

	1482 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1483 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1484 intptr_t choice_count = alternatives_->length();

	1485

	1486 #ifdef DEBUG

	1487 for (intptr_t i = 0; i < choice_count - 1; i++) {

	1488 GuardedAlternative alternative = alternatives_->At(i);

	1489 ZoneGrowableArray<Guard> guards = alternative.guards();

	1490 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();

	1491 for (intptr_t j = 0; j < guard_count; j++) {

	1492 ASSERT(!trace->mentions_reg(guards->At(j)->reg()));

	1493 }

	1494 }

	1495 #endif

	1496

	1497 LimitResult limit_result = LimitVersions(compiler, trace);

	1498 if (limit_result == DONE) return;

	1499 ASSERT(limit_result == CONTINUE);

	1500

	1501 intptr_t new_flush_budget = trace->flush_budget() / choice_count;

	1502 if (trace->flush_budget() == 0 && trace->actions() != NULL) {

	1503 trace->Flush(compiler, this);

	1504 return;

	1505 }

	1506

	1507 RecursionCheck rc(compiler);

	1508

	1509 Trace* current_trace = trace;

	1510

	1511 intptr_t text_length =

	1512 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0]));

	1513 bool greedy_loop = false;

	1514 BlockLabel greedy_loop_label;

	1515 Trace counter_backtrack_trace;

	1516 counter_backtrack_trace.set_backtrack(&greedy_loop_label);

	1517 if (not_at_start()) counter_backtrack_trace.set_at_start(false);

	1518

	1519 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {

	1520 // Here we have special handling for greedy loops containing only text nodes

	1521 // and other simple nodes. These are handled by pushing the current

	1522 // position on the stack and then incrementing the current position each

	1523 // time around the switch. On backtrack we decrement the current position

	1524 // and check it against the pushed value. This avoids pushing backtrack

	1525 // information for each iteration of the loop, which could take up a lot of

	1526 // space.

	1527 greedy_loop = true;

	1528 ASSERT(trace->stop_node() == NULL);

	1529 macro_assembler->PushCurrentPosition();

	1530 current_trace = &counter_backtrack_trace;

	1531 BlockLabel greedy_match_failed;

	1532 Trace greedy_match_trace;

	1533 if (not_at_start()) greedy_match_trace.set_at_start(false);

	1534 greedy_match_trace.set_backtrack(&greedy_match_failed);

	1535 BlockLabel loop_label;

	1536 macro_assembler->BindBlock(&loop_label);

	1537 greedy_match_trace.set_stop_node(this);

	1538 greedy_match_trace.set_loop_label(&loop_label);

	1539 (*alternatives_)[0].node()->Emit(compiler, &greedy_match_trace);

	1540 macro_assembler->BindBlock(&greedy_match_failed);

	1541 }

	1542

	1543 BlockLabel second_choice; // For use in greedy matches.

	1544 macro_assembler->BindBlock(&second_choice);

	1545

	1546 intptr_t first_normal_choice = greedy_loop ? 1 : 0;

	1547

	1548 bool not_at_start = current_trace->at_start() == Trace::FALSE_VALUE;

	1549 const intptr_t kEatsAtLeastNotYetInitialized = -1;

	1550 intptr_t eats_at_least = kEatsAtLeastNotYetInitialized;

	1551

	1552 bool skip_was_emitted = false;

	1553

	1554 if (!greedy_loop && choice_count == 2) {

	1555 GuardedAlternative alt1 = (*alternatives_)[1];

	1556 if (alt1.guards() == NULL \|\| alt1.guards()->length() == 0) {

	1557 RegExpNode* eats_anything_node = alt1.node();

	1558 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==

	1559 this) {

	1560 // At this point we know that we are at a non-greedy loop that will eat

	1561 // any character one at a time. Any non-anchored regexp has such a

	1562 // loop prepended to it in order to find where it starts. We look for

	1563 // a pattern of the form ...abc... where we can look 6 characters ahead

	1564 // and step forwards 3 if the character is not one of abc. Abc need

	1565 // not be atoms, they can be any reasonably limited character class or

	1566 // small alternation.

	1567 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.

	1568 BoyerMooreLookahead* lookahead = bm_info(not_at_start);

	1569 if (lookahead == NULL) {

	1570 eats_at_least = Utils::Minimum(kMaxLookaheadForBoyerMoore,

	1571 EatsAtLeast(kMaxLookaheadForBoyerMoore,

	1572 kRecursionBudget,

	1573 not_at_start));

	1574 if (eats_at_least >= 1) {

	1575 BoyerMooreLookahead* bm =

	1576 new(I) BoyerMooreLookahead(eats_at_least, compiler, I);

	1577 GuardedAlternative alt0 = alternatives_->At(0);

	1578 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, not_at_start);

	1579 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);

	1580 }

	1581 } else {

	1582 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);

	1583 }

	1584 }

	1585 }

	1586 }

	1587

	1588 if (eats_at_least == kEatsAtLeastNotYetInitialized) {

	1589 // Save some time by looking at most one machine word ahead.

	1590 eats_at_least =

	1591 EatsAtLeast(compiler->ascii() ? 4 : 2, kRecursionBudget, not_at_start);

	1592 }

	1593 intptr_t preload_characters =

	1594 CalculatePreloadCharacters(compiler, eats_at_least);

	1595

	1596 bool preload_is_current = !skip_was_emitted &&

	1597 (current_trace->characters_preloaded() == preload_characters);

	1598 bool preload_has_checked_bounds = preload_is_current;

	1599

	1600 AlternativeGenerationList alt_gens(choice_count);

	1601

	1602 // For now we just call all choices one after the other. The idea ultimately

	1603 // is to use the Dispatch table to try only the relevant ones.

	1604 for (intptr_t i = first_normal_choice; i < choice_count; i++) {

	1605 GuardedAlternative alternative = alternatives_->At(i);

	1606 AlternativeGeneration* alt_gen = alt_gens.at(i);

	1607 alt_gen->quick_check_details.set_characters(preload_characters);

	1608 ZoneGrowableArray<Guard> guards = alternative.guards();

	1609 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();

	1610 Trace new_trace(*current_trace);

	1611 new_trace.set_characters_preloaded(preload_is_current ?

	1612 preload_characters :

	1613 0);

	1614 if (preload_has_checked_bounds) {

	1615 new_trace.set_bound_checked_up_to(preload_characters);

	1616 }

	1617 new_trace.quick_check_performed()->Clear();

	1618 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);

	1619 alt_gen->expects_preload = preload_is_current;

	1620 bool generate_full_check_inline = false;

	1621 if (kRegexpOptimization &&

	1622 try_to_emit_quick_check_for_alternative(i) &&

	1623 alternative.node()->EmitQuickCheck(compiler,

	1624 &new_trace,

	1625 preload_has_checked_bounds,

	1626 &alt_gen->possible_success,

	1627 &alt_gen->quick_check_details,

	1628 i < choice_count - 1)) {

	1629 // Quick check was generated for this choice.

	1630 preload_is_current = true;

	1631 preload_has_checked_bounds = true;

	1632 // On the last choice in the ChoiceNode we generated the quick

	1633 // check to fall through on possible success. So now we need to

	1634 // generate the full check inline.

	1635 if (i == choice_count - 1) {

	1636 macro_assembler->BindBlock(&alt_gen->possible_success);

	1637 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);

	1638 new_trace.set_characters_preloaded(preload_characters);

	1639 new_trace.set_bound_checked_up_to(preload_characters);

	1640 generate_full_check_inline = true;

	1641 }

	1642 } else if (alt_gen->quick_check_details.cannot_match()) {

	1643 if (i == choice_count - 1 && !greedy_loop) {

	1644 macro_assembler->GoTo(trace->backtrack());

	1645 }

	1646 continue;

	1647 } else {

	1648 // No quick check was generated. Put the full code here.

	1649 // If this is not the first choice then there could be slow checks from

	1650 // previous cases that go here when they fail. There's no reason to

	1651 // insist that they preload characters since the slow check we are about

	1652 // to generate probably can't use it.

	1653 if (i != first_normal_choice) {

	1654 alt_gen->expects_preload = false;

	1655 new_trace.InvalidateCurrentCharacter();

	1656 }

	1657 if (i < choice_count - 1) {

	1658 new_trace.set_backtrack(&alt_gen->after);

	1659 }

	1660 generate_full_check_inline = true;

	1661 }

	1662 if (generate_full_check_inline) {

	1663 if (new_trace.actions() != NULL) {

	1664 new_trace.set_flush_budget(new_flush_budget);

	1665 }

	1666 for (intptr_t j = 0; j < guard_count; j++) {

	1667 GenerateGuard(macro_assembler, guards->At(j), &new_trace);

	1668 }

	1669 alternative.node()->Emit(compiler, &new_trace);

	1670 preload_is_current = false;

	1671 }

	1672 macro_assembler->BindBlock(&alt_gen->after);

	1673 }

	1674 if (greedy_loop) {

	1675 macro_assembler->BindBlock(&greedy_loop_label);

	1676 // If we have unwound to the bottom then backtrack.

	1677 macro_assembler->CheckGreedyLoop(trace->backtrack());

	1678 // Otherwise try the second priority at an earlier position.

	1679 macro_assembler->AdvanceCurrentPosition(-text_length);

	1680 macro_assembler->GoTo(&second_choice);

	1681 }

	1682

	1683 // At this point we need to generate slow checks for the alternatives where

	1684 // the quick check was inlined. We can recognize these because the associated

	1685 // label was bound.

	1686 for (intptr_t i = first_normal_choice; i < choice_count - 1; i++) {

	1687 AlternativeGeneration* alt_gen = alt_gens.at(i);

	1688 Trace new_trace(*current_trace);

	1689 // If there are actions to be flushed we have to limit how many times

	1690 // they are flushed. Take the budget of the parent trace and distribute

	1691 // it fairly amongst the children.

	1692 if (new_trace.actions() != NULL) {

	1693 new_trace.set_flush_budget(new_flush_budget);

	1694 }

	1695 EmitOutOfLineContinuation(compiler,

	1696 &new_trace,

	1697 alternatives_->At(i),

	1698 alt_gen,

	1699 preload_characters,

	1700 alt_gens.at(i + 1)->expects_preload);

	1701 }

	1702 }

	1703

	1704

	1705 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,

	1706 Trace* trace,

	1707 GuardedAlternative alternative,

	1708 AlternativeGeneration* alt_gen,

	1709 intptr_t preload_characters,

	1710 bool next_expects_preload) {

	1711 if (!alt_gen->possible_success.IsLinked()) return;

	1712

	1713 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1714 macro_assembler->BindBlock(&alt_gen->possible_success);

	1715 Trace out_of_line_trace(*trace);

	1716 out_of_line_trace.set_characters_preloaded(preload_characters);

	1717 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);

	1718 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);

	1719 ZoneGrowableArray<Guard> guards = alternative.guards();

	1720 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();

	1721 if (next_expects_preload) {

	1722 BlockLabel reload_current_char;

	1723 out_of_line_trace.set_backtrack(&reload_current_char);

	1724 for (intptr_t j = 0; j < guard_count; j++) {

	1725 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace);

	1726 }

	1727 alternative.node()->Emit(compiler, &out_of_line_trace);

	1728 macro_assembler->BindBlock(&reload_current_char);

	1729 // Reload the current character, since the next quick check expects that.

	1730 // We don't need to check bounds here because we only get into this

	1731 // code through a quick check which already did the checked load.

	1732 macro_assembler->LoadCurrentCharacter(trace->cp_offset(),

	1733 NULL,

	1734 false,

	1735 preload_characters);

	1736 macro_assembler->GoTo(&(alt_gen->after));

	1737 } else {

	1738 out_of_line_trace.set_backtrack(&(alt_gen->after));

	1739 for (intptr_t j = 0; j < guard_count; j++) {

	1740 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace);

	1741 }

	1742 alternative.node()->Emit(compiler, &out_of_line_trace);

	1743 }

	1744 }

	1745

	1746

	1747 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1748 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1749 LimitResult limit_result = LimitVersions(compiler, trace);

	1750 if (limit_result == DONE) return;

	1751 ASSERT(limit_result == CONTINUE);

	1752

	1753 RecursionCheck rc(compiler);

	1754

	1755 switch (action_type_) {

	1756 case STORE_POSITION: {

	1757 Trace::DeferredCapture

	1758 new_capture(data_.u_position_register.reg,

	1759 data_.u_position_register.is_capture,

	1760 trace);

	1761 Trace new_trace = *trace;

	1762 new_trace.add_action(&new_capture);

	1763 on_success()->Emit(compiler, &new_trace);

	1764 break;

	1765 }

	1766 case INCREMENT_REGISTER: {

	1767 Trace::DeferredIncrementRegister

	1768 new_increment(data_.u_increment_register.reg);

	1769 Trace new_trace = *trace;

	1770 new_trace.add_action(&new_increment);

	1771 on_success()->Emit(compiler, &new_trace);

	1772 break;

	1773 }

	1774 case SET_REGISTER: {

	1775 Trace::DeferredSetRegister

	1776 new_set(data_.u_store_register.reg, data_.u_store_register.value);

	1777 Trace new_trace = *trace;

	1778 new_trace.add_action(&new_set);

	1779 on_success()->Emit(compiler, &new_trace);

	1780 break;

	1781 }

	1782 case CLEAR_CAPTURES: {

	1783 Trace::DeferredClearCaptures

	1784 new_capture(Interval(data_.u_clear_captures.range_from,

	1785 data_.u_clear_captures.range_to));

	1786 Trace new_trace = *trace;

	1787 new_trace.add_action(&new_capture);

	1788 on_success()->Emit(compiler, &new_trace);

	1789 break;

	1790 }

	1791 case BEGIN_SUBMATCH:

	1792 if (!trace->is_trivial()) {

	1793 trace->Flush(compiler, this);

	1794 } else {

	1795 assembler->WriteCurrentPositionToRegister(

	1796 data_.u_submatch.current_position_register, 0);

	1797 assembler->WriteStackPointerToRegister(

	1798 data_.u_submatch.stack_pointer_register);

	1799 on_success()->Emit(compiler, trace);

	1800 }

	1801 break;

	1802 case EMPTY_MATCH_CHECK: {

	1803 intptr_t start_pos_reg = data_.u_empty_match_check.start_register;

	1804 intptr_t stored_pos = 0;

	1805 intptr_t rep_reg = data_.u_empty_match_check.repetition_register;

	1806 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);

	1807 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);

	1808 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {

	1809 // If we know we haven't advanced and there is no minimum we

	1810 // can just backtrack immediately.

	1811 assembler->GoTo(trace->backtrack());

	1812 } else if (know_dist && stored_pos < trace->cp_offset()) {

	1813 // If we know we've advanced we can generate the continuation

	1814 // immediately.

	1815 on_success()->Emit(compiler, trace);

	1816 } else if (!trace->is_trivial()) {

	1817 trace->Flush(compiler, this);

	1818 } else {

	1819 BlockLabel skip_empty_check;

	1820 // If we have a minimum number of repetitions we check the current

	1821 // number first and skip the empty check if it's not enough.

	1822 if (has_minimum) {

	1823 intptr_t limit = data_.u_empty_match_check.repetition_limit;

	1824 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);

	1825 }

	1826 // If the match is empty we bail out, otherwise we fall through

	1827 // to the on-success continuation.

	1828 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,

	1829 trace->backtrack());

	1830 assembler->BindBlock(&skip_empty_check);

	1831 on_success()->Emit(compiler, trace);

	1832 }

	1833 break;

	1834 }

	1835 case POSITIVE_SUBMATCH_SUCCESS: {

	1836 if (!trace->is_trivial()) {

	1837 trace->Flush(compiler, this);

	1838 return;

	1839 }

	1840 assembler->ReadCurrentPositionFromRegister(

	1841 data_.u_submatch.current_position_register);

	1842 assembler->ReadStackPointerFromRegister(

	1843 data_.u_submatch.stack_pointer_register);

	1844 intptr_t clear_register_count = data_.u_submatch.clear_register_count;

	1845 if (clear_register_count == 0) {

	1846 on_success()->Emit(compiler, trace);

	1847 return;

	1848 }

	1849 intptr_t clear_registers_from = data_.u_submatch.clear_register_from;

	1850 BlockLabel clear_registers_backtrack;

	1851 Trace new_trace = *trace;

	1852 new_trace.set_backtrack(&clear_registers_backtrack);

	1853 on_success()->Emit(compiler, &new_trace);

	1854

	1855 assembler->BindBlock(&clear_registers_backtrack);

	1856 intptr_t clear_registers_to =

	1857 clear_registers_from + clear_register_count - 1;

	1858 assembler->ClearRegisters(clear_registers_from, clear_registers_to);

	1859

	1860 ASSERT(trace->backtrack() == NULL);

	1861 assembler->Backtrack();

	1862 return;

	1863 }

	1864 default:

	1865 UNREACHABLE();

	1866 }

	1867 }

	1868

	1869

	1870 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1871 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1872 if (!trace->is_trivial()) {

	1873 trace->Flush(compiler, this);

	1874 return;

	1875 }

	1876

	1877 LimitResult limit_result = LimitVersions(compiler, trace);

	1878 if (limit_result == DONE) return;

	1879 ASSERT(limit_result == CONTINUE);

	1880

	1881 RecursionCheck rc(compiler);

	1882

	1883 ASSERT(start_reg_ + 1 == end_reg_);

	1884 if (compiler->ignore_case()) {

	1885 assembler->CheckNotBackReferenceIgnoreCase(start_reg_,

	1886 trace->backtrack());

	1887 } else {

	1888 assembler->CheckNotBackReference(start_reg_, trace->backtrack());

	1889 }

	1890 on_success()->Emit(compiler, trace);

	1891 }

	1892

	1893

	1894 void ActionNode::FillInBMInfo(intptr_t offset,

	1895 intptr_t budget,

	1896 BoyerMooreLookahead* bm,

	1897 bool not_at_start) {

	1898 if (action_type_ == BEGIN_SUBMATCH) {

	1899 bm->SetRest(offset);

	1900 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {

	1901 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);

	1902 }

	1903 SaveBMInfo(bm, not_at_start, offset);

	1904 }

	1905

	1906

	1907 void AssertionNode::FillInBMInfo(intptr_t offset,

	1908 intptr_t budget,

	1909 BoyerMooreLookahead* bm,

	1910 bool not_at_start) {

	1911 // Match the behaviour of EatsAtLeast on this node.

	1912 if (assertion_type() == AT_START && not_at_start) return;

	1913 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);

	1914 SaveBMInfo(bm, not_at_start, offset);

	1915 }

	1916

	1917

	1918 void BackReferenceNode::FillInBMInfo(intptr_t offset,

	1919 intptr_t budget,

	1920 BoyerMooreLookahead* bm,

	1921 bool not_at_start) {

	1922 // Working out the set of characters that a backreference can match is too

	1923 // hard, so we just say that any character can match.

	1924 bm->SetRest(offset);

	1925 SaveBMInfo(bm, not_at_start, offset);

	1926 }

	1927

	1928

	1929 // Returns the number of characters in the equivalence class, omitting those

	1930 // that cannot occur in the source string because it is ASCII.

	1931 static intptr_t GetCaseIndependentLetters(uint16_t character,

	1932 bool ascii_subject,

	1933 int32_t* letters) {

	1934 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;

	1935 intptr_t length = jsregexp_uncanonicalize.get(character, '\0', letters);

	1936 // Unibrow returns 0 or 1 for characters where case independence is

	1937 // trivial.

	1938 if (length == 0) {

	1939 letters[0] = character;

	1940 length = 1;

	1941 }

	1942 if (!ascii_subject \|\| character <= Symbols::kMaxOneCharCodeSymbol) {

	1943 return length;

	1944 }

	1945 // The standard requires that non-ASCII characters cannot have ASCII

	1946 // character codes in their equivalence class.

	1947 return 0;

	1948 }

	1949

	1950

	1951 void ChoiceNode::FillInBMInfo(intptr_t offset,

	1952 intptr_t budget,

	1953 BoyerMooreLookahead* bm,

	1954 bool not_at_start) {

	1955 ZoneGrowableArray<GuardedAlternative>* alts = alternatives();

	1956 budget = (budget - 1) / alts->length();

	1957 for (intptr_t i = 0; i < alts->length(); i++) {

	1958 GuardedAlternative& alt = (*alts)[i];

	1959 if (alt.guards() != NULL && alt.guards()->length() != 0) {

	1960 bm->SetRest(offset); // Give up trying to fill in info.

	1961 SaveBMInfo(bm, not_at_start, offset);

	1962 return;

	1963 }

	1964 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start);

	1965 }

	1966 SaveBMInfo(bm, not_at_start, offset);

	1967 }

	1968

	1969

	1970 void EndNode::FillInBMInfo(intptr_t offset,

	1971 intptr_t budget,

	1972 BoyerMooreLookahead* bm,

	1973 bool not_at_start) {

	1974 // Returning 0 from EatsAtLeast should ensure we never get here.

	1975 UNREACHABLE();

	1976 }

	1977

	1978

	1979 void LoopChoiceNode::FillInBMInfo(intptr_t offset,

	1980 intptr_t budget,

	1981 BoyerMooreLookahead* bm,

	1982 bool not_at_start) {

	1983 if (body_can_be_zero_length_ \|\| budget <= 0) {

	1984 bm->SetRest(offset);

	1985 SaveBMInfo(bm, not_at_start, offset);

	1986 return;

	1987 }

	1988 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start);

	1989 SaveBMInfo(bm, not_at_start, offset);

	1990 }

	1991

	1992

	1993 void TextNode::FillInBMInfo(intptr_t initial_offset,

	1994 intptr_t budget,

	1995 BoyerMooreLookahead* bm,

	1996 bool not_at_start) {

	1997 if (initial_offset >= bm->length()) return;

	1998 intptr_t offset = initial_offset;

	1999 intptr_t max_char = bm->max_char();

	2000 for (intptr_t i = 0; i < elements()->length(); i++) {

	2001 if (offset >= bm->length()) {

	2002 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	2003 return;

	2004 }

	2005 TextElement text = elements()->At(i);

	2006 if (text.text_type() == TextElement::ATOM) {

	2007 RegExpAtom* atom = text.atom();

	2008 for (intptr_t j = 0; j < atom->length(); j++, offset++) {

	2009 if (offset >= bm->length()) {

	2010 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	2011 return;

	2012 }

	2013 uint16_t character = atom->data()->At(j);

	2014 if (bm->compiler()->ignore_case()) {

	2015 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	2016 intptr_t length = GetCaseIndependentLetters(

	2017 character,

	2018 bm->max_char() == Symbols::kMaxOneCharCodeSymbol,

	2019 chars);

	2020 for (intptr_t j = 0; j < length; j++) {

	2021 bm->Set(offset, chars[j]);

	2022 }

	2023 } else {

	2024 if (character <= max_char) bm->Set(offset, character);

	2025 }

	2026 }

	2027 } else {

	2028 ASSERT(text.text_type() == TextElement::CHAR_CLASS);

	2029 RegExpCharacterClass* char_class = text.char_class();

	2030 ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges();

	2031 if (char_class->is_negated()) {

	2032 bm->SetAll(offset);

	2033 } else {

	2034 for (intptr_t k = 0; k < ranges->length(); k++) {

	2035 CharacterRange& range = (*ranges)[k];

	2036 if (range.from() > max_char) continue;

	2037 intptr_t to = Utils::Minimum(max_char,

	2038 static_cast<intptr_t>(range.to()));

	2039 bm->SetInterval(offset, Interval(range.from(), to));

	2040 }

	2041 }

	2042 offset++;

	2043 }

	2044 }

	2045 if (offset >= bm->length()) {

	2046 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	2047 return;

	2048 }

	2049 on_success()->FillInBMInfo(offset,

	2050 budget - 1,

	2051 bm,

	2052 true); // Not at start after a text node.

	2053 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	2054 }

	2055

	2056

	2057 // Check for [0-9A-Z_a-z].

	2058 static void EmitWordCheck(RegExpMacroAssembler* assembler,

	2059 BlockLabel* word,

	2060 BlockLabel* non_word,

	2061 bool fall_through_on_word) {

	2062 if (assembler->CheckSpecialCharacterClass(

	2063 fall_through_on_word ? 'w' : 'W',

	2064 fall_through_on_word ? non_word : word)) {

	2065 // Optimized implementation available.

	2066 return;

	2067 }

	2068 assembler->CheckCharacterGT('z', non_word);

	2069 assembler->CheckCharacterLT('0', non_word);

	2070 assembler->CheckCharacterGT('a' - 1, word);

	2071 assembler->CheckCharacterLT('9' + 1, word);

	2072 assembler->CheckCharacterLT('A', non_word);

	2073 assembler->CheckCharacterLT('Z' + 1, word);

	2074 if (fall_through_on_word) {

	2075 assembler->CheckNotCharacter('_', non_word);

	2076 } else {

	2077 assembler->CheckCharacter('_', word);

	2078 }

	2079 }

	2080

	2081

	2082 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).

	2083 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {

	2084 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	2085 Trace::TriBool next_is_word_character = Trace::UNKNOWN;

	2086 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);

	2087 BoyerMooreLookahead* lookahead = bm_info(not_at_start);

	2088 if (lookahead == NULL) {

	2089 intptr_t eats_at_least =

	2090 Utils::Minimum(kMaxLookaheadForBoyerMoore,

	2091 EatsAtLeast(kMaxLookaheadForBoyerMoore,

	2092 kRecursionBudget,

	2093 not_at_start));

	2094 if (eats_at_least >= 1) {

	2095 BoyerMooreLookahead* bm =

	2096 new(I) BoyerMooreLookahead(eats_at_least, compiler, I);

	2097 FillInBMInfo(0, kRecursionBudget, bm, not_at_start);

	2098 if (bm->at(0)->is_non_word())

	2099 next_is_word_character = Trace::FALSE_VALUE;

	2100 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;

	2101 }

	2102 } else {

	2103 if (lookahead->at(0)->is_non_word())

	2104 next_is_word_character = Trace::FALSE_VALUE;

	2105 if (lookahead->at(0)->is_word())

	2106 next_is_word_character = Trace::TRUE_VALUE;

	2107 }

	2108 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);

	2109 if (next_is_word_character == Trace::UNKNOWN) {

	2110 BlockLabel before_non_word;

	2111 BlockLabel before_word;

	2112 if (trace->characters_preloaded() != 1) {

	2113 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);

	2114 }

	2115 // Fall through on non-word.

	2116 EmitWordCheck(assembler, &before_word, &before_non_word, false);

	2117 // Next character is not a word character.

	2118 assembler->BindBlock(&before_non_word);

	2119 BlockLabel ok;

	2120 // Backtrack on \B (non-boundary check) if previous is a word,

	2121 // since we know next is not a word and this would be a boundary.

	2122 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);

	2123

	2124 if (!assembler->IsClosed()) {

	2125 assembler->GoTo(&ok);

	2126 }

	2127

	2128 assembler->BindBlock(&before_word);

	2129 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);

	2130 assembler->BindBlock(&ok);

	2131 } else if (next_is_word_character == Trace::TRUE_VALUE) {

	2132 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);

	2133 } else {

	2134 ASSERT(next_is_word_character == Trace::FALSE_VALUE);

	2135 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);

	2136 }

	2137 }

	2138

	2139

	2140 void AssertionNode::BacktrackIfPrevious(

	2141 RegExpCompiler* compiler,

	2142 Trace* trace,

	2143 AssertionNode::IfPrevious backtrack_if_previous) {

	2144 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	2145 Trace new_trace(*trace);

	2146 new_trace.InvalidateCurrentCharacter();

	2147

	2148 BlockLabel fall_through, dummy;

	2149

	2150 BlockLabel* non_word = backtrack_if_previous == kIsNonWord ?

	2151 new_trace.backtrack() :

	2152 &fall_through;

	2153 BlockLabel* word = backtrack_if_previous == kIsNonWord ?

	2154 &fall_through :

	2155 new_trace.backtrack();

	2156

	2157 if (new_trace.cp_offset() == 0) {

	2158 // The start of input counts as a non-word character, so the question is

	2159 // decided if we are at the start.

	2160 assembler->CheckAtStart(non_word);

	2161 }

	2162 // We already checked that we are not at the start of input so it must be

	2163 // OK to load the previous character.

	2164 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);

	2165 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);

	2166

	2167 assembler->BindBlock(&fall_through);

	2168 on_success()->Emit(compiler, &new_trace);

	2169 }

	2170

	2171

	2172 static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) {

	2173 if (quick_check == NULL) return false;

	2174 if (offset >= quick_check->characters()) return false;

	2175 return quick_check->positions(offset)->determines_perfectly;

	2176 }

	2177

	2178

	2179 static void UpdateBoundsCheck(intptr_t index, intptr_t* checked_up_to) {

	2180 if (index > *checked_up_to) {

	2181 *checked_up_to = index;

	2182 }

	2183 }

	2184

	2185

	2186 typedef bool EmitCharacterFunction(Isolate* isolate,

	2187 RegExpCompiler* compiler,

	2188 uint16_t c,

	2189 BlockLabel* on_failure,

	2190 intptr_t cp_offset,

	2191 bool check,

	2192 bool preloaded);

	2193

	2194

	2195 static inline bool EmitSimpleCharacter(Isolate* isolate,

	2196 RegExpCompiler* compiler,

	2197 uint16_t c,

	2198 BlockLabel* on_failure,

	2199 intptr_t cp_offset,

	2200 bool check,

	2201 bool preloaded) {

	2202 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	2203 bool bound_checked = false;

	2204 if (!preloaded) {

	2205 assembler->LoadCurrentCharacter(

	2206 cp_offset,

	2207 on_failure,

	2208 check);

	2209 bound_checked = true;

	2210 }

	2211 assembler->CheckNotCharacter(c, on_failure);

	2212 return bound_checked;

	2213 }

	2214

	2215

	2216 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,

	2217 bool ascii,

	2218 uint16_t c1,

	2219 uint16_t c2,

	2220 BlockLabel* on_failure) {

	2221 uint16_t char_mask;

	2222 if (ascii) {

	2223 char_mask = Symbols::kMaxOneCharCodeSymbol;

	2224 } else {

	2225 char_mask = Utf16::kMaxCodeUnit;

	2226 }

	2227 uint16_t exor = c1 ^ c2;

	2228 // Check whether exor has only one bit set.

	2229 if (((exor - 1) & exor) == 0) {

	2230 // If c1 and c2 differ only by one bit.

	2231 // Ecma262UnCanonicalize always gives the highest number last.

	2232 ASSERT(c2 > c1);

	2233 uint16_t mask = char_mask ^ exor;

	2234 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);

	2235 return true;

	2236 }

	2237 ASSERT(c2 > c1);

	2238 uint16_t diff = c2 - c1;

	2239 if (((diff - 1) & diff) == 0 && c1 >= diff) {

	2240 // If the characters differ by 2^n but don't differ by one bit then

	2241 // subtract the difference from the found character, then do the or

	2242 // trick. We avoid the theoretical case where negative numbers are

	2243 // involved in order to simplify code generation.

	2244 uint16_t mask = char_mask ^ diff;

	2245 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,

	2246 diff,

	2247 mask,

	2248 on_failure);

	2249 return true;

	2250 }

	2251 return false;

	2252 }

	2253

	2254 // Only emits letters (things that have case). Only used for case independent

	2255 // matches.

	2256 static inline bool EmitAtomLetter(Isolate* isolate,

	2257 RegExpCompiler* compiler,

	2258 uint16_t c,

	2259 BlockLabel* on_failure,

	2260 intptr_t cp_offset,

	2261 bool check,

	2262 bool preloaded) {

	2263 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	2264 bool ascii = compiler->ascii();

	2265 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	2266 intptr_t length = GetCaseIndependentLetters(c, ascii, chars);

	2267 if (length <= 1) return false;

	2268 // We may not need to check against the end of the input string

	2269 // if this character lies before a character that matched.

	2270 if (!preloaded) {

	2271 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

	2272 }

	2273 BlockLabel ok;

	2274 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);

	2275 switch (length) {

	2276 case 2: {

	2277 if (ShortCutEmitCharacterPair(macro_assembler,

	2278 ascii,

	2279 chars[0],

	2280 chars[1],

	2281 on_failure)) {

	2282 } else {

	2283 macro_assembler->CheckCharacter(chars[0], &ok);

	2284 macro_assembler->CheckNotCharacter(chars[1], on_failure);

	2285 macro_assembler->BindBlock(&ok);

	2286 }

	2287 break;

	2288 }

	2289 case 4:

	2290 macro_assembler->CheckCharacter(chars[3], &ok);

	2291 // Fall through!

	2292 case 3:

	2293 macro_assembler->CheckCharacter(chars[0], &ok);

	2294 macro_assembler->CheckCharacter(chars[1], &ok);

	2295 macro_assembler->CheckNotCharacter(chars[2], on_failure);

	2296 macro_assembler->BindBlock(&ok);

	2297 break;

	2298 default:

	2299 UNREACHABLE();

	2300 break;

	2301 }

	2302 return true;

	2303 }

	2304

	2305

	2306 // Only emits non-letters (things that don't have case). Only used for case

	2307 // independent matches.

	2308 static inline bool EmitAtomNonLetter(Isolate* isolate,

	2309 RegExpCompiler* compiler,

	2310 uint16_t c,

	2311 BlockLabel* on_failure,

	2312 intptr_t cp_offset,

	2313 bool check,

	2314 bool preloaded) {

	2315 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	2316 bool ascii = compiler->ascii();

	2317 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	2318 intptr_t length = GetCaseIndependentLetters(c, ascii, chars);

	2319 if (length < 1) {

	2320 // This can't match. Must be an ASCII subject and a non-ASCII character.

	2321 // We do not need to do anything since the ASCII pass already handled this.

	2322 return false; // Bounds not checked.

	2323 }

	2324 bool checked = false;

	2325 // We handle the length > 1 case in a later pass.

	2326 if (length == 1) {

	2327 if (ascii && c > Symbols::kMaxOneCharCodeSymbol) {

	2328 // Can't match - see above.

	2329 return false; // Bounds not checked.

	2330 }

	2331 if (!preloaded) {

	2332 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

	2333 checked = check;

	2334 }

	2335 macro_assembler->CheckNotCharacter(c, on_failure);

	2336 }

	2337 return checked;

	2338 }

	2339

	2340

	2341 static void EmitBoundaryTest(RegExpMacroAssembler* masm,

	2342 intptr_t border,

	2343 BlockLabel* fall_through,

	2344 BlockLabel* above_or_equal,

	2345 BlockLabel* below) {

	2346 if (below != fall_through) {

	2347 masm->CheckCharacterLT(border, below);

	2348 if (above_or_equal != fall_through) masm->GoTo(above_or_equal);

	2349 } else {

	2350 masm->CheckCharacterGT(border - 1, above_or_equal);

	2351 }

	2352 }

	2353

	2354

	2355 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,

	2356 intptr_t first,

	2357 intptr_t last,

	2358 BlockLabel* fall_through,

	2359 BlockLabel* in_range,

	2360 BlockLabel* out_of_range) {

	2361 if (in_range == fall_through) {

	2362 if (first == last) {

	2363 masm->CheckNotCharacter(first, out_of_range);

	2364 } else {

	2365 masm->CheckCharacterNotInRange(first, last, out_of_range);

	2366 }

	2367 } else {

	2368 if (first == last) {

	2369 masm->CheckCharacter(first, in_range);

	2370 } else {

	2371 masm->CheckCharacterInRange(first, last, in_range);

	2372 }

	2373 if (out_of_range != fall_through) masm->GoTo(out_of_range);

	2374 }

	2375 }

	2376

	2377

	2378 static void CutOutRange(RegExpMacroAssembler* masm,

	2379 ZoneGrowableArray<int>* ranges,

	2380 intptr_t start_index,

	2381 intptr_t end_index,

	2382 intptr_t cut_index,

	2383 BlockLabel* even_label,

	2384 BlockLabel* odd_label) {

	2385 bool odd = (((cut_index - start_index) & 1) == 1);

	2386 BlockLabel* in_range_label = odd ? odd_label : even_label;

	2387 BlockLabel dummy;

	2388 EmitDoubleBoundaryTest(masm,

	2389 ranges->At(cut_index),

	2390 ranges->At(cut_index + 1) - 1,

	2391 &dummy,

	2392 in_range_label,

	2393 &dummy);

	2394 ASSERT(!dummy.IsLinked());

	2395 // Cut out the single range by rewriting the array. This creates a new

	2396 // range that is a merger of the two ranges on either side of the one we

	2397 // are cutting out. The oddity of the labels is preserved.

	2398 for (intptr_t j = cut_index; j > start_index; j--) {

	2399 (*ranges)[j] = ranges->At(j - 1);

	2400 }

	2401 for (intptr_t j = cut_index + 1; j < end_index; j++) {

	2402 (*ranges)[j] = ranges->At(j + 1);

	2403 }

	2404 }

	2405

	2406

	2407 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.

	2408 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.

	2409 static void EmitUseLookupTable(

	2410 RegExpMacroAssembler* masm,

	2411 ZoneGrowableArray<int>* ranges,

	2412 intptr_t start_index,

	2413 intptr_t end_index,

	2414 intptr_t min_char,

	2415 BlockLabel* fall_through,

	2416 BlockLabel* even_label,

	2417 BlockLabel* odd_label) {

	2418 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	2419 static const intptr_t kMask = RegExpMacroAssembler::kTableMask;

	2420

	2421 intptr_t base = (min_char & ~kMask);

	2422

	2423 // Assert that everything is on one kTableSize page.

	2424 for (intptr_t i = start_index; i <= end_index; i++) {

	2425 ASSERT((ranges->At(i) & ~kMask) == base);

	2426 }

	2427 ASSERT(start_index == 0 \|\| (ranges->At(start_index - 1) & ~kMask) <= base);

	2428

	2429 char templ[kSize];

	2430 BlockLabel* on_bit_set;

	2431 BlockLabel* on_bit_clear;

	2432 intptr_t bit;

	2433 if (even_label == fall_through) {

	2434 on_bit_set = odd_label;

	2435 on_bit_clear = even_label;

	2436 bit = 1;

	2437 } else {

	2438 on_bit_set = even_label;

	2439 on_bit_clear = odd_label;

	2440 bit = 0;

	2441 }

	2442 for (intptr_t i = 0; i < (ranges->At(start_index) & kMask) && i < kSize;

	2443 i++) {

	2444 templ[i] = bit;

	2445 }

	2446 intptr_t j = 0;

	2447 bit ^= 1;

	2448 for (intptr_t i = start_index; i < end_index; i++) {

	2449 for (j = (ranges->At(i) & kMask); j < (ranges->At(i + 1) & kMask); j++) {

	2450 templ[j] = bit;

	2451 }

	2452 bit ^= 1;

	2453 }

	2454 for (intptr_t i = j; i < kSize; i++) {

	2455 templ[i] = bit;

	2456 }

	2457 // TODO(erikcorry): Cache these.

	2458 const TypedData& ba = TypedData::ZoneHandle(

	2459 masm->isolate(),

	2460 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld));

	2461 for (intptr_t i = 0; i < kSize; i++) {

	2462 ba.SetUint8(i, templ[i]);

	2463 }

	2464 masm->CheckBitInTable(ba, on_bit_set);

	2465 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);

	2466 }

	2467

	2468

	2469 // Unicode case. Split the search space into kSize spaces that are handled

	2470 // with recursion.

	2471 static void SplitSearchSpace(ZoneGrowableArray<int>* ranges,

	2472 intptr_t start_index,

	2473 intptr_t end_index,

	2474 intptr_t* new_start_index,

	2475 intptr_t* new_end_index,

	2476 intptr_t* border) {

	2477 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	2478 static const intptr_t kMask = RegExpMacroAssembler::kTableMask;

	2479

	2480 intptr_t first = ranges->At(start_index);

	2481 intptr_t last = ranges->At(end_index) - 1;

	2482

	2483 *new_start_index = start_index;

	2484 *border = (ranges->At(start_index) & ~kMask) + kSize;

	2485 while (*new_start_index < end_index) {

	2486 if (ranges->At(new_start_index) > border) break;

	2487 (*new_start_index)++;

	2488 }

	2489 // new_start_index is the index of the first edge that is beyond the

	2490 // current kSize space.

	2491

	2492 // For very large search spaces we do a binary chop search of the non-ASCII

	2493 // space instead of just going to the end of the current kSize space. The

	2494 // heuristics are complicated a little by the fact that any 128-character

	2495 // encoding space can be quickly tested with a table lookup, so we don't

	2496 // wish to do binary chop search at a smaller granularity than that. A

	2497 // 128-character space can take up a lot of space in the ranges array if,

	2498 // for example, we only want to match every second character (eg. the lower

	2499 // case characters on some Unicode pages).

	2500 intptr_t binary_chop_index = (end_index + start_index) / 2;

	2501 // The first test ensures that we get to the code that handles the ASCII

	2502 // range with a single not-taken branch, speeding up this important

	2503 // character range (even non-ASCII charset-based text has spaces and

	2504 // punctuation).

	2505 if (*border - 1 > Symbols::kMaxOneCharCodeSymbol && // ASCII case.

	2506 end_index - start_index > (new_start_index - start_index) 2 &&

	2507 last - first > kSize * 2 &&

	2508 binary_chop_index > *new_start_index &&

	2509 ranges->At(binary_chop_index) >= first + 2 * kSize) {

	2510 intptr_t scan_forward_for_section_border = binary_chop_index;;

	2511 intptr_t new_border = (ranges->At(binary_chop_index) \| kMask) + 1;

	2512

	2513 while (scan_forward_for_section_border < end_index) {

	2514 if (ranges->At(scan_forward_for_section_border) > new_border) {

	2515 *new_start_index = scan_forward_for_section_border;

	2516 *border = new_border;

	2517 break;

	2518 }

	2519 scan_forward_for_section_border++;

	2520 }

	2521 }

	2522

	2523 ASSERT(*new_start_index > start_index);

	2524 new_end_index = new_start_index - 1;

	2525 if (ranges->At(new_end_index) == border) {

	2526 (*new_end_index)--;

	2527 }

	2528 if (*border >= ranges->At(end_index)) {

	2529 *border = ranges->At(end_index);

	2530 *new_start_index = end_index; // Won't be used.

	2531 *new_end_index = end_index - 1;

	2532 }

	2533 }

	2534

	2535

	2536 // Gets a series of segment boundaries representing a character class. If the

	2537 // character is in the range between an even and an odd boundary (counting from

	2538 // start_index) then go to even_label, otherwise go to odd_label. We already

	2539 // know that the character is in the range of min_char to max_char inclusive.

	2540 // Either label can be NULL indicating backtracking. Either label can also be

	2541 // equal to the fall_through label.

	2542 static void GenerateBranches(RegExpMacroAssembler* masm,

	2543 ZoneGrowableArray<int>* ranges,

	2544 intptr_t start_index,

	2545 intptr_t end_index,

	2546 uint16_t min_char,

	2547 uint16_t max_char,

	2548 BlockLabel* fall_through,

	2549 BlockLabel* even_label,

	2550 BlockLabel* odd_label) {

	2551 intptr_t first = ranges->At(start_index);

	2552 intptr_t last = ranges->At(end_index) - 1;

	2553

	2554 ASSERT(min_char < first);

	2555

	2556 // Just need to test if the character is before or on-or-after

	2557 // a particular character.

	2558 if (start_index == end_index) {

	2559 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);

	2560 return;

	2561 }

	2562

	2563 // Another almost trivial case: There is one interval in the middle that is

	2564 // different from the end intervals.

	2565 if (start_index + 1 == end_index) {

	2566 EmitDoubleBoundaryTest(

	2567 masm, first, last, fall_through, even_label, odd_label);

	2568 return;

	2569 }

	2570

	2571 // It's not worth using table lookup if there are very few intervals in the

	2572 // character class.

	2573 if (end_index - start_index <= 6) {

	2574 // It is faster to test for individual characters, so we look for those

	2575 // first, then try arbitrary ranges in the second round.

	2576 static intptr_t kNoCutIndex = -1;

	2577 intptr_t cut = kNoCutIndex;

	2578 for (intptr_t i = start_index; i < end_index; i++) {

	2579 if (ranges->At(i) == ranges->At(i + 1) - 1) {

	2580 cut = i;

	2581 break;

	2582 }

	2583 }

	2584 if (cut == kNoCutIndex) cut = start_index;

	2585 CutOutRange(

	2586 masm, ranges, start_index, end_index, cut, even_label, odd_label);

	2587 ASSERT(end_index - start_index >= 2);

	2588 GenerateBranches(masm,

	2589 ranges,

	2590 start_index + 1,

	2591 end_index - 1,

	2592 min_char,

	2593 max_char,

	2594 fall_through,

	2595 even_label,

	2596 odd_label);

	2597 return;

	2598 }

	2599

	2600 // If there are a lot of intervals in the regexp, then we will use tables to

	2601 // determine whether the character is inside or outside the character class.

	2602 static const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits;

	2603

	2604 if ((max_char >> kBits) == (min_char >> kBits)) {

	2605 EmitUseLookupTable(masm,

	2606 ranges,

	2607 start_index,

	2608 end_index,

	2609 min_char,

	2610 fall_through,

	2611 even_label,

	2612 odd_label);

	2613 return;

	2614 }

	2615

	2616 if ((min_char >> kBits) != (first >> kBits)) {

	2617 masm->CheckCharacterLT(first, odd_label);

	2618 GenerateBranches(masm,

	2619 ranges,

	2620 start_index + 1,

	2621 end_index,

	2622 first,

	2623 max_char,

	2624 fall_through,

	2625 odd_label,

	2626 even_label);

	2627 return;

	2628 }

	2629

	2630 intptr_t new_start_index = 0;

	2631 intptr_t new_end_index = 0;

	2632 intptr_t border = 0;

	2633

	2634 SplitSearchSpace(ranges,

	2635 start_index,

	2636 end_index,

	2637 &new_start_index,

	2638 &new_end_index,

	2639 &border);

	2640

	2641 BlockLabel handle_rest;

	2642 BlockLabel* above = &handle_rest;

	2643 if (border == last + 1) {

	2644 // We didn't find any section that started after the limit, so everything

	2645 // above the border is one of the terminal labels.

	2646 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;

	2647 ASSERT(new_end_index == end_index - 1);

	2648 }

	2649

	2650 ASSERT(start_index <= new_end_index);

	2651 ASSERT(new_start_index <= end_index);

	2652 ASSERT(start_index < new_start_index);

	2653 ASSERT(new_end_index < end_index);

	2654 ASSERT(new_end_index + 1 == new_start_index \|\|

	2655 (new_end_index + 2 == new_start_index &&

	2656 border == ranges->At(new_end_index + 1)));

	2657 ASSERT(min_char < border - 1);

	2658 ASSERT(border < max_char);

	2659 ASSERT(ranges->At(new_end_index) < border);

	2660 ASSERT(border < ranges->At(new_start_index) \|\|

	2661 (border == ranges->At(new_start_index) &&

	2662 new_start_index == end_index &&

	2663 new_end_index == end_index - 1 &&

	2664 border == last + 1));

	2665 ASSERT(new_start_index == 0 \|\| border >= ranges->At(new_start_index - 1));

	2666

	2667 masm->CheckCharacterGT(border - 1, above);

	2668 BlockLabel dummy;

	2669 GenerateBranches(masm,

	2670 ranges,

	2671 start_index,

	2672 new_end_index,

	2673 min_char,

	2674 border - 1,

	2675 &dummy,

	2676 even_label,

	2677 odd_label);

	2678

	2679 if (handle_rest.IsLinked()) {

	2680 masm->BindBlock(&handle_rest);

	2681 bool flip = (new_start_index & 1) != (start_index & 1);

	2682 GenerateBranches(masm,

	2683 ranges,

	2684 new_start_index,

	2685 end_index,

	2686 border,

	2687 max_char,

	2688 &dummy,

	2689 flip ? odd_label : even_label,

	2690 flip ? even_label : odd_label);

	2691 }

	2692 }

	2693

	2694

	2695 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,

	2696 RegExpCharacterClass* cc,

	2697 bool ascii,

	2698 BlockLabel* on_failure,

	2699 intptr_t cp_offset,

	2700 bool check_offset,

	2701 bool preloaded,

	2702 Isolate* isolate) {

	2703 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();

	2704 if (!CharacterRange::IsCanonical(ranges)) {

	2705 CharacterRange::Canonicalize(ranges);

	2706 }

	2707

	2708 intptr_t max_char;

	2709 if (ascii) {

	2710 max_char = Symbols::kMaxOneCharCodeSymbol;

	2711 } else {

	2712 max_char = Utf16::kMaxCodeUnit;

	2713 }

	2714

	2715 intptr_t range_count = ranges->length();

	2716

	2717 intptr_t last_valid_range = range_count - 1;

	2718 while (last_valid_range >= 0) {

	2719 CharacterRange& range = (*ranges)[last_valid_range];

	2720 if (range.from() <= max_char) {

	2721 break;

	2722 }

	2723 last_valid_range--;

	2724 }

	2725

	2726 if (last_valid_range < 0) {

	2727 if (!cc->is_negated()) {

	2728 macro_assembler->GoTo(on_failure);

	2729 }

	2730 if (check_offset) {

	2731 macro_assembler->CheckPosition(cp_offset, on_failure);

	2732 }

	2733 return;

	2734 }

	2735

	2736 if (last_valid_range == 0 &&

	2737 ranges->At(0).IsEverything(max_char)) {

	2738 if (cc->is_negated()) {

	2739 macro_assembler->GoTo(on_failure);

	2740 } else {

	2741 // This is a common case hit by non-anchored expressions.

	2742 if (check_offset) {

	2743 macro_assembler->CheckPosition(cp_offset, on_failure);

	2744 }

	2745 }

	2746 return;

	2747 }

	2748 if (last_valid_range == 0 &&

	2749 !cc->is_negated() &&

	2750 ranges->At(0).IsEverything(max_char)) {

	2751 // This is a common case hit by non-anchored expressions.

	2752 if (check_offset) {

	2753 macro_assembler->CheckPosition(cp_offset, on_failure);

	2754 }

	2755 return;

	2756 }

	2757

	2758 if (!preloaded) {

	2759 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);

	2760 }

	2761

	2762 if (cc->is_standard() &&

	2763 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),

	2764 on_failure)) {

	2765 return;

	2766 }

	2767

	2768

	2769 // A new list with ascending entries. Each entry is a code unit

	2770 // where there is a boundary between code units that are part of

	2771 // the class and code units that are not. Normally we insert an

	2772 // entry at zero which goes to the failure label, but if there

	2773 // was already one there we fall through for success on that entry.

	2774 // Subsequent entries have alternating meaning (success/failure).

	2775 ZoneGrowableArray<int>* range_boundaries =

	2776 new(isolate) ZoneGrowableArray<int>(last_valid_range);

	2777

	2778 bool zeroth_entry_is_failure = !cc->is_negated();

	2779

	2780 for (intptr_t i = 0; i <= last_valid_range; i++) {

	2781 CharacterRange& range = (*ranges)[i];

	2782 if (range.from() == 0) {

	2783 ASSERT(i == 0);

	2784 zeroth_entry_is_failure = !zeroth_entry_is_failure;

	2785 } else {

	2786 range_boundaries->Add(range.from());

	2787 }

	2788 range_boundaries->Add(range.to() + 1);

	2789 }

	2790 intptr_t end_index = range_boundaries->length() - 1;

	2791 if (range_boundaries->At(end_index) > max_char) {

	2792 end_index--;

	2793 }

	2794

	2795 BlockLabel fall_through;

	2796 GenerateBranches(macro_assembler,

	2797 range_boundaries,

	2798 0, // start_index.

	2799 end_index,

	2800 0, // min_char.

	2801 max_char,

	2802 &fall_through,

	2803 zeroth_entry_is_failure ? &fall_through : on_failure,

	2804 zeroth_entry_is_failure ? on_failure : &fall_through);

	2805 macro_assembler->BindBlock(&fall_through);

	2806 }

	2807

	2808

	2809 // We call this repeatedly to generate code for each pass over the text node.

	2810 // The passes are in increasing order of difficulty because we hope one

	2811 // of the first passes will fail in which case we are saved the work of the

	2812 // later passes. for example for the case independent regexp /%[asdfghjkl]a/

	2813 // we will check the '%' in the first pass, the case independent 'a' in the

	2814 // second pass and the character class in the last pass.

	2815 //

	2816 // The passes are done from right to left, so for example to test for /bar/

	2817 // we will first test for an 'r' with offset 2, then an 'a' with offset 1

	2818 // and then a 'b' with offset 0. This means we can avoid the end-of-input

	2819 // bounds check most of the time. In the example we only need to check for

	2820 // end-of-input when loading the putative 'r'.

	2821 //

	2822 // A slight complication involves the fact that the first character may already

	2823 // be fetched into a register by the previous node. In this case we want to

	2824 // do the test for that character first. We do this in separate passes. The

	2825 // 'preloaded' argument indicates that we are doing such a 'pass'. If such a

	2826 // pass has been performed then subsequent passes will have true in

	2827 // first_element_checked to indicate that that character does not need to be

	2828 // checked again.

	2829 //

	2830 // In addition to all this we are passed a Trace, which can

	2831 // contain an AlternativeGeneration object. In this AlternativeGeneration

	2832 // object we can see details of any quick check that was already passed in

	2833 // order to get to the code we are now generating. The quick check can involve

	2834 // loading characters, which means we do not need to recheck the bounds

	2835 // up to the limit the quick check already checked. In addition the quick

	2836 // check can have involved a mask and compare operation which may simplify

	2837 // or obviate the need for further checks at some character positions.

	2838 void TextNode::TextEmitPass(RegExpCompiler* compiler,

	2839 TextEmitPassType pass,

	2840 bool preloaded,

	2841 Trace* trace,

	2842 bool first_element_checked,

	2843 intptr_t* checked_up_to) {

	2844 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	2845 bool ascii = compiler->ascii();

	2846 BlockLabel* backtrack = trace->backtrack();

	2847 QuickCheckDetails* quick_check = trace->quick_check_performed();

	2848 intptr_t element_count = elms_->length();

	2849 for (intptr_t i = preloaded ? 0 : element_count - 1; i >= 0; i--) {

	2850 TextElement elm = elms_->At(i);

	2851 intptr_t cp_offset = trace->cp_offset() + elm.cp_offset();

	2852 if (elm.text_type() == TextElement::ATOM) {

	2853 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();

	2854 for (intptr_t j = preloaded ? 0 : quarks->length() - 1; j >= 0; j--) {

	2855 if (first_element_checked && i == 0 && j == 0) continue;

	2856 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;

	2857 EmitCharacterFunction* emit_function = NULL;

	2858 switch (pass) {

	2859 case NON_ASCII_MATCH:

	2860 ASSERT(ascii);

	2861 if (quarks->At(j) > Symbols::kMaxOneCharCodeSymbol) {

	2862 assembler->GoTo(backtrack);

	2863 return;

	2864 }

	2865 break;

	2866 case NON_LETTER_CHARACTER_MATCH:

	2867 emit_function = &EmitAtomNonLetter;

	2868 break;

	2869 case SIMPLE_CHARACTER_MATCH:

	2870 emit_function = &EmitSimpleCharacter;

	2871 break;

	2872 case CASE_CHARACTER_MATCH:

	2873 emit_function = &EmitAtomLetter;

	2874 break;

	2875 default:

	2876 break;

	2877 }

	2878 if (emit_function != NULL) {

	2879 bool bound_checked = emit_function(I,

	2880 compiler,

	2881 quarks->At(j),

	2882 backtrack,

	2883 cp_offset + j,

	2884 *checked_up_to < cp_offset + j,

	2885 preloaded);

	2886 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);

	2887 }

	2888 }

	2889 } else {

	2890 ASSERT(elm.text_type() == TextElement::CHAR_CLASS);

	2891 if (pass == CHARACTER_CLASS_MATCH) {

	2892 if (first_element_checked && i == 0) continue;

	2893 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;

	2894 RegExpCharacterClass* cc = elm.char_class();

	2895 EmitCharClass(assembler,

	2896 cc,

	2897 ascii,

	2898 backtrack,

	2899 cp_offset,

	2900 *checked_up_to < cp_offset,

	2901 preloaded,

	2902 I);

	2903 UpdateBoundsCheck(cp_offset, checked_up_to);

	2904 }

	2905 }

	2906 }

	2907 }

	2908

	2909

	2910 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,

	2911 intptr_t eats_at_least) {

	2912 intptr_t preload_characters = Utils::Minimum(static_cast<intptr_t>(4),

	2913 eats_at_least);

	2914 if (compiler->macro_assembler()->CanReadUnaligned()) {

	2915 bool ascii = compiler->ascii();

	2916 if (ascii) {

	2917 if (preload_characters > 4) preload_characters = 4;

	2918 // We can't preload 3 characters because there is no machine instruction

	2919 // to do that. We can't just load 4 because we could be reading

	2920 // beyond the end of the string, which could cause a memory fault.

	2921 if (preload_characters == 3) preload_characters = 2;

	2922 } else {

	2923 if (preload_characters > 2) preload_characters = 2;

	2924 }

	2925 } else {

	2926 if (preload_characters > 1) preload_characters = 1;

	2927 }

	2928 return preload_characters;

	2929 }

	2930

	2931

	2932 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find,

	2933 intptr_t budget,

	2934 bool not_at_start) {

	2935 intptr_t answer = Length();

	2936 if (answer >= still_to_find) return answer;

	2937 if (budget <= 0) return answer;

	2938 // We are not at start after this node so we set the last argument to 'true'.

	2939 return answer + on_success()->EatsAtLeast(still_to_find - answer,

	2940 budget - 1,

	2941 true);

	2942 }

	2943

	2944

	2945 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find,

	2946 intptr_t budget,

	2947 bool not_at_start) {

	2948 if (budget <= 0) return 0;

	2949 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!

	2950 return on_success()->EatsAtLeast(still_to_find,

	2951 budget - 1,

	2952 not_at_start);

	2953 }

	2954

	2955

	2956 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find,

	2957 intptr_t budget,

	2958 bool not_at_start) {

	2959 if (budget <= 0) return 0;

	2960 // If we know we are not at the start and we are asked "how many characters

	2961 // will you match if you succeed?" then we can answer anything since false

	2962 // implies false. So lets just return the max answer (still_to_find) since

	2963 // that won't prevent us from preloading a lot of characters for the other

	2964 // branches in the node graph.

	2965 if (assertion_type() == AT_START && not_at_start) return still_to_find;

	2966 return on_success()->EatsAtLeast(still_to_find,

	2967 budget - 1,

	2968 not_at_start);

	2969 }

	2970

	2971

	2972 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find,

	2973 intptr_t budget,

	2974 bool not_at_start) {

	2975 if (budget <= 0) return 0;

	2976 return on_success()->EatsAtLeast(still_to_find,

	2977 budget - 1,

	2978 not_at_start);

	2979 }

	2980

	2981

	2982 intptr_t ChoiceNode::EatsAtLeastHelper(intptr_t still_to_find,

	2983 intptr_t budget,

	2984 RegExpNode* ignore_this_node,

	2985 bool not_at_start) {

	2986 if (budget <= 0) return 0;

	2987 intptr_t min = 100;

	2988 intptr_t choice_count = alternatives_->length();

	2989 budget = (budget - 1) / choice_count;

	2990 for (intptr_t i = 0; i < choice_count; i++) {

	2991 RegExpNode* node = (*alternatives_)[i].node();

	2992 if (node == ignore_this_node) continue;

	2993 intptr_t node_eats_at_least =

	2994 node->EatsAtLeast(still_to_find, budget, not_at_start);

	2995 if (node_eats_at_least < min) min = node_eats_at_least;

	2996 if (min == 0) return 0;

	2997 }

	2998 return min;

	2999 }

	3000

	3001

	3002 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find,

	3003 intptr_t budget,

	3004 bool not_at_start) {

	3005 return EatsAtLeastHelper(still_to_find,

	3006 budget,

	3007 NULL,

	3008 not_at_start);

	3009 }

	3010

	3011

	3012 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find,

	3013 intptr_t budget,

	3014 bool not_at_start) {

	3015 if (budget <= 0) return 0;

	3016 // Alternative 0 is the negative lookahead, alternative 1 is what comes

	3017 // afterwards.

	3018 RegExpNode* node = (*alternatives_)[1].node();

	3019 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);

	3020 }

	3021

	3022

	3023 // Takes the left-most 1-bit and smears it out, setting all bits to its right.

	3024 static inline uint32_t SmearBitsRight(uint32_t v) {

	3025 v \|= v >> 1;

	3026 v \|= v >> 2;

	3027 v \|= v >> 4;

	3028 v \|= v >> 8;

	3029 v \|= v >> 16;

	3030 return v;

	3031 }

	3032

	3033

	3034 // Here is the meat of GetQuickCheckDetails (see also the comment on the

	3035 // super-class in the .h file).

	3036 //

	3037 // We iterate along the text object, building up for each character a

	3038 // mask and value that can be used to test for a quick failure to match.

	3039 // The masks and values for the positions will be combined into a single

	3040 // machine word for the current character width in order to be used in

	3041 // generating a quick check.

	3042 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,

	3043 RegExpCompiler* compiler,

	3044 intptr_t characters_filled_in,

	3045 bool not_at_start) {

	3046 ASSERT(characters_filled_in < details->characters());

	3047 intptr_t characters = details->characters();

	3048 intptr_t char_mask;

	3049 if (compiler->ascii()) {

	3050 char_mask = Symbols::kMaxOneCharCodeSymbol;

	3051 } else {

	3052 char_mask = Utf16::kMaxCodeUnit;

	3053 }

	3054 for (intptr_t k = 0; k < elms_->length(); k++) {

	3055 TextElement elm = elms_->At(k);

	3056 if (elm.text_type() == TextElement::ATOM) {

	3057 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();

	3058 for (intptr_t i = 0; i < characters && i < quarks->length(); i++) {

	3059 QuickCheckDetails::Position* pos =

	3060 details->positions(characters_filled_in);

	3061 uint16_t c = quarks->At(i);

	3062 if (c > char_mask) {

	3063 // If we expect a non-ASCII character from an ASCII string,

	3064 // there is no way we can match. Not even case independent

	3065 // matching can turn an ASCII character into non-ASCII or

	3066 // vice versa.

	3067 details->set_cannot_match();

	3068 pos->determines_perfectly = false;

	3069 return;

	3070 }

	3071 if (compiler->ignore_case()) {

	3072 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	3073 intptr_t length =

	3074 GetCaseIndependentLetters(c, compiler->ascii(), chars);

	3075 ASSERT(length != 0); // Can only happen if c > char_mask (see above).

	3076 if (length == 1) {

	3077 // This letter has no case equivalents, so it's nice and simple

	3078 // and the mask-compare will determine definitely whether we have

	3079 // a match at this character position.

	3080 pos->mask = char_mask;

	3081 pos->value = c;

	3082 pos->determines_perfectly = true;

	3083 } else {

	3084 uint32_t common_bits = char_mask;

	3085 uint32_t bits = chars[0];

	3086 for (intptr_t j = 1; j < length; j++) {

	3087 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);

	3088 common_bits ^= differing_bits;

	3089 bits &= common_bits;

	3090 }

	3091 // If length is 2 and common bits has only one zero in it then

	3092 // our mask and compare instruction will determine definitely

	3093 // whether we have a match at this character position. Otherwise

	3094 // it can only be an approximate check.

	3095 uint32_t one_zero = (common_bits \| ~char_mask);

	3096 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {

	3097 pos->determines_perfectly = true;

	3098 }

	3099 pos->mask = common_bits;

	3100 pos->value = bits;

	3101 }

	3102 } else {

	3103 // Don't ignore case. Nice simple case where the mask-compare will

	3104 // determine definitely whether we have a match at this character

	3105 // position.

	3106 pos->mask = char_mask;

	3107 pos->value = c;

	3108 pos->determines_perfectly = true;

	3109 }

	3110 characters_filled_in++;

	3111 ASSERT(characters_filled_in <= details->characters());

	3112 if (characters_filled_in == details->characters()) {

	3113 return;

	3114 }

	3115 }

	3116 } else {

	3117 QuickCheckDetails::Position* pos =

	3118 details->positions(characters_filled_in);

	3119 RegExpCharacterClass* tree = elm.char_class();

	3120 ZoneGrowableArray<CharacterRange>* ranges = tree->ranges();

	3121 if (tree->is_negated()) {

	3122 // A quick check uses multi-character mask and compare. There is no

	3123 // useful way to incorporate a negative char class into this scheme

	3124 // so we just conservatively create a mask and value that will always

	3125 // succeed.

	3126 pos->mask = 0;

	3127 pos->value = 0;

	3128 } else {

	3129 intptr_t first_range = 0;

	3130 while (ranges->At(first_range).from() > char_mask) {

	3131 first_range++;

	3132 if (first_range == ranges->length()) {

	3133 details->set_cannot_match();

	3134 pos->determines_perfectly = false;

	3135 return;

	3136 }

	3137 }

	3138 CharacterRange range = ranges->At(first_range);

	3139 uint16_t from = range.from();

	3140 uint16_t to = range.to();

	3141 if (to > char_mask) {

	3142 to = char_mask;

	3143 }

	3144 uint32_t differing_bits = (from ^ to);

	3145 // A mask and compare is only perfect if the differing bits form a

	3146 // number like 00011111 with one single block of trailing 1s.

	3147 if ((differing_bits & (differing_bits + 1)) == 0 &&

	3148 from + differing_bits == to) {

	3149 pos->determines_perfectly = true;

	3150 }

	3151 uint32_t common_bits = ~SmearBitsRight(differing_bits);

	3152 uint32_t bits = (from & common_bits);

	3153 for (intptr_t i = first_range + 1; i < ranges->length(); i++) {

	3154 CharacterRange range = ranges->At(i);

	3155 uint16_t from = range.from();

	3156 uint16_t to = range.to();

	3157 if (from > char_mask) continue;

	3158 if (to > char_mask) to = char_mask;

	3159 // Here we are combining more ranges into the mask and compare

	3160 // value. With each new range the mask becomes more sparse and

	3161 // so the chances of a false positive rise. A character class

	3162 // with multiple ranges is assumed never to be equivalent to a

	3163 // mask and compare operation.

	3164 pos->determines_perfectly = false;

	3165 uint32_t new_common_bits = (from ^ to);

	3166 new_common_bits = ~SmearBitsRight(new_common_bits);

	3167 common_bits &= new_common_bits;

	3168 bits &= new_common_bits;

	3169 uint32_t differing_bits = (from & common_bits) ^ bits;

	3170 common_bits ^= differing_bits;

	3171 bits &= common_bits;

	3172 }

	3173 pos->mask = common_bits;

	3174 pos->value = bits;

	3175 }

	3176 characters_filled_in++;

	3177 ASSERT(characters_filled_in <= details->characters());

	3178 if (characters_filled_in == details->characters()) {

	3179 return;

	3180 }

	3181 }

	3182 }

	3183 ASSERT(characters_filled_in != details->characters());

	3184 if (!details->cannot_match()) {

	3185 on_success()-> GetQuickCheckDetails(details,

	3186 compiler,

	3187 characters_filled_in,

	3188 true);

	3189 }

	3190 }

	3191

	3192

	3193 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,

	3194 RegExpCompiler* compiler,

	3195 intptr_t characters_filled_in,

	3196 bool not_at_start) {

	3197 if (body_can_be_zero_length_ \|\| info()->visited) return;

	3198 VisitMarker marker(info());

	3199 return ChoiceNode::GetQuickCheckDetails(details,

	3200 compiler,

	3201 characters_filled_in,

	3202 not_at_start);

	3203 }

	3204

	3205

	3206 intptr_t TextNode::Length() {

	3207 TextElement elm = elms_->Last();

	3208 ASSERT(elm.cp_offset() >= 0);

	3209 return elm.cp_offset() + elm.length();

	3210 }

	3211

	3212

	3213 bool TextNode::SkipPass(intptr_t intptr_t_pass, bool ignore_case) {

	3214 TextEmitPassType pass = static_cast<TextEmitPassType>(intptr_t_pass);

	3215 if (ignore_case) {

	3216 return pass == SIMPLE_CHARACTER_MATCH;

	3217 } else {

	3218 return pass == NON_LETTER_CHARACTER_MATCH \|\| pass == CASE_CHARACTER_MATCH;

	3219 }

	3220 }

	3221

	3222

	3223 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges,

	3224 const intptr_t* special_class,

	3225 intptr_t length) {

	3226 length--; // Remove final 0x10000.

	3227 ASSERT(special_class[length] == 0x10000);

	3228 ASSERT(ranges->length() != 0);

	3229 ASSERT(length != 0);

	3230 ASSERT(special_class[0] != 0);

	3231 if (ranges->length() != (length >> 1) + 1) {

	3232 return false;

	3233 }

	3234 CharacterRange range = ranges->At(0);

	3235 if (range.from() != 0) {

	3236 return false;

	3237 }

	3238 for (intptr_t i = 0; i < length; i += 2) {

	3239 if (special_class[i] != (range.to() + 1)) {

	3240 return false;

	3241 }

	3242 range = ranges->At((i >> 1) + 1);

	3243 if (special_class[i+1] != range.from()) {

	3244 return false;

	3245 }

	3246 }

	3247 if (range.to() != 0xffff) {

	3248 return false;

	3249 }

	3250 return true;

	3251 }

	3252

	3253

	3254 static bool CompareRanges(ZoneGrowableArray<CharacterRange>* ranges,

	3255 const intptr_t* special_class,

	3256 intptr_t length) {

	3257 length--; // Remove final 0x10000.

	3258 ASSERT(special_class[length] == 0x10000);

	3259 if (ranges->length() * 2 != length) {

	3260 return false;

	3261 }

	3262 for (intptr_t i = 0; i < length; i += 2) {

	3263 CharacterRange range = ranges->At(i >> 1);

	3264 if (range.from() != special_class[i] \|\|

	3265 range.to() != special_class[i + 1] - 1) {

	3266 return false;

	3267 }

	3268 }

	3269 return true;

	3270 }

	3271

	3272

	3273 bool RegExpCharacterClass::is_standard() {

	3274 // TODO(lrn): Remove need for this function, by not throwing away information

	3275 // along the way.

	3276 if (is_negated_) {

	3277 return false;

	3278 }

	3279 if (set_.is_standard()) {

	3280 return true;

	3281 }

	3282 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {

	3283 set_.set_standard_set_type('s');

	3284 return true;

	3285 }

	3286 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {

	3287 set_.set_standard_set_type('S');

	3288 return true;

	3289 }

	3290 if (CompareInverseRanges(set_.ranges(),

	3291 kLineTerminatorRanges,

	3292 kLineTerminatorRangeCount)) {

	3293 set_.set_standard_set_type('.');

	3294 return true;

	3295 }

	3296 if (CompareRanges(set_.ranges(),

	3297 kLineTerminatorRanges,

	3298 kLineTerminatorRangeCount)) {

	3299 set_.set_standard_set_type('n');

	3300 return true;

	3301 }

	3302 if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) {

	3303 set_.set_standard_set_type('w');

	3304 return true;

	3305 }

	3306 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) {

	3307 set_.set_standard_set_type('W');

	3308 return true;

	3309 }

	3310 return false;

	3311 }

	3312

	3313

	3314 void TextNode::MakeCaseIndependent(bool is_ascii) {

	3315 intptr_t element_count = elms_->length();

	3316 for (intptr_t i = 0; i < element_count; i++) {

	3317 TextElement elm = elms_->At(i);

	3318 if (elm.text_type() == TextElement::CHAR_CLASS) {

	3319 RegExpCharacterClass* cc = elm.char_class();

	3320 // None of the standard character classes is different in the case

	3321 // independent case and it slows us down if we don't know that.

	3322 if (cc->is_standard()) continue;

	3323 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();

	3324 intptr_t range_count = ranges->length();

	3325 for (intptr_t j = 0; j < range_count; j++) {

	3326 (*ranges)[j].AddCaseEquivalents(ranges, is_ascii, I);

	3327 }

	3328 }

	3329 }

	3330 }

	3331

	3332

	3333 intptr_t TextNode::GreedyLoopTextLength() {

	3334 TextElement elm = elms_->At(elms_->length() - 1);

	3335 return elm.cp_offset() + elm.length();

	3336 }

	3337

	3338

	3339 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(

	3340 RegExpCompiler* compiler) {

	3341 if (elms_->length() != 1) return NULL;

	3342 TextElement elm = elms_->At(0);

	3343 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;

	3344 RegExpCharacterClass* node = elm.char_class();

	3345 ZoneGrowableArray<CharacterRange>* ranges = node->ranges();

	3346 if (!CharacterRange::IsCanonical(ranges)) {

	3347 CharacterRange::Canonicalize(ranges);

	3348 }

	3349 if (node->is_negated()) {

	3350 return ranges->length() == 0 ? on_success() : NULL;

	3351 }

	3352 if (ranges->length() != 1) return NULL;

	3353 uint32_t max_char;

	3354 if (compiler->ascii()) {

	3355 max_char = Symbols::kMaxOneCharCodeSymbol;

	3356 } else {

	3357 max_char = Utf16::kMaxCodeUnit;

	3358 }

	3359 return ranges->At(0).IsEverything(max_char) ? on_success() : NULL;

	3360 }

	3361

	3362

	3363 ActionNode* ActionNode::SetRegister(intptr_t reg,

	3364 intptr_t val,

	3365 RegExpNode* on_success) {

	3366 ActionNode* result =

	3367 new(on_success->isolate()) ActionNode(SET_REGISTER, on_success);

	3368 result->data_.u_store_register.reg = reg;

	3369 result->data_.u_store_register.value = val;

	3370 return result;

	3371 }

	3372

	3373

	3374 ActionNode* ActionNode::IncrementRegister(intptr_t reg,

	3375 RegExpNode* on_success) {

	3376 ActionNode* result =

	3377 new(on_success->isolate()) ActionNode(INCREMENT_REGISTER, on_success);

	3378 result->data_.u_increment_register.reg = reg;

	3379 return result;

	3380 }

	3381

	3382

	3383 ActionNode* ActionNode::StorePosition(intptr_t reg,

	3384 bool is_capture,

	3385 RegExpNode* on_success) {

	3386 ActionNode* result =

	3387 new(on_success->isolate()) ActionNode(STORE_POSITION, on_success);

	3388 result->data_.u_position_register.reg = reg;

	3389 result->data_.u_position_register.is_capture = is_capture;

	3390 return result;

	3391 }

	3392

	3393

	3394 ActionNode* ActionNode::ClearCaptures(Interval range,

	3395 RegExpNode* on_success) {

	3396 ActionNode* result =

	3397 new(on_success->isolate()) ActionNode(CLEAR_CAPTURES, on_success);

	3398 result->data_.u_clear_captures.range_from = range.from();

	3399 result->data_.u_clear_captures.range_to = range.to();

	3400 return result;

	3401 }

	3402

	3403

	3404 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg,

	3405 intptr_t position_reg,

	3406 RegExpNode* on_success) {

	3407 ActionNode* result =

	3408 new(on_success->isolate()) ActionNode(BEGIN_SUBMATCH, on_success);

	3409 result->data_.u_submatch.stack_pointer_register = stack_reg;

	3410 result->data_.u_submatch.current_position_register = position_reg;

	3411 return result;

	3412 }

	3413

	3414

	3415 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg,

	3416 intptr_t position_reg,

	3417 intptr_t clear_register_count,

	3418 intptr_t clear_register_from,

	3419 RegExpNode* on_success) {

	3420 ActionNode* result =

	3421 new(on_success->isolate()) ActionNode(POSITIVE_SUBMATCH_SUCCESS,

	3422 on_success);

	3423 result->data_.u_submatch.stack_pointer_register = stack_reg;

	3424 result->data_.u_submatch.current_position_register = position_reg;

	3425 result->data_.u_submatch.clear_register_count = clear_register_count;

	3426 result->data_.u_submatch.clear_register_from = clear_register_from;

	3427 return result;

	3428 }

	3429

	3430

	3431 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register,

	3432 intptr_t repetition_register,

	3433 intptr_t repetition_limit,

	3434 RegExpNode* on_success) {

	3435 ActionNode* result =

	3436 new(on_success->isolate()) ActionNode(EMPTY_MATCH_CHECK, on_success);

	3437 result->data_.u_empty_match_check.start_register = start_register;

	3438 result->data_.u_empty_match_check.repetition_register = repetition_register;

	3439 result->data_.u_empty_match_check.repetition_limit = repetition_limit;

	3440 return result;

	3441 }

	3442

	3443

	3444 TextElement TextElement::Atom(RegExpAtom* atom) {

	3445 return TextElement(ATOM, atom);

	3446 }

	3447

	3448

	3449 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {

	3450 return TextElement(CHAR_CLASS, char_class);

	3451 }

	3452

	3453

	3454 intptr_t TextElement::length() const {

	3455 switch (text_type()) {

	3456 case ATOM:

	3457 return atom()->length();

	3458

	3459 case CHAR_CLASS:

	3460 return 1;

	3461 }

	3462 UNREACHABLE();

	3463 return 0;

	3464 }

	3465

	3466

	3467 static void AddClass(const intptr_t* elmv,

	3468 intptr_t elmc,

	3469 ZoneGrowableArray<CharacterRange>* ranges) {

	3470 elmc--;

	3471 ASSERT(elmv[elmc] == 0x10000);

	3472 for (intptr_t i = 0; i < elmc; i += 2) {

	3473 ASSERT(elmv[i] < elmv[i + 1]);

	3474 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1));

	3475 }

	3476 }

	3477

	3478

	3479 static void AddClassNegated(const intptr_t *elmv,

	3480 intptr_t elmc,

	3481 ZoneGrowableArray<CharacterRange>* ranges) {

	3482 elmc--;

	3483 ASSERT(elmv[elmc] == 0x10000);

	3484 ASSERT(elmv[0] != 0x0000);

	3485 ASSERT(elmv[elmc-1] != Utf16::kMaxCodeUnit);

	3486 uint16_t last = 0x0000;

	3487 for (intptr_t i = 0; i < elmc; i += 2) {

	3488 ASSERT(last <= elmv[i] - 1);

	3489 ASSERT(elmv[i] < elmv[i + 1]);

	3490 ranges->Add(CharacterRange(last, elmv[i] - 1));

	3491 last = elmv[i + 1];

	3492 }

	3493 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit));

	3494 }

	3495

	3496

	3497 void CharacterRange::AddClassEscape(uint16_t type,

	3498 ZoneGrowableArray<CharacterRange>* ranges) {

	3499 switch (type) {

	3500 case 's':

	3501 AddClass(kSpaceRanges, kSpaceRangeCount, ranges);

	3502 break;

	3503 case 'S':

	3504 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges);

	3505 break;

	3506 case 'w':

	3507 AddClass(kWordRanges, kWordRangeCount, ranges);

	3508 break;

	3509 case 'W':

	3510 AddClassNegated(kWordRanges, kWordRangeCount, ranges);

	3511 break;

	3512 case 'd':

	3513 AddClass(kDigitRanges, kDigitRangeCount, ranges);

	3514 break;

	3515 case 'D':

	3516 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges);

	3517 break;

	3518 case '.':

	3519 AddClassNegated(kLineTerminatorRanges,

	3520 kLineTerminatorRangeCount,

	3521 ranges);

	3522 break;

	3523 // This is not a character range as defined by the spec but a

	3524 // convenient shorthand for a character class that matches any

	3525 // character.

	3526 case '*':

	3527 ranges->Add(CharacterRange::Everything());

	3528 break;

	3529 // This is the set of characters matched by the $ and ^ symbols

	3530 // in multiline mode.

	3531 case 'n':

	3532 AddClass(kLineTerminatorRanges,

	3533 kLineTerminatorRangeCount,

	3534 ranges);

	3535 break;

	3536 default:

	3537 UNREACHABLE();

	3538 }

	3539 }

	3540

	3541

	3542 // Move a number of elements in a zonelist to another position

	3543 // in the same list. Handles overlapping source and target areas.

	3544 static void MoveRanges(ZoneGrowableArray<CharacterRange>* list,

	3545 intptr_t from,

	3546 intptr_t to,

	3547 intptr_t count) {

	3548 // Ranges are potentially overlapping.

	3549 if (from < to) {

	3550 for (intptr_t i = count - 1; i >= 0; i--) {

	3551 (*list)[to + i] = list->At(from + i);

	3552 }

	3553 } else {

	3554 for (intptr_t i = 0; i < count; i++) {

	3555 (*list)[to + i] = list->At(from + i);

	3556 }

	3557 }

	3558 }

	3559

	3560

	3561 static intptr_t InsertRangeInCanonicalList(

	3562 ZoneGrowableArray<CharacterRange>* list,

	3563 intptr_t count,

	3564 CharacterRange insert) {

	3565 // Inserts a range into list[0..count[, which must be sorted

	3566 // by from value and non-overlapping and non-adjacent, using at most

	3567 // list[0..count] for the result. Returns the number of resulting

	3568 // canonicalized ranges. Inserting a range may collapse existing ranges into

	3569 // fewer ranges, so the return value can be anything in the range 1..count+1.

	3570 uint16_t from = insert.from();

	3571 uint16_t to = insert.to();

	3572 intptr_t start_pos = 0;

	3573 intptr_t end_pos = count;

	3574 for (intptr_t i = count - 1; i >= 0; i--) {

	3575 CharacterRange current = list->At(i);

	3576 if (current.from() > to + 1) {

	3577 end_pos = i;

	3578 } else if (current.to() + 1 < from) {

	3579 start_pos = i + 1;

	3580 break;

	3581 }

	3582 }

	3583

	3584 // Inserted range overlaps, or is adjacent to, ranges at positions

	3585 // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are

	3586 // not affected by the insertion.

	3587 // If start_pos == end_pos, the range must be inserted before start_pos.

	3588 // if start_pos < end_pos, the entire range from start_pos to end_pos

	3589 // must be merged with the insert range.

	3590

	3591 if (start_pos == end_pos) {

	3592 // Insert between existing ranges at position start_pos.

	3593 if (start_pos < count) {

	3594 MoveRanges(list, start_pos, start_pos + 1, count - start_pos);

	3595 }

	3596 (*list)[start_pos] = insert;

	3597 return count + 1;

	3598 }

	3599 if (start_pos + 1 == end_pos) {

	3600 // Replace single existing range at position start_pos.

	3601 CharacterRange to_replace = list->At(start_pos);

	3602 intptr_t new_from = Utils::Minimum(to_replace.from(), from);

	3603 intptr_t new_to = Utils::Maximum(to_replace.to(), to);

	3604 (*list)[start_pos] = CharacterRange(new_from, new_to);

	3605 return count;

	3606 }

	3607 // Replace a number of existing ranges from start_pos to end_pos - 1.

	3608 // Move the remaining ranges down.

	3609

	3610 intptr_t new_from = Utils::Minimum(list->At(start_pos).from(), from);

	3611 intptr_t new_to = Utils::Maximum(list->At(end_pos - 1).to(), to);

	3612 if (end_pos < count) {

	3613 MoveRanges(list, end_pos, start_pos + 1, count - end_pos);

	3614 }

	3615 (*list)[start_pos] = CharacterRange(new_from, new_to);

	3616 return count - (end_pos - start_pos) + 1;

	3617 }

	3618

	3619

	3620 bool CharacterRange::IsCanonical(ZoneGrowableArray<CharacterRange>* ranges) {

	3621 ASSERT(ranges != NULL);

	3622 intptr_t n = ranges->length();

	3623 if (n <= 1) return true;

	3624 intptr_t max = ranges->At(0).to();

	3625 for (intptr_t i = 1; i < n; i++) {

	3626 CharacterRange next_range = ranges->At(i);

	3627 if (next_range.from() <= max + 1) return false;

	3628 max = next_range.to();

	3629 }

	3630 return true;

	3631 }

	3632

	3633

	3634 void CharacterRange::Canonicalize(

	3635 ZoneGrowableArray<CharacterRange>* character_ranges) {

	3636 if (character_ranges->length() <= 1) return;

	3637 // Check whether ranges are already canonical (increasing, non-overlapping,

	3638 // non-adjacent).

	3639 intptr_t n = character_ranges->length();

	3640 intptr_t max = character_ranges->At(0).to();

	3641 intptr_t i = 1;

	3642 while (i < n) {

	3643 CharacterRange current = character_ranges->At(i);

	3644 if (current.from() <= max + 1) {

	3645 break;

	3646 }

	3647 max = current.to();

	3648 i++;

	3649 }

	3650 // Canonical until the i'th range. If that's all of them, we are done.

	3651 if (i == n) return;

	3652

	3653 // The ranges at index i and forward are not canonicalized. Make them so by

	3654 // doing the equivalent of insertion sort (inserting each into the previous

	3655 // list, in order).

	3656 // Notice that inserting a range can reduce the number of ranges in the

	3657 // result due to combining of adjacent and overlapping ranges.

	3658 intptr_t read = i; // Range to insert.

	3659 intptr_t num_canonical = i; // Length of canonicalized part of list.

	3660 do {

	3661 num_canonical = InsertRangeInCanonicalList(character_ranges,

	3662 num_canonical,

	3663 character_ranges->At(read));

	3664 read++;

	3665 } while (read < n);

	3666 character_ranges->TruncateTo(num_canonical);

	3667

	3668 ASSERT(CharacterRange::IsCanonical(character_ranges));

	3669 }

	3670

	3671

	3672 void CharacterSet::Canonicalize() {

	3673 // Special/default classes are always considered canonical. The result

	3674 // of calling ranges() will be sorted.

	3675 if (ranges_ == NULL) return;

	3676 CharacterRange::Canonicalize(ranges_);

	3677 }

	3678

	3679

	3680 ZoneGrowableArray<CharacterRange>* CharacterSet::ranges() {

	3681 if (ranges_ == NULL) {

	3682 ranges_ = new ZoneGrowableArray<CharacterRange>(2);

	3683 CharacterRange::AddClassEscape(standard_set_type_, ranges_);

	3684 }

	3685 return ranges_;

	3686 }

	3687

	3688

	3689 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges,

	3690 ZoneGrowableArray<CharacterRange>* negated_ranges) {

	3691 ASSERT(CharacterRange::IsCanonical(ranges));

	3692 ASSERT(negated_ranges->length() == 0);

	3693 intptr_t range_count = ranges->length();

	3694 uint16_t from = 0;

	3695 intptr_t i = 0;

	3696 if (range_count > 0 && ranges->At(0).from() == 0) {

	3697 from = ranges->At(0).to();

	3698 i = 1;

	3699 }

	3700 while (i < range_count) {

	3701 CharacterRange range = ranges->At(i);

	3702 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1));

	3703 from = range.to();

	3704 i++;

	3705 }

	3706 if (from < Utf16::kMaxCodeUnit) {

	3707 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit));

	3708 }

	3709 }

	3710

	3711

	3712 void CharacterRange::AddCaseEquivalents(

	3713 ZoneGrowableArray<CharacterRange>* ranges,

	3714 bool is_ascii,

	3715 Isolate* isolate) {

	3716 uint16_t bottom = from();

	3717 uint16_t top = to();

	3718 if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {

	3719 if (bottom > Symbols::kMaxOneCharCodeSymbol) return;

	3720 if (top > Symbols::kMaxOneCharCodeSymbol) {

	3721 top = Symbols::kMaxOneCharCodeSymbol;

	3722 }

	3723 }

	3724

	3725 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;

	3726 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange;

	3727 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	3728 if (top == bottom) {

	3729 // If this is a singleton we just expand the one character.

	3730 intptr_t length = jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLIN T

	3731 for (intptr_t i = 0; i < length; i++) {

	3732 uint32_t chr = chars[i];

	3733 if (chr != bottom) {

	3734 ranges->Add(CharacterRange::Singleton(chars[i]));

	3735 }

	3736 }

	3737 } else {

	3738 // If this is a range we expand the characters block by block,

	3739 // expanding contiguous subranges (blocks) one at a time.

	3740 // The approach is as follows. For a given start character we

	3741 // look up the remainder of the block that contains it (represented

	3742 // by the end point), for instance we find 'z' if the character

	3743 // is 'c'. A block is characterized by the property

	3744 // that all characters uncanonicalize in the same way, except that

	3745 // each entry in the result is incremented by the distance from the first

	3746 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and // NOLINT

	3747 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].

	3748 // Once we've found the end point we look up its uncanonicalization

	3749 // and produce a range for each element. For instance for [c-f]

	3750 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only

	3751 // add a range if it is not already contained in the input, so [c-f]

	3752 // will be skipped but [C-F] will be added. If this range is not

	3753 // completely contained in a block we do this for all the blocks

	3754 // covered by the range (handling characters that is not in a block

	3755 // as a "singleton block").

	3756 int32_t range[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	3757 intptr_t pos = bottom;

	3758 while (pos <= top) {

	3759 intptr_t length = jsregexp_canonrange.get(pos, '\0', range);

	3760 uint16_t block_end;

	3761 if (length == 0) {

	3762 block_end = pos;

	3763 } else {

	3764 ASSERT(length == 1);

	3765 block_end = range[0];

	3766 }

	3767 intptr_t end = (block_end > top) ? top : block_end;

	3768 length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT

	3769 for (intptr_t i = 0; i < length; i++) {

	3770 uint32_t c = range[i];

	3771 uint16_t range_from = c - (block_end - pos);

	3772 uint16_t range_to = c - (block_end - end);

	3773 if (!(bottom <= range_from && range_to <= top)) {

	3774 ranges->Add(CharacterRange(range_from, range_to));

	3775 }

	3776 }

	3777 pos = end + 1;

	3778 }

	3779 }

	3780 }

	3781

	3782

	3783 // -------------------------------------------------------------------

	3784 // Splay tree

	3785

	3786

	3787 // Workaround for the fact that ZoneGrowableArray does not have contains().

	3788 static bool ArrayContains(ZoneGrowableArray<unsigned>* array,

	3789 unsigned value) {

	3790 for (intptr_t i = 0; i < array->length(); i++) {

	3791 if (array->At(i) == value) {

	3792 return true;

	3793 }

	3794 }

	3795 return false;

	3796 }

	3797

	3798

	3799 void OutSet::Set(unsigned value, Isolate* isolate) {

	3800 if (value < kFirstLimit) {

	3801 first_ \|= (1 << value);

	3802 } else {

	3803 if (remaining_ == NULL)

	3804 remaining_ = new(isolate) ZoneGrowableArray<unsigned>(1);

	3805

	3806 bool remaining_contains_value = ArrayContains(remaining_, value);

	3807 if (remaining_->is_empty() \|\| !remaining_contains_value) {

	3808 remaining_->Add(value);

	3809 }

	3810 }

	3811 }

	3812

	3813

	3814 bool OutSet::Get(unsigned value) const {

	3815 if (value < kFirstLimit) {

	3816 return (first_ & (1 << value)) != 0;

	3817 } else if (remaining_ == NULL) {

	3818 return false;

	3819 } else {

	3820 return ArrayContains(remaining_, value);

	3821 }

	3822 }

	3823

	3824 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,

	3825 RegExpCompiler* compiler,

	3826 intptr_t filled_in,

	3827 bool not_at_start) {

	3828 if (assertion_type_ == AT_START && not_at_start) {

	3829 details->set_cannot_match();

	3830 return;

	3831 }

	3832 return on_success()->GetQuickCheckDetails(details,

	3833 compiler,

	3834 filled_in,

	3835 not_at_start);

	3836 }

	3837

	3838

	3839 void GuardedAlternative::AddGuard(Guard* guard, Isolate* isolate) {

	3840 if (guards_ == NULL)

	3841 guards_ = new(isolate) ZoneGrowableArray<Guard*>(1);

	3842 guards_->Add(guard);

	3843 }

	3844

	3845

	3846 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {

	3847 ASSERT(loop_node_ == NULL);

	3848 AddAlternative(alt);

	3849 loop_node_ = alt.node();

	3850 }

	3851

	3852

	3853 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {

	3854 ASSERT(continue_node_ == NULL);

	3855 AddAlternative(alt);

	3856 continue_node_ = alt.node();

	3857 }

	3858

	3859

	3860 void LoopChoiceNode::Accept(NodeVisitor* visitor) {

	3861 visitor->VisitLoopChoice(this);

	3862 }

	3863

	3864

	3865 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find,

	3866 intptr_t budget,

	3867 bool not_at_start) {

	3868 return EatsAtLeastHelper(still_to_find,

	3869 budget - 1,

	3870 loop_node_,

	3871 not_at_start);

	3872 }

	3873

	3874

	3875 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,

	3876 RegExpCompiler* compiler,

	3877 intptr_t characters_filled_in,

	3878 bool not_at_start) {

	3879 not_at_start = (not_at_start \|\| not_at_start_);

	3880 intptr_t choice_count = alternatives_->length();

	3881 ASSERT(choice_count > 0);

	3882 (*alternatives_)[0].node()->GetQuickCheckDetails(details,

	3883 compiler,

	3884 characters_filled_in,

	3885 not_at_start);

	3886 for (intptr_t i = 1; i < choice_count; i++) {

	3887 QuickCheckDetails new_details(details->characters());

	3888 RegExpNode* node = (*alternatives_)[i].node();

	3889 node->GetQuickCheckDetails(&new_details, compiler,

	3890 characters_filled_in,

	3891 not_at_start);

	3892 // Here we merge the quick match details of the two branches.

	3893 details->Merge(&new_details, characters_filled_in);

	3894 }

	3895 }

	3896

	3897

	3898 void QuickCheckDetails::Clear() {

	3899 for (intptr_t i = 0; i < characters_; i++) {

	3900 positions_[i].mask = 0;

	3901 positions_[i].value = 0;

	3902 positions_[i].determines_perfectly = false;

	3903 }

	3904 characters_ = 0;

	3905 }

	3906

	3907

	3908 void QuickCheckDetails::Advance(intptr_t by, bool ascii) {

	3909 ASSERT(by >= 0);

	3910 if (by >= characters_) {

	3911 Clear();

	3912 return;

	3913 }

	3914 for (intptr_t i = 0; i < characters_ - by; i++) {

	3915 positions_[i] = positions_[by + i];

	3916 }

	3917 for (intptr_t i = characters_ - by; i < characters_; i++) {

	3918 positions_[i].mask = 0;

	3919 positions_[i].value = 0;

	3920 positions_[i].determines_perfectly = false;

	3921 }

	3922 characters_ -= by;

	3923 // We could change mask_ and value_ here but we would never advance unless

	3924 // they had already been used in a check and they won't be used again because

	3925 // it would gain us nothing. So there's no point.

	3926 }

	3927

	3928

	3929 void QuickCheckDetails::Merge(QuickCheckDetails* other, intptr_t from_index) {

	3930 ASSERT(characters_ == other->characters_);

	3931 if (other->cannot_match_) {

	3932 return;

	3933 }

	3934 if (cannot_match_) {

	3935 this = other;

	3936 return;

	3937 }

	3938 for (intptr_t i = from_index; i < characters_; i++) {

	3939 QuickCheckDetails::Position* pos = positions(i);

	3940 QuickCheckDetails::Position* other_pos = other->positions(i);

	3941 if (pos->mask != other_pos->mask \|\|

	3942 pos->value != other_pos->value \|\|

	3943 !other_pos->determines_perfectly) {

	3944 // Our mask-compare operation will be approximate unless we have the

	3945 // exact same operation on both sides of the alternation.

	3946 pos->determines_perfectly = false;

	3947 }

	3948 pos->mask &= other_pos->mask;

	3949 pos->value &= pos->mask;

	3950 other_pos->value &= pos->mask;

	3951 uint16_t differing_bits = (pos->value ^ other_pos->value);

	3952 pos->mask &= ~differing_bits;

	3953 pos->value &= pos->mask;

	3954 }

	3955 }

	3956

	3957

	3958 // Finds the fixed match length of a sequence of nodes that goes from

	3959 // this alternative and back to this choice node. If there are variable

	3960 // length nodes or other complications in the way then return a sentinel

	3961 // value indicating that a greedy loop cannot be constructed.

	3962 intptr_t ChoiceNode::GreedyLoopTextLengthForAlternative(

	3963 GuardedAlternative* alternative) {

	3964 intptr_t length = 0;

	3965 RegExpNode* node = alternative->node();

	3966 // Later we will generate code for all these text nodes using recursion

	3967 // so we have to limit the max number.

	3968 intptr_t recursion_depth = 0;

	3969 while (node != this) {

	3970 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {

	3971 return kNodeIsTooComplexForGreedyLoops;

	3972 }

	3973 intptr_t node_length = node->GreedyLoopTextLength();

	3974 if (node_length == kNodeIsTooComplexForGreedyLoops) {

	3975 return kNodeIsTooComplexForGreedyLoops;

	3976 }

	3977 length += node_length;

	3978 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);

	3979 node = seq_node->on_success();

	3980 }

	3981 return length;

	3982 }

	3983

	3984

	3985 void NegativeLookaheadChoiceNode::GetQuickCheckDetails(

	3986 QuickCheckDetails* details,

	3987 RegExpCompiler* compiler,

	3988 intptr_t filled_in,

	3989 bool not_at_start) {

	3990 // Alternative 0 is the negative lookahead, alternative 1 is what comes

	3991 // afterwards.

	3992 RegExpNode* node = (*alternatives_)[1].node();

	3993 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);

	3994 }

	3995

	3996

	3997 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {

	3998 return RegExpEngine::CompilationResult("RegExp too big");

	3999 }

	4000

	4001

	4002 // Attempts to compile the regexp using an Irregexp code generator. Returns

	4003 // a fixed array or a null handle depending on whether it succeeded.

	4004 RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool ignore_case,

	4005 intptr_t specialization_cid)

	4006 : next_register_(2 * (capture_count + 1)),

	4007 work_list_(NULL),

	4008 recursion_depth_(0),

	4009 ignore_case_(ignore_case),

	4010 specialization_cid_(specialization_cid),

	4011 reg_exp_too_big_(false),

	4012 current_expansion_factor_(1),

	4013 isolate_(Isolate::Current()) {

	4014 accept_ = new(I) EndNode(EndNode::ACCEPT, I);

	4015 }

	4016

	4017

	4018 RegExpEngine::CompilationResult RegExpCompiler::Assemble(

	4019 IRRegExpMacroAssembler* macro_assembler,

	4020 RegExpNode* start,

	4021 intptr_t capture_count,

	4022 const String& pattern) {

	4023 static const bool use_slow_safe_regexp_compiler = false;

	4024

	4025 macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);

	4026 macro_assembler_ = macro_assembler;

	4027

	4028 ZoneGrowableArray<RegExpNode*> work_list(0);

	4029 work_list_ = &work_list;

	4030 BlockLabel fail;

	4031 macro_assembler_->PushBacktrack(&fail);

	4032 Trace new_trace;

	4033 start->Emit(this, &new_trace);

	4034 macro_assembler_->BindBlock(&fail);

	4035 macro_assembler_->Fail();

	4036 while (!work_list.is_empty()) {

	4037 work_list.RemoveLast()->Emit(this, &new_trace);

	4038 }

	4039 if (reg_exp_too_big_) return IrregexpRegExpTooBig();

	4040

	4041 macro_assembler->FinalizeIndirectGotos();

	4042

	4043 return RegExpEngine::CompilationResult(macro_assembler,

	4044 macro_assembler->graph_entry(),

	4045 macro_assembler->num_blocks(),

	4046 macro_assembler->num_stack_locals());

	4047 }

	4048

	4049

	4050 RegExpEngine::CompilationResult RegExpEngine::Compile(

	4051 RegExpCompileData* data,

	4052 const ParsedFunction* parsed_function,

	4053 const ZoneGrowableArray<const ICData*>& ic_data_array) {

	4054 Isolate* isolate = Isolate::Current();

	4055

	4056 const Function& function = parsed_function->function();

	4057 const intptr_t specialization_cid = function.regexp_cid();

	4058 const bool is_ascii = (specialization_cid == kOneByteStringCid \|\|

	4059 specialization_cid == kExternalOneByteStringCid);

	4060 JSRegExp& regexp = JSRegExp::Handle(isolate, function.regexp());

	4061 const String& pattern = String::Handle(isolate, regexp.pattern());

	4062

	4063 ASSERT(!regexp.IsNull());

	4064 ASSERT(!pattern.IsNull());

	4065

	4066 const bool ignore_case = regexp.is_ignore_case();

	4067 const bool is_global = regexp.is_global();

	4068

	4069 RegExpCompiler compiler(data->capture_count, ignore_case, specialization_cid);

	4070

	4071 // TODO(jgruber): Frequency sampling is currently disabled because of several

	4072 // issues. We do not want to store subject strings in the regexp object since

	4073 // they might be long and we should not prevent their garbage collection.

	4074 // Passing them to this function explicitly does not help, since we must

	4075 // generate exactly the same IR for both the unoptimizing and optimizing

	4076 // pipelines (otherwise it gets confused when i.e. deopt id's differ).

	4077 // An option would be to store sampling results in the regexp object, but

	4078 // I'm not sure the performance gains are relevant enough.

	4079

	4080 // Wrap the body of the regexp in capture #0.

	4081 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,

	4082 0,

	4083 &compiler,

	4084 compiler.accept());

	4085

	4086 RegExpNode* node = captured_body;

	4087 bool is_start_anchored = data->tree->IsAnchoredAtStart();

	4088 bool is_end_anchored = data->tree->IsAnchoredAtEnd();

	4089 intptr_t max_length = data->tree->max_match();

	4090 if (!is_start_anchored) {

	4091 // Add a .*? at the beginning, outside the body capture, unless

	4092 // this expression is anchored at the beginning.

	4093 RegExpNode* loop_node =

	4094 RegExpQuantifier::ToNode(0,

	4095 RegExpTree::kInfinity,

	4096 false,

	4097 new(isolate) RegExpCharacterClass('*'),

	4098 &compiler,

	4099 captured_body,

	4100 data->contains_anchor);

	4101

	4102 if (data->contains_anchor) {

	4103 // Unroll loop once, to take care of the case that might start

	4104 // at the start of input.

	4105 ChoiceNode* first_step_node = new(isolate) ChoiceNode(2, isolate);

	4106 first_step_node->AddAlternative(GuardedAlternative(captured_body));

	4107 first_step_node->AddAlternative(GuardedAlternative(

	4108 new(isolate) TextNode(

	4109 new(isolate) RegExpCharacterClass('*'), loop_node)));

	4110 node = first_step_node;

	4111 } else {

	4112 node = loop_node;

	4113 }

	4114 }

	4115 if (is_ascii) {

	4116 node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);

	4117 // Do it again to propagate the new nodes to places where they were not

	4118 // put because they had not been calculated yet.

	4119 if (node != NULL) {

	4120 node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);

	4121 }

	4122 }

	4123

	4124 if (node == NULL) node = new(isolate) EndNode(EndNode::BACKTRACK, isolate);

	4125 data->node = node;

	4126 Analysis analysis(ignore_case, is_ascii);

	4127 analysis.EnsureAnalyzed(node);

	4128 if (analysis.has_failed()) {

	4129 const char* error_message = analysis.error_message();

	4130 return CompilationResult(error_message);

	4131 }

	4132

	4133 // Native regexp implementation.

	4134

	4135 IRRegExpMacroAssembler* macro_assembler =

	4136 new(isolate) IRRegExpMacroAssembler(specialization_cid,

	4137 data->capture_count,

	4138 parsed_function,

	4139 ic_data_array,

	4140 isolate);

	4141

	4142 // Inserted here, instead of in Assembler, because it depends on information

	4143 // in the AST that isn't replicated in the Node structure.

	4144 static const intptr_t kMaxBacksearchLimit = 1024;

	4145 if (is_end_anchored &&

	4146 !is_start_anchored &&

	4147 max_length < kMaxBacksearchLimit) {

	4148 macro_assembler->SetCurrentPositionFromEnd(max_length);

	4149 }

	4150

	4151 if (is_global) {

	4152 macro_assembler->set_global_mode(

	4153 (data->tree->min_match() > 0)

	4154 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK

	4155 : RegExpMacroAssembler::GLOBAL);

	4156 }

	4157

	4158 RegExpEngine::CompilationResult result =

	4159 compiler.Assemble(macro_assembler,

	4160 node,

	4161 data->capture_count,

	4162 pattern);

	4163

	4164 if (FLAG_trace_irregexp) {

	4165 macro_assembler->PrintBlocks();

	4166 }

	4167

	4168 return result;

	4169 }

	4170

	4171

	4172 static void CreateSpecializedFunction(Isolate* isolate,

	4173 const JSRegExp& regexp,

	4174 intptr_t specialization_cid,

	4175 const Object& owner) {

	4176 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount;

	4177

	4178 Function& fn = Function::Handle(isolate,

	4179 Function::New(String::Handle(isolate, Symbols::New("RegExp")),

	4180 RawFunction::kIrregexpFunction,

	4181 true, // Static.

	4182 false, // Not const.

	4183 false, // Not abstract.

	4184 false, // Not external.

	4185 false, // Not native.

	4186 owner,

	4187 0)); // Requires a non-negative token position.

	4188

	4189 // TODO(jgruber): Share these arrays between all irregexp functions.

	4190 fn.set_num_fixed_parameters(kParamCount);

	4191 fn.set_parameter_types(Array::Handle(isolate, Array::New(kParamCount,

	4192 Heap::kOld)));

	4193 fn.set_parameter_names(Array::Handle(isolate, Array::New(kParamCount,

	4194 Heap::kOld)));

	4195 fn.SetParameterTypeAt(0, Type::Handle(isolate, Type::DynamicType()));

	4196 fn.SetParameterNameAt(0, Symbols::string_param_());

	4197 fn.SetParameterTypeAt(1, Type::Handle(isolate, Type::DynamicType()));

	4198 fn.SetParameterNameAt(1, Symbols::start_index_param_());

	4199 fn.set_result_type(Type::Handle(isolate, Type::ArrayType()));

	4200

	4201 // Cache the result.

	4202 regexp.set_function(specialization_cid, fn);

	4203

	4204 fn.set_regexp(regexp);

	4205 fn.set_regexp_cid(specialization_cid);

	4206

	4207 // The function is compiled lazily during the first call.

	4208 }

	4209

	4210

	4211 RawJSRegExp* RegExpEngine::CreateJSRegExp(Isolate* isolate,

	4212 const String& pattern,

	4213 bool multi_line,

	4214 bool ignore_case) {

	4215 const JSRegExp& regexp = JSRegExp::Handle(JSRegExp::New(0));

	4216

	4217 regexp.set_pattern(pattern);

	4218

	4219 if (multi_line) {

	4220 regexp.set_is_multi_line();

	4221 }

	4222 if (ignore_case) {

	4223 regexp.set_is_ignore_case();

	4224 }

	4225

	4226 // TODO(jgruber): We might want to use normal string searching algorithms

	4227 // for simple patterns.

	4228 regexp.set_is_complex();

	4229 regexp.set_is_global(); // All dart regexps are global.

	4230

	4231 const Library& lib = Library::Handle(isolate, Library::CoreLibrary());

	4232 const Class& owner = Class::Handle(isolate,

	4233 lib.LookupClass(String::Handle(isolate, Symbols::New("RegExp"))));

	4234

	4235 CreateSpecializedFunction(isolate, regexp, kOneByteStringCid, owner);

	4236 CreateSpecializedFunction(isolate, regexp, kTwoByteStringCid, owner);

	4237 CreateSpecializedFunction(isolate, regexp, kExternalOneByteStringCid, owner);

	4238 CreateSpecializedFunction(isolate, regexp, kExternalTwoByteStringCid, owner);

	4239

	4240 return regexp.raw();

	4241 }

	4242

	4243

	4244 void BoyerMoorePositionInfo::Set(intptr_t character) {

	4245 SetInterval(Interval(character, character));

	4246 }

	4247

	4248

	4249 ContainedInLattice AddRange(ContainedInLattice containment,

	4250 const intptr_t* ranges,

	4251 intptr_t ranges_length,

	4252 Interval new_range) {

	4253 ASSERT((ranges_length & 1) == 1);

	4254 ASSERT(ranges[ranges_length - 1] == Utf16::kMaxCodeUnit + 1);

	4255 if (containment == kLatticeUnknown) return containment;

	4256 bool inside = false;

	4257 intptr_t last = 0;

	4258 for (intptr_t i = 0; i < ranges_length;

	4259 inside = !inside, last = ranges[i], i++) {

	4260 // Consider the range from last to ranges[i].

	4261 // We haven't got to the new range yet.

	4262 if (ranges[i] <= new_range.from()) continue;

	4263 // New range is wholly inside last-ranges[i]. Note that new_range.to() is

	4264 // inclusive, but the values in ranges are not.

	4265 if (last <= new_range.from() && new_range.to() < ranges[i]) {

	4266 return Combine(containment, inside ? kLatticeIn : kLatticeOut);

	4267 }

	4268 return kLatticeUnknown;

	4269 }

	4270 return containment;

	4271 }

	4272

	4273

	4274 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {

	4275 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);

	4276 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);

	4277 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);

	4278 surrogate_ =

	4279 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);

	4280 if (interval.to() - interval.from() >= kMapSize - 1) {

	4281 if (map_count_ != kMapSize) {

	4282 map_count_ = kMapSize;

	4283 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true;

	4284 }

	4285 return;

	4286 }

	4287 for (intptr_t i = interval.from(); i <= interval.to(); i++) {

	4288 intptr_t mod_character = (i & kMask);

	4289 if (!map_->At(mod_character)) {

	4290 map_count_++;

	4291 (*map_)[mod_character] = true;

	4292 }

	4293 if (map_count_ == kMapSize) return;

	4294 }

	4295 }

	4296

	4297

	4298 void BoyerMoorePositionInfo::SetAll() {

	4299 s_ = w_ = d_ = kLatticeUnknown;

	4300 if (map_count_ != kMapSize) {

	4301 map_count_ = kMapSize;

	4302 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true;

	4303 }

	4304 }

	4305

	4306

	4307 BoyerMooreLookahead::BoyerMooreLookahead(

	4308 intptr_t length, RegExpCompiler* compiler, Isolate* isolate)

	4309 : length_(length),

	4310 compiler_(compiler) {

	4311 if (compiler->ascii()) {

	4312 max_char_ = Symbols::kMaxOneCharCodeSymbol;

	4313 } else {

	4314 max_char_ = Utf16::kMaxCodeUnit;

	4315 }

	4316 bitmaps_ = new(isolate) ZoneGrowableArray<BoyerMoorePositionInfo*>(length);

	4317 for (intptr_t i = 0; i < length; i++) {

	4318 bitmaps_->Add(new(isolate) BoyerMoorePositionInfo(isolate));

	4319 }

	4320 }

	4321

	4322

	4323 // Take all the characters that will not prevent a successful match if they

	4324 // occur in the subject string in the range between min_lookahead and

	4325 // max_lookahead (inclusive) measured from the current position. If the

	4326 // character at max_lookahead offset is not one of these characters, then we

	4327 // can safely skip forwards by the number of characters in the range.

	4328 intptr_t BoyerMooreLookahead::GetSkipTable(

	4329 intptr_t min_lookahead,

	4330 intptr_t max_lookahead,

	4331 const TypedData& boolean_skip_table) {

	4332 const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	4333

	4334 const intptr_t kSkipArrayEntry = 0;

	4335 const intptr_t kDontSkipArrayEntry = 1;

	4336

	4337 for (intptr_t i = 0; i < kSize; i++) {

	4338 boolean_skip_table.SetUint8(i, kSkipArrayEntry);

	4339 }

	4340 intptr_t skip = max_lookahead + 1 - min_lookahead;

	4341

	4342 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) {

	4343 BoyerMoorePositionInfo* map = bitmaps_->At(i);

	4344 for (intptr_t j = 0; j < kSize; j++) {

	4345 if (map->at(j)) {

	4346 boolean_skip_table.SetUint8(j, kDontSkipArrayEntry);

	4347 }

	4348 }

	4349 }

	4350

	4351 return skip;

	4352 }

	4353

	4354

	4355 // Find the longest range of lookahead that has the fewest number of different

	4356 // characters that can occur at a given position. Since we are optimizing two

	4357 // different parameters at once this is a tradeoff.

	4358 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) {

	4359 intptr_t biggest_points = 0;

	4360 // If more than 32 characters out of 128 can occur it is unlikely that we can

	4361 // be lucky enough to step forwards much of the time.

	4362 const intptr_t kMaxMax = 32;

	4363 for (intptr_t max_number_of_chars = 4;

	4364 max_number_of_chars < kMaxMax;

	4365 max_number_of_chars *= 2) {

	4366 biggest_points =

	4367 FindBestInterval(max_number_of_chars, biggest_points, from, to);

	4368 }

	4369 if (biggest_points == 0) return false;

	4370 return true;

	4371 }

	4372

	4373

	4374 // Find the highest-points range between 0 and length_ where the character

	4375 // information is not too vague. 'Too vague' means that there are more than

	4376 // max_number_of_chars that can occur at this position. Calculates the number

	4377 // of points as the product of width-of-the-range and

	4378 // probability-of-finding-one-of-the-characters, where the probability is

	4379 // calculated using the frequency distribution of the sample subject string.

	4380 intptr_t BoyerMooreLookahead::FindBestInterval(

	4381 intptr_t max_number_of_chars,

	4382 intptr_t old_biggest_points,

	4383 intptr_t* from,

	4384 intptr_t* to) {

	4385 intptr_t biggest_points = old_biggest_points;

	4386 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	4387 for (intptr_t i = 0; i < length_; ) {

	4388 while (i < length_ && Count(i) > max_number_of_chars) i++;

	4389 if (i == length_) break;

	4390 intptr_t remembered_from = i;

	4391 bool union_map[kSize];

	4392 for (intptr_t j = 0; j < kSize; j++) union_map[j] = false;

	4393 while (i < length_ && Count(i) <= max_number_of_chars) {

	4394 BoyerMoorePositionInfo* map = bitmaps_->At(i);

	4395 for (intptr_t j = 0; j < kSize; j++) union_map[j] \|= map->at(j);

	4396 i++;

	4397 }

	4398 intptr_t frequency = 0;

	4399 for (intptr_t j = 0; j < kSize; j++) {

	4400 if (union_map[j]) {

	4401 // Add 1 to the frequency to give a small per-character boost for

	4402 // the cases where our sampling is not good enough and many

	4403 // characters have a frequency of zero. This means the frequency

	4404 // can theoretically be up to 2*kSize though we treat it mostly as

	4405 // a fraction of kSize.

	4406 frequency += compiler_->frequency_collator()->Frequency(j) + 1;

	4407 }

	4408 }

	4409 // We use the probability of skipping times the distance we are skipping to

	4410 // judge the effectiveness of this. Actually we have a cut-off: By

	4411 // dividing by 2 we switch off the skipping if the probability of skipping

	4412 // is less than 50%. This is because the multibyte mask-and-compare

	4413 // skipping in quickcheck is more likely to do well on this case.

	4414 bool in_quickcheck_range = ((i - remembered_from < 4) \|\|

	4415 (compiler_->ascii() ? remembered_from <= 4 : remembered_from <= 2));

	4416 // Called 'probability' but it is only a rough estimate and can actually

	4417 // be outside the 0-kSize range.

	4418 intptr_t probability =

	4419 (in_quickcheck_range ? kSize / 2 : kSize) - frequency;

	4420 intptr_t points = (i - remembered_from) * probability;

	4421 if (points > biggest_points) {

	4422 *from = remembered_from;

	4423 *to = i - 1;

	4424 biggest_points = points;

	4425 }

	4426 }

	4427 return biggest_points;

	4428 }

	4429

	4430

	4431 // See comment above on the implementation of GetSkipTable.

	4432 bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {

	4433 const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	4434

	4435 intptr_t min_lookahead = 0;

	4436 intptr_t max_lookahead = 0;

	4437

	4438 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;

	4439

	4440 bool found_single_character = false;

	4441 intptr_t single_character = 0;

	4442 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) {

	4443 BoyerMoorePositionInfo* map = bitmaps_->At(i);

	4444 if (map->map_count() > 1 \|\|

	4445 (found_single_character && map->map_count() != 0)) {

	4446 found_single_character = false;

	4447 break;

	4448 }

	4449 for (intptr_t j = 0; j < kSize; j++) {

	4450 if (map->at(j)) {

	4451 found_single_character = true;

	4452 single_character = j;

	4453 break;

	4454 }

	4455 }

	4456 }

	4457

	4458 intptr_t lookahead_width = max_lookahead + 1 - min_lookahead;

	4459

	4460 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {

	4461 // The mask-compare can probably handle this better.

	4462 return false;

	4463 }

	4464

	4465 if (found_single_character) {

	4466 BlockLabel cont, again;

	4467 masm->BindBlock(&again);

	4468 masm->LoadCurrentCharacter(max_lookahead, &cont, true);

	4469 if (max_char_ > kSize) {

	4470 masm->CheckCharacterAfterAnd(single_character,

	4471 RegExpMacroAssembler::kTableMask,

	4472 &cont);

	4473 } else {

	4474 masm->CheckCharacter(single_character, &cont);

	4475 }

	4476 masm->AdvanceCurrentPosition(lookahead_width);

	4477 masm->GoTo(&again);

	4478 masm->BindBlock(&cont);

	4479 return true;

	4480 }

	4481

	4482 const TypedData& boolean_skip_table = TypedData::ZoneHandle(

	4483 compiler_->isolate(),

	4484 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld));

	4485 intptr_t skip_distance = GetSkipTable(

	4486 min_lookahead, max_lookahead, boolean_skip_table);

	4487 ASSERT(skip_distance != 0);

	4488

	4489 BlockLabel cont, again;

	4490

	4491 masm->BindBlock(&again);

	4492 masm->LoadCurrentCharacter(max_lookahead, &cont, true);

	4493 masm->CheckBitInTable(boolean_skip_table, &cont);

	4494 masm->AdvanceCurrentPosition(skip_distance);

	4495 masm->GoTo(&again);

	4496 masm->BindBlock(&cont);

	4497

	4498 return true;

	4499 }

	4500

	4501

	4502 // -------------------------------------------------------------------

	4503 // Analysis

	4504

	4505

	4506 void Analysis::EnsureAnalyzed(RegExpNode* that) {

	4507 if (that->info()->been_analyzed \|\| that->info()->being_analyzed)

	4508 return;

	4509 that->info()->being_analyzed = true;

	4510 that->Accept(this);

	4511 that->info()->being_analyzed = false;

	4512 that->info()->been_analyzed = true;

	4513 }

	4514

	4515

	4516 void Analysis::VisitEnd(EndNode* that) {

	4517 // nothing to do

	4518 }

	4519

	4520

	4521 void TextNode::CalculateOffsets() {

	4522 intptr_t element_count = elements()->length();

	4523 // Set up the offsets of the elements relative to the start. This is a fixed

	4524 // quantity since a TextNode can only contain fixed-width things.

	4525 intptr_t cp_offset = 0;

	4526 for (intptr_t i = 0; i < element_count; i++) {

	4527 TextElement& elm = (*elements())[i];

	4528 elm.set_cp_offset(cp_offset);

	4529 cp_offset += elm.length();

	4530 }

	4531 }

	4532

	4533

	4534 void Analysis::VisitText(TextNode* that) {

	4535 if (ignore_case_) {

	4536 that->MakeCaseIndependent(is_ascii_);

	4537 }

	4538 EnsureAnalyzed(that->on_success());

	4539 if (!has_failed()) {

	4540 that->CalculateOffsets();

	4541 }

	4542 }

	4543

	4544

	4545 void Analysis::VisitAction(ActionNode* that) {

	4546 RegExpNode* target = that->on_success();

	4547 EnsureAnalyzed(target);

	4548 if (!has_failed()) {

	4549 // If the next node is interested in what it follows then this node

	4550 // has to be interested too so it can pass the information on.

	4551 that->info()->AddFromFollowing(target->info());

	4552 }

	4553 }

	4554

	4555

	4556 void Analysis::VisitChoice(ChoiceNode* that) {

	4557 NodeInfo* info = that->info();

	4558 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {

	4559 RegExpNode* node = (*that->alternatives())[i].node();

	4560 EnsureAnalyzed(node);

	4561 if (has_failed()) return;

	4562 // Anything the following nodes need to know has to be known by

	4563 // this node also, so it can pass it on.

	4564 info->AddFromFollowing(node->info());

	4565 }

	4566 }

	4567

	4568

	4569 void Analysis::VisitLoopChoice(LoopChoiceNode* that) {

	4570 NodeInfo* info = that->info();

	4571 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {

	4572 RegExpNode* node = (*that->alternatives())[i].node();

	4573 if (node != that->loop_node()) {

	4574 EnsureAnalyzed(node);

	4575 if (has_failed()) return;

	4576 info->AddFromFollowing(node->info());

	4577 }

	4578 }

	4579 // Check the loop last since it may need the value of this node

	4580 // to get a correct result.

	4581 EnsureAnalyzed(that->loop_node());

	4582 if (!has_failed()) {

	4583 info->AddFromFollowing(that->loop_node()->info());

	4584 }

	4585 }

	4586

	4587

	4588 void Analysis::VisitBackReference(BackReferenceNode* that) {

	4589 EnsureAnalyzed(that->on_success());

	4590 }

	4591

	4592

	4593 void Analysis::VisitAssertion(AssertionNode* that) {

	4594 EnsureAnalyzed(that->on_success());

	4595 }

	4596

	4597

	4598 // -------------------------------------------------------------------

	4599 // Dot/dotty output

	4600

	4601

	4602 #ifdef DEBUG

	4603

	4604

	4605 class DotPrinter: public NodeVisitor {

	4606 public:

	4607 explicit DotPrinter(bool ignore_case)

	4608 : ignore_case_(ignore_case) {}

	4609 void PrintNode(const char* label, RegExpNode* node);

	4610 void Visit(RegExpNode* node);

	4611 void PrintAttributes(RegExpNode* from);

	4612 void PrintOnFailure(RegExpNode* from, RegExpNode* to);

	4613 #define DECLARE_VISIT(Type) \

	4614 virtual void Visit##Type(Type##Node* that);

	4615 FOR_EACH_NODE_TYPE(DECLARE_VISIT)

	4616 #undef DECLARE_VISIT

	4617 private:

	4618 bool ignore_case_;

	4619 };

	4620

	4621

	4622 void DotPrinter::PrintNode(const char* label, RegExpNode* node) {

	4623 OS::Print("digraph G {\n graph [label=\"");

	4624 for (intptr_t i = 0; label[i]; i++) {

	4625 switch (label[i]) {

	4626 case '\\':

	4627 OS::Print("\\\\");

	4628 break;

	4629 case '"':

	4630 OS::Print("\"");

	4631 break;

	4632 default:

	4633 OS::Print("%c", label[i]);

	4634 break;

	4635 }

	4636 }

	4637 OS::Print("\"];\n");

	4638 Visit(node);

	4639 OS::Print("}\n");

	4640 }

	4641

	4642

	4643 void DotPrinter::Visit(RegExpNode* node) {

	4644 if (node->info()->visited) return;

	4645 node->info()->visited = true;

	4646 node->Accept(this);

	4647 }

	4648

	4649

	4650 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {

	4651 OS::Print(" n%p -> n%p [style=dotted];\n", from, on_failure);

	4652 Visit(on_failure);

	4653 }

	4654

	4655

	4656 class AttributePrinter : public ValueObject {

	4657 public:

	4658 AttributePrinter() : first_(true) {}

	4659 void PrintSeparator() {

	4660 if (first_) {

	4661 first_ = false;

	4662 } else {

	4663 OS::Print("\|");

	4664 }

	4665 }

	4666 void PrintBit(const char* name, bool value) {

	4667 if (!value) return;

	4668 PrintSeparator();

	4669 OS::Print("{%s}", name);

	4670 }

	4671 void PrintPositive(const char* name, intptr_t value) {

	4672 if (value < 0) return;

	4673 PrintSeparator();

	4674 OS::Print("{%s\|%" Pd "}", name, value);

	4675 }

	4676

	4677 private:

	4678 bool first_;

	4679 };

	4680

	4681

	4682 void DotPrinter::PrintAttributes(RegExpNode* that) {

	4683 OS::Print(" a%p [shape=Mrecord, color=grey, fontcolor=grey, "

	4684 "margin=0.1, fontsize=10, label=\"{", that);

	4685 AttributePrinter printer;

	4686 NodeInfo* info = that->info();

	4687 printer.PrintBit("NI", info->follows_newline_interest);

	4688 printer.PrintBit("WI", info->follows_word_interest);

	4689 printer.PrintBit("SI", info->follows_start_interest);

	4690 BlockLabel* label = that->label();

	4691 if (label->IsBound())

	4692 printer.PrintPositive("@", label->Position());

	4693 OS::Print("}\"];\n"

	4694 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n",

	4695 that, that);

	4696 }

	4697

	4698

	4699 static const bool kPrintDispatchTable = false;

	4700 void DotPrinter::VisitChoice(ChoiceNode* that) {

	4701 if (kPrintDispatchTable) {

	4702 OS::Print(" n%p [shape=Mrecord, label=\"", that);

	4703 UNIMPLEMENTED();

	4704 } else {

	4705 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that);

	4706 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {

	4707 GuardedAlternative alt = that->alternatives()->At(i);

	4708 OS::Print(" n%p -> n%p", that, alt.node());

	4709 }

	4710 }

	4711 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {

	4712 GuardedAlternative alt = that->alternatives()->At(i);

	4713 alt.node()->Accept(this);

	4714 }

	4715 }

	4716

	4717

	4718 void DotPrinter::VisitText(TextNode* that) {

	4719 OS::Print(" n%p [label=\"", that);

	4720 for (intptr_t i = 0; i < that->elements()->length(); i++) {

	4721 if (i > 0) OS::Print(" ");

	4722 TextElement elm = that->elements()->At(i);

	4723 switch (elm.text_type()) {

	4724 case TextElement::ATOM: {

	4725 ZoneGrowableArray<uint16_t>* data = elm.atom()->data();

	4726 for (intptr_t i = 0; i < data->length(); i++) {

	4727 OS::Print("%c", static_cast<char>(data->At(i)));

	4728 }

	4729 break;

	4730 }

	4731 case TextElement::CHAR_CLASS: {

	4732 RegExpCharacterClass* node = elm.char_class();

	4733 OS::Print("[");

	4734 if (node->is_negated()) OS::Print("^");

	4735 for (intptr_t j = 0; j < node->ranges()->length(); j++) {

	4736 CharacterRange range = node->ranges()->At(j);

	4737 PrintUtf16(range.from());

	4738 OS::Print("-");

	4739 PrintUtf16(range.to());

	4740 }

	4741 OS::Print("]");

	4742 break;

	4743 }

	4744 default:

	4745 UNREACHABLE();

	4746 }

	4747 }

	4748 OS::Print("\", shape=box, peripheries=2];\n");

	4749 PrintAttributes(that);

	4750 OS::Print(" n%p -> n%p;\n", that, that->on_success());

	4751 Visit(that->on_success());

	4752 }

	4753

	4754

	4755 void DotPrinter::VisitBackReference(BackReferenceNode* that) {

	4756 OS::Print(" n%p [label=\"$%" Pd "..$%" Pd "\", shape=doubleoctagon];\n",

	4757 that, that->start_register(), that->end_register());

	4758 PrintAttributes(that);

	4759 OS::Print(" n%p -> n%p;\n", that, that->on_success());

	4760 Visit(that->on_success());

	4761 }

	4762

	4763

	4764 void DotPrinter::VisitEnd(EndNode* that) {

	4765 OS::Print(" n%p [style=bold, shape=point];\n", that);

	4766 PrintAttributes(that);

	4767 }

	4768

	4769

	4770 void DotPrinter::VisitAssertion(AssertionNode* that) {

	4771 OS::Print(" n%p [", that);

	4772 switch (that->assertion_type()) {

	4773 case AssertionNode::AT_END:

	4774 OS::Print("label=\"$\", shape=septagon");

	4775 break;

	4776 case AssertionNode::AT_START:

	4777 OS::Print("label=\"^\", shape=septagon");

	4778 break;

	4779 case AssertionNode::AT_BOUNDARY:

	4780 OS::Print("label=\"\\b\", shape=septagon");

	4781 break;

	4782 case AssertionNode::AT_NON_BOUNDARY:

	4783 OS::Print("label=\"\\B\", shape=septagon");

	4784 break;

	4785 case AssertionNode::AFTER_NEWLINE:

	4786 OS::Print("label=\"(?<=\\n)\", shape=septagon");

	4787 break;

	4788 }

	4789 OS::Print("];\n");

	4790 PrintAttributes(that);

	4791 RegExpNode* successor = that->on_success();

	4792 OS::Print(" n%p -> n%p;\n", that, successor);

	4793 Visit(successor);

	4794 }

	4795

	4796

	4797 void DotPrinter::VisitAction(ActionNode* that) {

	4798 OS::Print(" n%p [", that);

	4799 switch (that->action_type_) {

	4800 case ActionNode::SET_REGISTER:

	4801 OS::Print("label=\"$%" Pd ":=%" Pd "\", shape=octagon",

	4802 that->data_.u_store_register.reg,

	4803 that->data_.u_store_register.value);

	4804 break;

	4805 case ActionNode::INCREMENT_REGISTER:

	4806 OS::Print("label=\"$%" Pd "++\", shape=octagon",

	4807 that->data_.u_increment_register.reg);

	4808 break;

	4809 case ActionNode::STORE_POSITION:

	4810 OS::Print("label=\"$%" Pd ":=$pos\", shape=octagon",

	4811 that->data_.u_position_register.reg);

	4812 break;

	4813 case ActionNode::BEGIN_SUBMATCH:

	4814 OS::Print("label=\"$%" Pd ":=$pos,begin\", shape=septagon",

	4815 that->data_.u_submatch.current_position_register);

	4816 break;

	4817 case ActionNode::POSITIVE_SUBMATCH_SUCCESS:

	4818 OS::Print("label=\"escape\", shape=septagon");

	4819 break;

	4820 case ActionNode::EMPTY_MATCH_CHECK:

	4821 OS::Print("label=\"$%" Pd "=$pos?,$%" Pd "<%" Pd "?\", shape=septagon",

	4822 that->data_.u_empty_match_check.start_register,

	4823 that->data_.u_empty_match_check.repetition_register,

	4824 that->data_.u_empty_match_check.repetition_limit);

	4825 break;

	4826 case ActionNode::CLEAR_CAPTURES: {

	4827 OS::Print("label=\"clear $%" Pd " to $%" Pd "\", shape=septagon",

	4828 that->data_.u_clear_captures.range_from,

	4829 that->data_.u_clear_captures.range_to);

	4830 break;

	4831 }

	4832 }

	4833 OS::Print("];\n");

	4834 PrintAttributes(that);

	4835 RegExpNode* successor = that->on_success();

	4836 OS::Print(" n%p -> n%p;\n", that, successor);

	4837 Visit(successor);

	4838 }

	4839

	4840

	4841 void RegExpEngine::DotPrint(const char* label,

	4842 RegExpNode* node,

	4843 bool ignore_case) {

	4844 DotPrinter printer(ignore_case);

	4845 printer.PrintNode(label, node);

	4846 }

	4847

	4848

	4849 #endif // DEBUG

	4850

	4851 } // namespace dart

OLD	NEW

« no previous file with comments | « runtime/vm/regexp.h ('k') | runtime/vm/regexp_assembler.h » ('j') | no next file with comments »