runtime/vm/regexp.cc - Issue 539153002: Port and integrate the irregexp engine from V8

Side by Side Diff: runtime/vm/regexp.cc

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Missed a TODO. Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4

	5 #include "vm/regexp.h"

	6

	7 #include "vm/dart_entry.h"

	8 #include "vm/regexp_assembler.h"

	9 #include "vm/regexp_ast.h"

	10 #include "vm/unibrow-inl.h"

	11 #include "vm/unicode.h"

	12 #include "vm/symbols.h"

	13

	14 #define I isolate()

	15 #define CI compiler->isolate()

	16

	17 namespace dart {

	18

	19 DECLARE_FLAG(bool, trace_irregexp);

	20

	21 #define DEFINE_ACCEPT(Type) \

	22 void Type##Node::Accept(NodeVisitor* visitor) { \

	23 visitor->Visit##Type(this); \

	24 }

	25 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)

	26 #undef DEFINE_ACCEPT

	27

	28

	29 // Default to generating optimized regexp code.

	30 static const bool kRegexpOptimization = true;

	31

	32 // More makes code generation slower, less makes V8 benchmark score lower.

	33 static const intptr_t kMaxLookaheadForBoyerMoore = 8;

	34

	35 // In a 3-character pattern you can maximally step forwards 3 characters

	36 // at a time, which is not always enough to pay for the extra logic.

	37 static const intptr_t kPatternTooShortForBoyerMoore = 2;

	38

	39 // The '2' variant has inclusive from and exclusive to.

	40 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12,

	41 // which include WhiteSpace (7.2) or LineTerminator (7.3) values.

	42 static const intptr_t kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,

	43 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,

	44 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,

	45 0xFEFF, 0xFF00, 0x10000 };

	46 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);

	47 static const intptr_t kWordRanges[] = {

	48 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };

	49 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges);

	50 static const intptr_t kDigitRanges[] = { '0', '9' + 1, 0x10000 };

	51 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges);

	52 static const intptr_t kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };

	53 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);

	54 static const intptr_t kLineTerminatorRanges[] = {

	55 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, 0x10000 };

	56 static const intptr_t kLineTerminatorRangeCount =

	57 ARRAY_SIZE(kLineTerminatorRanges);

	58

	59

	60 static inline void PrintUtf16(uint16_t c) {

	61 const char* format = (0x20 <= c && c <= 0x7F) ?

	62 "%c" : (c <= 0xff) ? "\\x%02x" : "\\u%04x";

	63 OS::Print(format, c);

	64 }

	65

	66

	67 // We need to check for the following characters: 0x39c 0x3bc 0x178.

	68 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {

	69 // TODO(dcarney): this could be a lot more efficient.

	70 return range.Contains(0x39c) \|\|

	71 range.Contains(0x3bc) \|\| range.Contains(0x178);

	72 }

	73

	74

	75 static bool RangesContainLatin1Equivalents(

	76 ZoneGrowableArray<CharacterRange>* ranges) {

	77 for (intptr_t i = 0; i < ranges->length(); i++) {

	78 // TODO(dcarney): this could be a lot more efficient.

	79 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true;

	80 }

	81 return false;

	82 }

	83

	84 static uint16_t ConvertNonLatin1ToLatin1(uint16_t c) {

	85 ASSERT(c > Symbols::kMaxOneCharCodeSymbol);

	86 switch (c) {

	87 // This are equivalent characters in unicode.

	88 case 0x39c:

	89 case 0x3bc:

	90 return 0xb5;

	91 // This is an uppercase of a Latin-1 character

	92 // outside of Latin-1.

	93 case 0x178:

	94 return 0xff;

	95 }

	96 return 0;

	97 }

	98

	99

	100 class VisitMarker : public ValueObject {

	101 public:

	102 explicit VisitMarker(NodeInfo* info) : info_(info) {

	103 ASSERT(!info->visited);

	104 info->visited = true;

	105 }

	106 ~VisitMarker() {

	107 info_->visited = false;

	108 }

	109 private:

	110 NodeInfo* info_;

	111 };

	112

	113

	114 class FrequencyCollator : public ValueObject {

	115 public:

	116 FrequencyCollator() : total_samples_(0) {

	117 for (intptr_t i = 0; i < RegExpMacroAssembler::kTableSize; i++) {

	118 frequencies_[i] = CharacterFrequency(i);

	119 }

	120 }

	121

	122 void CountCharacter(intptr_t character) {

	123 intptr_t index = (character & RegExpMacroAssembler::kTableMask);

	124 frequencies_[index].Increment();

	125 total_samples_++;

	126 }

	127

	128 // Does not measure in percent, but rather per-128 (the table size from the

	129 // regexp macro assembler).

	130 intptr_t Frequency(intptr_t in_character) {

	131 ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character);

	132 if (total_samples_ < 1) return 1; // Division by zero.

	133 intptr_t freq_in_per128 =

	134 (frequencies_[in_character].counter() * 128) / total_samples_;

	135 return freq_in_per128;

	136 }

	137

	138 private:

	139 class CharacterFrequency {

	140 public:

	141 CharacterFrequency() : counter_(0), character_(-1) { }

	142 explicit CharacterFrequency(intptr_t character)

	143 : counter_(0), character_(character) { }

	144

	145 void Increment() { counter_++; }

	146 intptr_t counter() { return counter_; }

	147 intptr_t character() { return character_; }

	148

	149 private:

	150 intptr_t counter_;

	151 intptr_t character_;

	152

	153 DISALLOW_ALLOCATION();

	154 };

	155

	156

	157 private:

	158 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];

	159 intptr_t total_samples_;

	160 };

	161

	162

	163 class RegExpCompiler : public ValueObject {

	164 public:

	165 RegExpCompiler(intptr_t capture_count,

	166 bool ignore_case,

	167 intptr_t specialization_cid);

	168

	169 intptr_t AllocateRegister() {

	170 return next_register_++;

	171 }

	172

	173 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler,

	174 RegExpNode* start,

	175 intptr_t capture_count,

	176 const String& pattern);

	177

	178 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }

	179

	180 static const intptr_t kImplementationOffset = 0;

	181 static const intptr_t kNumberOfRegistersOffset = 0;

	182 static const intptr_t kCodeOffset = 1;

	183

	184 IRRegExpMacroAssembler* macro_assembler() { return macro_assembler_; }

	185 EndNode* accept() { return accept_; }

	186

	187 static const intptr_t kMaxRecursion = 100;

	188 inline intptr_t recursion_depth() { return recursion_depth_; }

	189 inline void IncrementRecursionDepth() { recursion_depth_++; }

	190 inline void DecrementRecursionDepth() { recursion_depth_--; }

	191

	192 void SetRegExpTooBig() { reg_exp_too_big_ = true; }

	193

	194 inline bool ignore_case() { return ignore_case_; }

	195 inline bool ascii() const {

	196 return (specialization_cid_ == kOneByteStringCid \|\|

	197 specialization_cid_ == kExternalOneByteStringCid);

	198 }

	199 inline intptr_t specialization_cid() { return specialization_cid_; }

	200 FrequencyCollator* frequency_collator() { return &frequency_collator_; }

	201

	202 intptr_t current_expansion_factor() { return current_expansion_factor_; }

	203 void set_current_expansion_factor(intptr_t value) {

	204 current_expansion_factor_ = value;

	205 }

	206

	207 Isolate* isolate() const { return isolate_; }

	208

	209 static const intptr_t kNoRegister = -1;

	210

	211 private:

	212 EndNode* accept_;

	213 intptr_t next_register_;

	214 ZoneGrowableArray<RegExpNode> work_list_;

	215 intptr_t recursion_depth_;

	216 IRRegExpMacroAssembler* macro_assembler_;

	217 bool ignore_case_;

	218 intptr_t specialization_cid_;

	219 bool reg_exp_too_big_;

	220 intptr_t current_expansion_factor_;

	221 FrequencyCollator frequency_collator_;

	222 Isolate* isolate_;

	223 };

	224

	225

	226 class RecursionCheck : public ValueObject {

	227 public:

	228 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {

	229 compiler->IncrementRecursionDepth();

	230 }

	231 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }

	232 private:

	233 RegExpCompiler* compiler_;

	234 };

	235

	236

	237 // Scoped object to keep track of how much we unroll quantifier loops in the

	238 // regexp graph generator.

	239 class RegExpExpansionLimiter : public ValueObject {

	240 public:

	241 static const intptr_t kMaxExpansionFactor = 6;

	242 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor)

	243 : compiler_(compiler),

	244 saved_expansion_factor_(compiler->current_expansion_factor()),

	245 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {

	246 ASSERT(factor > 0);

	247 if (ok_to_expand_) {

	248 if (factor > kMaxExpansionFactor) {

	249 // Avoid integer overflow of the current expansion factor.

	250 ok_to_expand_ = false;

	251 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);

	252 } else {

	253 intptr_t new_factor = saved_expansion_factor_ * factor;

	254 ok_to_expand_ = (new_factor <= kMaxExpansionFactor);

	255 compiler->set_current_expansion_factor(new_factor);

	256 }

	257 }

	258 }

	259

	260 ~RegExpExpansionLimiter() {

	261 compiler_->set_current_expansion_factor(saved_expansion_factor_);

	262 }

	263

	264 bool ok_to_expand() { return ok_to_expand_; }

	265

	266 private:

	267 RegExpCompiler* compiler_;

	268 intptr_t saved_expansion_factor_;

	269 bool ok_to_expand_;

	270

	271 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);

	272 };

	273

	274

	275 // Node generation -------------------------------------------------------------

	276

	277 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,

	278 RegExpNode* on_success) {

	279 ZoneGrowableArray<TextElement>* elms =

	280 new(CI) ZoneGrowableArray<TextElement>(1);

	281 elms->Add(TextElement::Atom(this));

	282 return new(CI) TextNode(elms, on_success);

	283 }

	284

	285

	286 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,

	287 RegExpNode* on_success) {

	288 ZoneGrowableArray<TextElement>* elms =

	289 new(CI) ZoneGrowableArray<TextElement>(1);

	290 for (intptr_t i = 0; i < elements()->length(); i++) {

	291 elms->Add(elements()->At(i));

	292 }

	293 return new(CI) TextNode(elms, on_success);

	294 }

	295

	296

	297 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,

	298 RegExpNode* on_success) {

	299 return new(CI) TextNode(this, on_success);

	300 }

	301

	302

	303 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,

	304 RegExpNode* on_success) {

	305 ZoneGrowableArray<RegExpTree> alternatives = this->alternatives();

	306 intptr_t length = alternatives->length();

	307 ChoiceNode* result =

	308 new(CI) ChoiceNode(length, CI);

	309 for (intptr_t i = 0; i < length; i++) {

	310 GuardedAlternative alternative(alternatives->At(i)->ToNode(compiler,

	311 on_success));

	312 result->AddAlternative(alternative);

	313 }

	314 return result;

	315 }

	316

	317

	318 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,

	319 RegExpNode* on_success) {

	320 return ToNode(min(),

	321 max(),

	322 is_greedy(),

	323 body(),

	324 compiler,

	325 on_success);

	326 }

	327

	328

	329 RegExpNode* RegExpQuantifier::ToNode(intptr_t min,

	330 intptr_t max,

	331 bool is_greedy,

	332 RegExpTree* body,

	333 RegExpCompiler* compiler,

	334 RegExpNode* on_success,

	335 bool not_at_start) {

	336 // x{f, t} becomes this:

	337 //

	338 // (r++)<-.

	339 // \| `

	340 // \| (x)

	341 // v ^

	342 // (r=0)-->(?)---/ [if r < t]

	343 // \|

	344 // [if r >= f] \----> ...

	345 //

	346

	347 // 15.10.2.5 RepeatMatcher algorithm.

	348 // The parser has already eliminated the case where max is 0. In the case

	349 // where max_match is zero the parser has removed the quantifier if min was

	350 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom.

	351

	352 // If we know that we cannot match zero length then things are a little

	353 // simpler since we don't need to make the special zero length match check

	354 // from step 2.1. If the min and max are small we can unroll a little in

	355 // this case.

	356 // Unroll (foo)+ and (foo){3,}

	357 static const intptr_t kMaxUnrolledMinMatches = 3;

	358 // Unroll (foo)? and (foo){x,3}

	359 static const intptr_t kMaxUnrolledMaxMatches = 3;

	360 if (max == 0) return on_success; // This can happen due to recursion.

	361 bool body_can_be_empty = (body->min_match() == 0);

	362 intptr_t body_start_reg = RegExpCompiler::kNoRegister;

	363 Interval capture_registers = body->CaptureRegisters();

	364 bool needs_capture_clearing = !capture_registers.is_empty();

	365 Isolate* isolate = compiler->isolate();

	366

	367 if (body_can_be_empty) {

	368 body_start_reg = compiler->AllocateRegister();

	369 } else if (kRegexpOptimization && !needs_capture_clearing) {

	370 // Only unroll if there are no captures and the body can't be

	371 // empty.

	372 {

	373 RegExpExpansionLimiter limiter(

	374 compiler, min + ((max != min) ? 1 : 0));

	375 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {

	376 intptr_t new_max = (max == kInfinity) ? max : max - min;

	377 // Recurse once to get the loop or optional matches after the fixed

	378 // ones.

	379 RegExpNode* answer = ToNode(

	380 0, new_max, is_greedy, body, compiler, on_success, true);

	381 // Unroll the forced matches from 0 to min. This can cause chains of

	382 // TextNodes (which the parser does not generate). These should be

	383 // combined if it turns out they hinder good code generation.

	384 for (intptr_t i = 0; i < min; i++) {

	385 answer = body->ToNode(compiler, answer);

	386 }

	387 return answer;

	388 }

	389 }

	390 if (max <= kMaxUnrolledMaxMatches && min == 0) {

	391 ASSERT(max > 0); // Due to the 'if' above.

	392 RegExpExpansionLimiter limiter(compiler, max);

	393 if (limiter.ok_to_expand()) {

	394 // Unroll the optional matches up to max.

	395 RegExpNode* answer = on_success;

	396 for (intptr_t i = 0; i < max; i++) {

	397 ChoiceNode* alternation = new(isolate) ChoiceNode(2, isolate);

	398 if (is_greedy) {

	399 alternation->AddAlternative(

	400 GuardedAlternative(body->ToNode(compiler, answer)));

	401 alternation->AddAlternative(GuardedAlternative(on_success));

	402 } else {

	403 alternation->AddAlternative(GuardedAlternative(on_success));

	404 alternation->AddAlternative(

	405 GuardedAlternative(body->ToNode(compiler, answer)));

	406 }

	407 answer = alternation;

	408 if (not_at_start) alternation->set_not_at_start();

	409 }

	410 return answer;

	411 }

	412 }

	413 }

	414 bool has_min = min > 0;

	415 bool has_max = max < RegExpTree::kInfinity;

	416 bool needs_counter = has_min \|\| has_max;

	417 intptr_t reg_ctr = needs_counter

	418 ? compiler->AllocateRegister()

	419 : RegExpCompiler::kNoRegister;

	420 LoopChoiceNode* center = new(isolate) LoopChoiceNode(body->min_match() == 0,

	421 isolate);

	422 if (not_at_start) center->set_not_at_start();

	423 RegExpNode* loop_return = needs_counter

	424 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))

	425 : static_cast<RegExpNode*>(center);

	426 if (body_can_be_empty) {

	427 // If the body can be empty we need to check if it was and then

	428 // backtrack.

	429 loop_return = ActionNode::EmptyMatchCheck(body_start_reg,

	430 reg_ctr,

	431 min,

	432 loop_return);

	433 }

	434 RegExpNode* body_node = body->ToNode(compiler, loop_return);

	435 if (body_can_be_empty) {

	436 // If the body can be empty we need to store the start position

	437 // so we can bail out if it was empty.

	438 body_node = ActionNode::StorePosition(body_start_reg, false, body_node);

	439 }

	440 if (needs_capture_clearing) {

	441 // Before entering the body of this loop we need to clear captures.

	442 body_node = ActionNode::ClearCaptures(capture_registers, body_node);

	443 }

	444 GuardedAlternative body_alt(body_node);

	445 if (has_max) {

	446 Guard* body_guard =

	447 new(isolate) Guard(reg_ctr, Guard::LT, max);

	448 body_alt.AddGuard(body_guard, isolate);

	449 }

	450 GuardedAlternative rest_alt(on_success);

	451 if (has_min) {

	452 Guard* rest_guard = new(isolate) Guard(reg_ctr, Guard::GEQ, min);

	453 rest_alt.AddGuard(rest_guard, isolate);

	454 }

	455 if (is_greedy) {

	456 center->AddLoopAlternative(body_alt);

	457 center->AddContinueAlternative(rest_alt);

	458 } else {

	459 center->AddContinueAlternative(rest_alt);

	460 center->AddLoopAlternative(body_alt);

	461 }

	462 if (needs_counter) {

	463 return ActionNode::SetRegister(reg_ctr, 0, center);

	464 } else {

	465 return center;

	466 }

	467 }

	468

	469

	470 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,

	471 RegExpNode* on_success) {

	472 switch (assertion_type()) {

	473 case START_OF_LINE:

	474 return AssertionNode::AfterNewline(on_success);

	475 case START_OF_INPUT:

	476 return AssertionNode::AtStart(on_success);

	477 case BOUNDARY:

	478 return AssertionNode::AtBoundary(on_success);

	479 case NON_BOUNDARY:

	480 return AssertionNode::AtNonBoundary(on_success);

	481 case END_OF_INPUT:

	482 return AssertionNode::AtEnd(on_success);

	483 case END_OF_LINE: {

	484 // Compile $ in multiline regexps as an alternation with a positive

	485 // lookahead in one side and an end-of-input on the other side.

	486 // We need two registers for the lookahead.

	487 intptr_t stack_pointer_register = compiler->AllocateRegister();

	488 intptr_t position_register = compiler->AllocateRegister();

	489 // The ChoiceNode to distinguish between a newline and end-of-input.

	490 ChoiceNode* result = new ChoiceNode(2, on_success->isolate());

	491 // Create a newline atom.

	492 ZoneGrowableArray<CharacterRange>* newline_ranges =

	493 new ZoneGrowableArray<CharacterRange>(3);

	494 CharacterRange::AddClassEscape('n', newline_ranges);

	495 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n');

	496 TextNode* newline_matcher = new TextNode(

	497 newline_atom,

	498 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,

	499 position_register,

	500 0, // No captures inside.

	501 -1, // Ignored if no captures.

	502 on_success));

	503 // Create an end-of-input matcher.

	504 RegExpNode* end_of_line = ActionNode::BeginSubmatch(

	505 stack_pointer_register,

	506 position_register,

	507 newline_matcher);

	508 // Add the two alternatives to the ChoiceNode.

	509 GuardedAlternative eol_alternative(end_of_line);

	510 result->AddAlternative(eol_alternative);

	511 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));

	512 result->AddAlternative(end_alternative);

	513 return result;

	514 }

	515 default:

	516 UNREACHABLE();

	517 }

	518 return on_success;

	519 }

	520

	521

	522 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,

	523 RegExpNode* on_success) {

	524 return new(CI)

	525 BackReferenceNode(RegExpCapture::StartRegister(index()),

	526 RegExpCapture::EndRegister(index()),

	527 on_success);

	528 }

	529

	530

	531 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,

	532 RegExpNode* on_success) {

	533 return on_success;

	534 }

	535

	536

	537 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,

	538 RegExpNode* on_success) {

	539 intptr_t stack_pointer_register = compiler->AllocateRegister();

	540 intptr_t position_register = compiler->AllocateRegister();

	541

	542 const intptr_t registers_per_capture = 2;

	543 const intptr_t register_of_first_capture = 2;

	544 intptr_t register_count = capture_count_ * registers_per_capture;

	545 intptr_t register_start =

	546 register_of_first_capture + capture_from_ * registers_per_capture;

	547

	548 RegExpNode* success;

	549 if (is_positive()) {

	550 RegExpNode* node = ActionNode::BeginSubmatch(

	551 stack_pointer_register,

	552 position_register,

	553 body()->ToNode(

	554 compiler,

	555 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,

	556 position_register,

	557 register_count,

	558 register_start,

	559 on_success)));

	560 return node;

	561 } else {

	562 // We use a ChoiceNode for a negative lookahead because it has most of

	563 // the characteristics we need. It has the body of the lookahead as its

	564 // first alternative and the expression after the lookahead of the second

	565 // alternative. If the first alternative succeeds then the

	566 // NegativeSubmatchSuccess will unwind the stack including everything the

	567 // choice node set up and backtrack. If the first alternative fails then

	568 // the second alternative is tried, which is exactly the desired result

	569 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special

	570 // ChoiceNode that knows to ignore the first exit when calculating quick

	571 // checks.

	572

	573 GuardedAlternative body_alt(

	574 body()->ToNode(

	575 compiler,

	576 success = new(CI) NegativeSubmatchSuccess(stack_pointer_register,

	577 position_register,

	578 register_count,

	579 register_start,

	580 CI)));

	581 ChoiceNode* choice_node =

	582 new(CI) NegativeLookaheadChoiceNode(body_alt,

	583 GuardedAlternative(on_success),

	584 CI);

	585 return ActionNode::BeginSubmatch(stack_pointer_register,

	586 position_register,

	587 choice_node);

	588 }

	589 }

	590

	591

	592 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,

	593 RegExpNode* on_success) {

	594 return ToNode(body(), index(), compiler, on_success);

	595 }

	596

	597

	598 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,

	599 intptr_t index,

	600 RegExpCompiler* compiler,

	601 RegExpNode* on_success) {

	602 intptr_t start_reg = RegExpCapture::StartRegister(index);

	603 intptr_t end_reg = RegExpCapture::EndRegister(index);

	604 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);

	605 RegExpNode* body_node = body->ToNode(compiler, store_end);

	606 return ActionNode::StorePosition(start_reg, true, body_node);

	607 }

	608

	609

	610 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,

	611 RegExpNode* on_success) {

	612 ZoneGrowableArray<RegExpTree> children = nodes();

	613 RegExpNode* current = on_success;

	614 for (intptr_t i = children->length() - 1; i >= 0; i--) {

	615 current = children->At(i)->ToNode(compiler, current);

	616 }

	617 return current;

	618 }

	619

	620

	621 // ASCII filtering -------------------------------------------------------------

	622

	623

	624 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) {

	625 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case);

	626 if (next == NULL) return set_replacement(NULL);

	627 on_success_ = next;

	628 return set_replacement(this);

	629 }

	630

	631

	632 RegExpNode* SeqRegExpNode::FilterASCII(intptr_t depth, bool ignore_case) {

	633 if (info()->replacement_calculated) return replacement();

	634 if (depth < 0) return this;

	635 ASSERT(!info()->visited);

	636 VisitMarker marker(info());

	637 return FilterSuccessor(depth - 1, ignore_case);

	638 }

	639

	640

	641 RegExpNode* TextNode::FilterASCII(intptr_t depth, bool ignore_case) {

	642 if (info()->replacement_calculated) return replacement();

	643 if (depth < 0) return this;

	644 ASSERT(!info()->visited);

	645 VisitMarker marker(info());

	646 intptr_t element_count = elms_->length();

	647 for (intptr_t i = 0; i < element_count; i++) {

	648 TextElement elm = elms_->At(i);

	649 if (elm.text_type() == TextElement::ATOM) {

	650 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();

	651 for (intptr_t j = 0; j < quarks->length(); j++) {

	652 uint16_t c = quarks->At(j);

	653 if (c <= Symbols::kMaxOneCharCodeSymbol) continue;

	654 if (!ignore_case) return set_replacement(NULL);

	655 // Here, we need to check for characters whose upper and lower cases

	656 // are outside the Latin-1 range.

	657 uint16_t converted = ConvertNonLatin1ToLatin1(c);

	658 // Character is outside Latin-1 completely

	659 if (converted == 0) return set_replacement(NULL);

	660 // Convert quark to Latin-1 in place.

	661 (*quarks)[0] = converted;

	662 }

	663 } else {

	664 ASSERT(elm.text_type() == TextElement::CHAR_CLASS);

	665 RegExpCharacterClass* cc = elm.char_class();

	666 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();

	667 if (!CharacterRange::IsCanonical(ranges)) {

	668 CharacterRange::Canonicalize(ranges);

	669 }

	670 // Now they are in order so we only need to look at the first.

	671 intptr_t range_count = ranges->length();

	672 if (cc->is_negated()) {

	673 if (range_count != 0 &&

	674 ranges->At(0).from() == 0 &&

	675 ranges->At(0).to() >= Symbols::kMaxOneCharCodeSymbol) {

	676 // This will be handled in a later filter.

	677 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;

	678 return set_replacement(NULL);

	679 }

	680 } else {

	681 if (range_count == 0 \|\|

	682 ranges->At(0).from() > Symbols::kMaxOneCharCodeSymbol) {

	683 // This will be handled in a later filter.

	684 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;

	685 return set_replacement(NULL);

	686 }

	687 }

	688 }

	689 }

	690 return FilterSuccessor(depth - 1, ignore_case);

	691 }

	692

	693

	694 RegExpNode* LoopChoiceNode::FilterASCII(intptr_t depth, bool ignore_case) {

	695 if (info()->replacement_calculated) return replacement();

	696 if (depth < 0) return this;

	697 if (info()->visited) return this;

	698 {

	699 VisitMarker marker(info());

	700

	701 RegExpNode* continue_replacement =

	702 continue_node_->FilterASCII(depth - 1, ignore_case);

	703 // If we can't continue after the loop then there is no sense in doing the

	704 // loop.

	705 if (continue_replacement == NULL) return set_replacement(NULL);

	706 }

	707

	708 return ChoiceNode::FilterASCII(depth - 1, ignore_case);

	709 }

	710

	711

	712 RegExpNode* ChoiceNode::FilterASCII(intptr_t depth, bool ignore_case) {

	713 if (info()->replacement_calculated) return replacement();

	714 if (depth < 0) return this;

	715 if (info()->visited) return this;

	716 VisitMarker marker(info());

	717 intptr_t choice_count = alternatives_->length();

	718

	719 for (intptr_t i = 0; i < choice_count; i++) {

	720 GuardedAlternative alternative = alternatives_->At(i);

	721 if (alternative.guards() != NULL && alternative.guards()->length() != 0) {

	722 set_replacement(this);

	723 return this;

	724 }

	725 }

	726

	727 intptr_t surviving = 0;

	728 RegExpNode* survivor = NULL;

	729 for (intptr_t i = 0; i < choice_count; i++) {

	730 GuardedAlternative alternative = alternatives_->At(i);

	731 RegExpNode* replacement =

	732 alternative.node()->FilterASCII(depth - 1, ignore_case);

	733 ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK.

	734 if (replacement != NULL) {

	735 (*alternatives_)[i].set_node(replacement);

	736 surviving++;

	737 survivor = replacement;

	738 }

	739 }

	740 if (surviving < 2) return set_replacement(survivor);

	741

	742 set_replacement(this);

	743 if (surviving == choice_count) {

	744 return this;

	745 }

	746 // Only some of the nodes survived the filtering. We need to rebuild the

	747 // alternatives list.

	748 ZoneGrowableArray<GuardedAlternative>* new_alternatives =

	749 new(I) ZoneGrowableArray<GuardedAlternative>(surviving);

	750 for (intptr_t i = 0; i < choice_count; i++) {

	751 RegExpNode* replacement =

	752 (*alternatives_)[i].node()->FilterASCII(depth - 1, ignore_case);

	753 if (replacement != NULL) {

	754 (*alternatives_)[i].set_node(replacement);

	755 new_alternatives->Add((*alternatives_)[i]);

	756 }

	757 }

	758 alternatives_ = new_alternatives;

	759 return this;

	760 }

	761

	762

	763 RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(intptr_t depth,

	764 bool ignore_case) {

	765 if (info()->replacement_calculated) return replacement();

	766 if (depth < 0) return this;

	767 if (info()->visited) return this;

	768 VisitMarker marker(info());

	769 // Alternative 0 is the negative lookahead, alternative 1 is what comes

	770 // afterwards.

	771 RegExpNode* node = (*alternatives_)[1].node();

	772 RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case);

	773 if (replacement == NULL) return set_replacement(NULL);

	774 (*alternatives_)[1].set_node(replacement);

	775

	776 RegExpNode* neg_node = (*alternatives_)[0].node();

	777 RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case);

	778 // If the negative lookahead is always going to fail then

	779 // we don't need to check it.

	780 if (neg_replacement == NULL) return set_replacement(replacement);

	781 (*alternatives_)[0].set_node(neg_replacement);

	782 return set_replacement(this);

	783 }

	784

	785

	786 // Code emission ---------------------------------------------------------------

	787

	788

	789 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,

	790 Guard* guard,

	791 Trace* trace) {

	792 switch (guard->op()) {

	793 case Guard::LT:

	794 ASSERT(!trace->mentions_reg(guard->reg()));

	795 macro_assembler->IfRegisterGE(guard->reg(),

	796 guard->value(),

	797 trace->backtrack());

	798 break;

	799 case Guard::GEQ:

	800 ASSERT(!trace->mentions_reg(guard->reg()));

	801 macro_assembler->IfRegisterLT(guard->reg(),

	802 guard->value(),

	803 trace->backtrack());

	804 break;

	805 }

	806 }

	807

	808

	809 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {

	810 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	811

	812 // Omit flushing the trace. We discard the entire stack frame anyway.

	813

	814 if (!label()->IsBound()) {

	815 // We are completely independent of the trace, since we ignore it,

	816 // so this code can be used as the generic version.

	817 assembler->BindBlock(label());

	818 }

	819

	820 // Throw away everything on the backtrack stack since the start

	821 // of the negative submatch and restore the character position.

	822 assembler->ReadCurrentPositionFromRegister(current_position_register_);

	823 assembler->ReadStackPointerFromRegister(stack_pointer_register_);

	824 if (clear_capture_count_ > 0) {

	825 // Clear any captures that might have been performed during the success

	826 // of the body of the negative look-ahead.

	827 intptr_t clear_capture_end =

	828 clear_capture_start_ + clear_capture_count_ - 1;

	829 assembler->ClearRegisters(clear_capture_start_, clear_capture_end);

	830 }

	831 // Now that we have unwound the stack we find at the top of the stack the

	832 // backtrack that the BeginSubmatch node got.

	833 assembler->Backtrack();

	834 }

	835

	836

	837 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) {

	838 ASSERT(*cp_offset == 0);

	839 for (DeferredAction* action = actions_;

	840 action != NULL;

	841 action = action->next()) {

	842 if (action->Mentions(reg)) {

	843 if (action->action_type() == ActionNode::STORE_POSITION) {

	844 cp_offset = static_cast<DeferredCapture>(action)->cp_offset();

	845 return true;

	846 } else {

	847 return false;

	848 }

	849 }

	850 }

	851 return false;

	852 }

	853

	854

	855 // This is called as we come into a loop choice node and some other tricky

	856 // nodes. It normalizes the state of the code generator to ensure we can

	857 // generate generic code.

	858 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers,

	859 Isolate* isolate) {

	860 intptr_t max_register = RegExpCompiler::kNoRegister;

	861 for (DeferredAction* action = actions_;

	862 action != NULL;

	863 action = action->next()) {

	864 if (action->action_type() == ActionNode::CLEAR_CAPTURES) {

	865 Interval range = static_cast<DeferredClearCaptures*>(action)->range();

	866 for (intptr_t i = range.from(); i <= range.to(); i++)

	867 affected_registers->Set(i, isolate);

	868 if (range.to() > max_register) max_register = range.to();

	869 } else {

	870 affected_registers->Set(action->reg(), isolate);

	871 if (action->reg() > max_register) max_register = action->reg();

	872 }

	873 }

	874 return max_register;

	875 }

	876

	877

	878 bool Trace::DeferredAction::Mentions(intptr_t that) {

	879 if (action_type() == ActionNode::CLEAR_CAPTURES) {

	880 Interval range = static_cast<DeferredClearCaptures*>(this)->range();

	881 return range.Contains(that);

	882 } else {

	883 return reg() == that;

	884 }

	885 }

	886

	887

	888 bool Trace::mentions_reg(intptr_t reg) {

	889 for (DeferredAction* action = actions_;

	890 action != NULL;

	891 action = action->next()) {

	892 if (action->Mentions(reg))

	893 return true;

	894 }

	895 return false;

	896 }

	897

	898

	899 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,

	900 intptr_t max_register,

	901 const OutSet& registers_to_pop,

	902 const OutSet& registers_to_clear) {

	903 for (intptr_t reg = max_register; reg >= 0; reg--) {

	904 if (registers_to_pop.Get(reg)) {

	905 assembler->PopRegister(reg);

	906 } else if (registers_to_clear.Get(reg)) {

	907 intptr_t clear_to = reg;

	908 while (reg > 0 && registers_to_clear.Get(reg - 1)) {

	909 reg--;

	910 }

	911 assembler->ClearRegisters(reg, clear_to);

	912 }

	913 }

	914 }

	915

	916

	917 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,

	918 intptr_t max_register,

	919 const OutSet& affected_registers,

	920 OutSet* registers_to_pop,

	921 OutSet* registers_to_clear,

	922 Isolate* isolate) {

	923 for (intptr_t reg = 0; reg <= max_register; reg++) {

	924 if (!affected_registers.Get(reg)) {

	925 continue;

	926 }

	927

	928 // The chronologically first deferred action in the trace

	929 // is used to infer the action needed to restore a register

	930 // to its previous state (or not, if it's safe to ignore it).

	931 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };

	932 DeferredActionUndoType undo_action = IGNORE;

	933

	934 intptr_t value = 0;

	935 bool absolute = false;

	936 bool clear = false;

	937 intptr_t store_position = -1;

	938 // This is a little tricky because we are scanning the actions in reverse

	939 // historical order (newest first).

	940 for (DeferredAction* action = actions_;

	941 action != NULL;

	942 action = action->next()) {

	943 if (action->Mentions(reg)) {

	944 switch (action->action_type()) {

	945 case ActionNode::SET_REGISTER: {

	946 Trace::DeferredSetRegister* psr =

	947 static_cast<Trace::DeferredSetRegister*>(action);

	948 if (!absolute) {

	949 value += psr->value();

	950 absolute = true;

	951 }

	952 // SET_REGISTER is currently only used for newly introduced loop

	953 // counters. They can have a significant previous value if they

	954 // occour in a loop. TODO(lrn): Propagate this information, so

	955 // we can set undo_action to IGNORE if we know there is no value to

	956 // restore.

	957 undo_action = RESTORE;

	958 ASSERT(store_position == -1);

	959 ASSERT(!clear);

	960 break;

	961 }

	962 case ActionNode::INCREMENT_REGISTER:

	963 if (!absolute) {

	964 value++;

	965 }

	966 ASSERT(store_position == -1);

	967 ASSERT(!clear);

	968 undo_action = RESTORE;

	969 break;

	970 case ActionNode::STORE_POSITION: {

	971 Trace::DeferredCapture* pc =

	972 static_cast<Trace::DeferredCapture*>(action);

	973 if (!clear && store_position == -1) {

	974 store_position = pc->cp_offset();

	975 }

	976

	977 // For captures we know that stores and clears alternate.

	978 // Other register, are never cleared, and if the occur

	979 // inside a loop, they might be assigned more than once.

	980 if (reg <= 1) {

	981 // Registers zero and one, aka "capture zero", is

	982 // always set correctly if we succeed. There is no

	983 // need to undo a setting on backtrack, because we

	984 // will set it again or fail.

	985 undo_action = IGNORE;

	986 } else {

	987 undo_action = pc->is_capture() ? CLEAR : RESTORE;

	988 }

	989 ASSERT(!absolute);

	990 ASSERT(value == 0);

	991 break;

	992 }

	993 case ActionNode::CLEAR_CAPTURES: {

	994 // Since we're scanning in reverse order, if we've already

	995 // set the position we have to ignore historically earlier

	996 // clearing operations.

	997 if (store_position == -1) {

	998 clear = true;

	999 }

	1000 undo_action = RESTORE;

	1001 ASSERT(!absolute);

	1002 ASSERT(value == 0);

	1003 break;

	1004 }

	1005 default:

	1006 UNREACHABLE();

	1007 break;

	1008 }

	1009 }

	1010 }

	1011 // Prepare for the undo-action (e.g., push if it's going to be popped).

	1012 if (undo_action == RESTORE) {

	1013 assembler->PushRegister(reg);

	1014 registers_to_pop->Set(reg, isolate);

	1015 } else if (undo_action == CLEAR) {

	1016 registers_to_clear->Set(reg, isolate);

	1017 }

	1018 // Perform the chronologically last action (or accumulated increment)

	1019 // for the register.

	1020 if (store_position != -1) {

	1021 assembler->WriteCurrentPositionToRegister(reg, store_position);

	1022 } else if (clear) {

	1023 assembler->ClearRegisters(reg, reg);

	1024 } else if (absolute) {

	1025 assembler->SetRegister(reg, value);

	1026 } else if (value != 0) {

	1027 assembler->AdvanceRegister(reg, value);

	1028 }

	1029 }

	1030 }

	1031

	1032

	1033 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {

	1034 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1035

	1036 ASSERT(!is_trivial());

	1037

	1038 if (actions_ == NULL && backtrack() == NULL) {

	1039 // Here we just have some deferred cp advances to fix and we are back to

	1040 // a normal situation. We may also have to forget some information gained

	1041 // through a quick check that was already performed.

	1042 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);

	1043 // Create a new trivial state and generate the node with that.

	1044 Trace new_state;

	1045 successor->Emit(compiler, &new_state);

	1046 return;

	1047 }

	1048

	1049 // Generate deferred actions here along with code to undo them again.

	1050 OutSet affected_registers;

	1051

	1052 if (backtrack() != NULL) {

	1053 // Here we have a concrete backtrack location. These are set up by choice

	1054 // nodes and so they indicate that we have a deferred save of the current

	1055 // position which we may need to emit here.

	1056 assembler->PushCurrentPosition();

	1057 }

	1058

	1059 intptr_t max_register = FindAffectedRegisters(&affected_registers, CI);

	1060 OutSet registers_to_pop;

	1061 OutSet registers_to_clear;

	1062 PerformDeferredActions(assembler,

	1063 max_register,

	1064 affected_registers,

	1065 &registers_to_pop,

	1066 &registers_to_clear,

	1067 CI);

	1068 if (cp_offset_ != 0) {

	1069 assembler->AdvanceCurrentPosition(cp_offset_);

	1070 }

	1071

	1072 // Create a new trivial state and generate the node with that.

	1073 BlockLabel undo;

	1074 assembler->PushBacktrack(&undo);

	1075 Trace new_state;

	1076 successor->Emit(compiler, &new_state);

	1077

	1078 // On backtrack we need to restore state.

	1079 assembler->BindBlock(&undo);

	1080 RestoreAffectedRegisters(assembler,

	1081 max_register,

	1082 registers_to_pop,

	1083 registers_to_clear);

	1084 if (backtrack() == NULL) {

	1085 assembler->Backtrack();

	1086 } else {

	1087 assembler->PopCurrentPosition();

	1088 assembler->GoTo(backtrack());

	1089 }

	1090 }

	1091

	1092

	1093 void Trace::InvalidateCurrentCharacter() {

	1094 characters_preloaded_ = 0;

	1095 }

	1096

	1097

	1098 void Trace::AdvanceCurrentPositionInTrace(intptr_t by,

	1099 RegExpCompiler* compiler) {

	1100 ASSERT(by > 0);

	1101 // We don't have an instruction for shifting the current character register

	1102 // down or for using a shifted value for anything so lets just forget that

	1103 // we preloaded any characters into it.

	1104 characters_preloaded_ = 0;

	1105 // Adjust the offsets of the quick check performed information. This

	1106 // information is used to find out what we already determined about the

	1107 // characters by means of mask and compare.

	1108 quick_check_performed_.Advance(by, compiler->ascii());

	1109 cp_offset_ += by;

	1110 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {

	1111 compiler->SetRegExpTooBig();

	1112 cp_offset_ = 0;

	1113 }

	1114 bound_checked_up_to_ = Utils::Maximum(static_cast<intptr_t>(0),

	1115 bound_checked_up_to_ - by);

	1116 }

	1117

	1118

	1119 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1120 if (!trace->is_trivial()) {

	1121 trace->Flush(compiler, this);

	1122 return;

	1123 }

	1124 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1125 if (!label()->IsBound()) {

	1126 assembler->BindBlock(label());

	1127 }

	1128 switch (action_) {

	1129 case ACCEPT:

	1130 assembler->Succeed();

	1131 return;

	1132 case BACKTRACK:

	1133 assembler->GoTo(trace->backtrack());

	1134 return;

	1135 case NEGATIVE_SUBMATCH_SUCCESS:

	1136 // This case is handled in a different virtual method.

	1137 UNREACHABLE();

	1138 }

	1139 UNIMPLEMENTED();

	1140 }

	1141

	1142

	1143 bool QuickCheckDetails::Rationalize(bool asc) {

	1144 bool found_useful_op = false;

	1145 uint32_t char_mask;

	1146 if (asc) {

	1147 char_mask = Symbols::kMaxOneCharCodeSymbol;

	1148 } else {

	1149 char_mask = Utf16::kMaxCodeUnit;

	1150 }

	1151 mask_ = 0;

	1152 value_ = 0;

	1153 intptr_t char_shift = 0;

	1154 for (intptr_t i = 0; i < characters_; i++) {

	1155 Position* pos = &positions_[i];

	1156 if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != 0) {

	1157 found_useful_op = true;

	1158 }

	1159 mask_ \|= (pos->mask & char_mask) << char_shift;

	1160 value_ \|= (pos->value & char_mask) << char_shift;

	1161 char_shift += asc ? 8 : 16;

	1162 }

	1163 return found_useful_op;

	1164 }

	1165

	1166

	1167 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,

	1168 Trace* trace,

	1169 bool preload_has_checked_bounds,

	1170 BlockLabel* on_possible_success,

	1171 QuickCheckDetails* details,

	1172 bool fall_through_on_failure) {

	1173 if (details->characters() == 0) return false;

	1174 GetQuickCheckDetails(

	1175 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);

	1176 if (details->cannot_match()) return false;

	1177 if (!details->Rationalize(compiler->ascii())) return false;

	1178 ASSERT(details->characters() == 1 \|\|

	1179 compiler->macro_assembler()->CanReadUnaligned());

	1180 uint32_t mask = details->mask();

	1181 uint32_t value = details->value();

	1182

	1183 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1184

	1185 if (trace->characters_preloaded() != details->characters()) {

	1186 assembler->LoadCurrentCharacter(trace->cp_offset(),

	1187 trace->backtrack(),

	1188 !preload_has_checked_bounds,

	1189 details->characters());

	1190 }

	1191

	1192

	1193 bool need_mask = true;

	1194

	1195 if (details->characters() == 1) {

	1196 // If number of characters preloaded is 1 then we used a byte or 16 bit

	1197 // load so the value is already masked down.

	1198 uint32_t char_mask;

	1199 if (compiler->ascii()) {

	1200 char_mask = Symbols::kMaxOneCharCodeSymbol;

	1201 } else {

	1202 char_mask = Utf16::kMaxCodeUnit;

	1203 }

	1204 if ((mask & char_mask) == char_mask) need_mask = false;

	1205 mask &= char_mask;

	1206 } else {

	1207 // For 2-character preloads in ASCII mode or 1-character preloads in

	1208 // TWO_BYTE mode we also use a 16 bit load with zero extend.

	1209 if (details->characters() == 2 && compiler->ascii()) {

	1210 if ((mask & 0xffff) == 0xffff) need_mask = false;

	1211 } else if (details->characters() == 1 && !compiler->ascii()) {

	1212 if ((mask & 0xffff) == 0xffff) need_mask = false;

	1213 } else {

	1214 if (mask == 0xffffffff) need_mask = false;

	1215 }

	1216 }

	1217

	1218 if (fall_through_on_failure) {

	1219 if (need_mask) {

	1220 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);

	1221 } else {

	1222 assembler->CheckCharacter(value, on_possible_success);

	1223 }

	1224 } else {

	1225 if (need_mask) {

	1226 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());

	1227 } else {

	1228 assembler->CheckNotCharacter(value, trace->backtrack());

	1229 }

	1230 }

	1231 return true;

	1232 }

	1233

	1234

	1235 // Emit the code to check for a ^ in multiline mode (1-character lookbehind

	1236 // that matches newline or the start of input).

	1237 static void EmitHat(RegExpCompiler* compiler,

	1238 RegExpNode* on_success,

	1239 Trace* trace) {

	1240 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1241 // We will be loading the previous character into the current character

	1242 // register.

	1243 Trace new_trace(*trace);

	1244 new_trace.InvalidateCurrentCharacter();

	1245

	1246 BlockLabel ok;

	1247 if (new_trace.cp_offset() == 0) {

	1248 // The start of input counts as a newline in this context, so skip to

	1249 // ok if we are at the start.

	1250 assembler->CheckAtStart(&ok);

	1251 }

	1252 // We already checked that we are not at the start of input so it must be

	1253 // OK to load the previous character.

	1254 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,

	1255 new_trace.backtrack(),

	1256 false);

	1257 if (!assembler->CheckSpecialCharacterClass('n',

	1258 new_trace.backtrack())) {

	1259 // Newline means \n, \r, 0x2028 or 0x2029.

	1260 if (!compiler->ascii()) {

	1261 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);

	1262 }

	1263 assembler->CheckCharacter('\n', &ok);

	1264 assembler->CheckNotCharacter('\r', new_trace.backtrack());

	1265 }

	1266 assembler->BindBlock(&ok);

	1267 on_success->Emit(compiler, &new_trace);

	1268 }

	1269

	1270

	1271 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1272 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1273 switch (assertion_type_) {

	1274 case AT_END: {

	1275 BlockLabel ok;

	1276 assembler->CheckPosition(trace->cp_offset(), &ok);

	1277 assembler->GoTo(trace->backtrack());

	1278 assembler->BindBlock(&ok);

	1279 break;

	1280 }

	1281 case AT_START: {

	1282 if (trace->at_start() == Trace::FALSE_VALUE) {

	1283 assembler->GoTo(trace->backtrack());

	1284 return;

	1285 }

	1286 if (trace->at_start() == Trace::UNKNOWN) {

	1287 assembler->CheckNotAtStart(trace->backtrack());

	1288 Trace at_start_trace = *trace;

	1289 at_start_trace.set_at_start(true);

	1290 on_success()->Emit(compiler, &at_start_trace);

	1291 return;

	1292 }

	1293 }

	1294 break;

	1295 case AFTER_NEWLINE:

	1296 EmitHat(compiler, on_success(), trace);

	1297 return;

	1298 case AT_BOUNDARY:

	1299 case AT_NON_BOUNDARY: {

	1300 EmitBoundaryCheck(compiler, trace);

	1301 return;

	1302 }

	1303 }

	1304 on_success()->Emit(compiler, trace);

	1305 }

	1306

	1307

	1308 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,

	1309 Trace* trace) {

	1310 // If we are generating a greedy loop then don't stop and don't reuse code.

	1311 if (trace->stop_node() != NULL) {

	1312 return CONTINUE;

	1313 }

	1314

	1315 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1316 if (trace->is_trivial()) {

	1317 if (label_.IsBound()) {

	1318 // We are being asked to generate a generic version, but that's already

	1319 // been done so just go to it.

	1320 macro_assembler->GoTo(&label_);

	1321 return DONE;

	1322 }

	1323 if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {

	1324 // To avoid too deep recursion we push the node to the work queue and just

	1325 // generate a goto here.

	1326 compiler->AddWork(this);

	1327 macro_assembler->GoTo(&label_);

	1328 return DONE;

	1329 }

	1330 // Generate generic version of the node and bind the label for later use.

	1331 macro_assembler->BindBlock(&label_);

	1332 return CONTINUE;

	1333 }

	1334

	1335 // We are being asked to make a non-generic version. Keep track of how many

	1336 // non-generic versions we generate so as not to overdo it.

	1337 trace_count_++;

	1338 if (kRegexpOptimization &&

	1339 trace_count_ < kMaxCopiesCodeGenerated &&

	1340 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {

	1341 return CONTINUE;

	1342 }

	1343

	1344 // If we get here code has been generated for this node too many times or

	1345 // recursion is too deep. Time to switch to a generic version. The code for

	1346 // generic versions above can handle deep recursion properly.

	1347 trace->Flush(compiler, this);

	1348 return DONE;

	1349 }

	1350

	1351

	1352 // This generates the code to match a text node. A text node can contain

	1353 // straight character sequences (possibly to be matched in a case-independent

	1354 // way) and character classes. For efficiency we do not do this in a single

	1355 // pass from left to right. Instead we pass over the text node several times,

	1356 // emitting code for some character positions every time. See the comment on

	1357 // TextEmitPass for details.

	1358 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1359 LimitResult limit_result = LimitVersions(compiler, trace);

	1360 if (limit_result == DONE) return;

	1361 ASSERT(limit_result == CONTINUE);

	1362

	1363 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {

	1364 compiler->SetRegExpTooBig();

	1365 return;

	1366 }

	1367

	1368 if (compiler->ascii()) {

	1369 intptr_t dummy = 0;

	1370 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);

	1371 }

	1372

	1373 bool first_elt_done = false;

	1374 intptr_t bound_checked_to = trace->cp_offset() - 1;

	1375 bound_checked_to += trace->bound_checked_up_to();

	1376

	1377 // If a character is preloaded into the current character register then

	1378 // check that now.

	1379 if (trace->characters_preloaded() == 1) {

	1380 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {

	1381 if (!SkipPass(pass, compiler->ignore_case())) {

	1382 TextEmitPass(compiler,

	1383 static_cast<TextEmitPassType>(pass),

	1384 true,

	1385 trace,

	1386 false,

	1387 &bound_checked_to);

	1388 }

	1389 }

	1390 first_elt_done = true;

	1391 }

	1392

	1393 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {

	1394 if (!SkipPass(pass, compiler->ignore_case())) {

	1395 TextEmitPass(compiler,

	1396 static_cast<TextEmitPassType>(pass),

	1397 false,

	1398 trace,

	1399 first_elt_done,

	1400 &bound_checked_to);

	1401 }

	1402 }

	1403

	1404 Trace successor_trace(*trace);

	1405 successor_trace.set_at_start(false);

	1406 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);

	1407 RecursionCheck rc(compiler);

	1408 on_success()->Emit(compiler, &successor_trace);

	1409 }

	1410

	1411

	1412 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1413 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1414 if (trace->stop_node() == this) {

	1415 intptr_t text_length =

	1416 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0]));

	1417 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);

	1418 // Update the counter-based backtracking info on the stack. This is an

	1419 // optimization for greedy loops (see below).

	1420 ASSERT(trace->cp_offset() == text_length);

	1421 macro_assembler->AdvanceCurrentPosition(text_length);

	1422 macro_assembler->GoTo(trace->loop_label());

	1423 return;

	1424 }

	1425 ASSERT(trace->stop_node() == NULL);

	1426 if (!trace->is_trivial()) {

	1427 trace->Flush(compiler, this);

	1428 return;

	1429 }

	1430 ChoiceNode::Emit(compiler, trace);

	1431 }

	1432

	1433

	1434 // This class is used when generating the alternatives in a choice node. It

	1435 // records the way the alternative is being code generated.

	1436 struct AlternativeGeneration {

	1437 AlternativeGeneration()

	1438 : possible_success(),

	1439 expects_preload(false),

	1440 after(),

	1441 quick_check_details() { }

	1442 BlockLabel possible_success;

	1443 bool expects_preload;

	1444 BlockLabel after;

	1445 QuickCheckDetails quick_check_details;

	1446 };

	1447

	1448

	1449 // Creates a list of AlternativeGenerations. If the list has a reasonable

	1450 // size then it is on the stack, otherwise the excess is on the heap.

	1451 class AlternativeGenerationList {

	1452 public:

	1453 explicit AlternativeGenerationList(intptr_t count)

	1454 : alt_gens_(count) {

	1455 for (intptr_t i = 0; i < count && i < kAFew; i++) {

	1456 alt_gens_.Add(a_few_alt_gens_ + i);

	1457 }

	1458 for (intptr_t i = kAFew; i < count; i++) {

	1459 alt_gens_.Add(new AlternativeGeneration());

	1460 }

	1461 }

	1462 ~AlternativeGenerationList() {

	1463 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) {

	1464 delete alt_gens_[i];

	1465 alt_gens_[i] = NULL;

	1466 }

	1467 }

	1468

	1469 AlternativeGeneration* at(intptr_t i) {

	1470 return alt_gens_[i];

	1471 }

	1472

	1473 private:

	1474 static const intptr_t kAFew = 10;

	1475 GrowableArray<AlternativeGeneration*> alt_gens_;

	1476 AlternativeGeneration a_few_alt_gens_[kAFew];

	1477

	1478 DISALLOW_ALLOCATION();

	1479 };

	1480

	1481

	1482 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1483 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1484 intptr_t choice_count = alternatives_->length();

	1485

	1486 #ifdef DEBUG

	1487 for (intptr_t i = 0; i < choice_count - 1; i++) {

	1488 GuardedAlternative alternative = alternatives_->At(i);

	1489 ZoneGrowableArray<Guard> guards = alternative.guards();

	1490 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();

	1491 for (intptr_t j = 0; j < guard_count; j++) {

	1492 ASSERT(!trace->mentions_reg(guards->At(j)->reg()));

	1493 }

	1494 }

	1495 #endif

	1496

	1497 LimitResult limit_result = LimitVersions(compiler, trace);

	1498 if (limit_result == DONE) return;

	1499 ASSERT(limit_result == CONTINUE);

	1500

	1501 intptr_t new_flush_budget = trace->flush_budget() / choice_count;

	1502 if (trace->flush_budget() == 0 && trace->actions() != NULL) {

	1503 trace->Flush(compiler, this);

	1504 return;

	1505 }

	1506

	1507 RecursionCheck rc(compiler);

	1508

	1509 Trace* current_trace = trace;

	1510

	1511 intptr_t text_length =

	1512 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0]));

	1513 bool greedy_loop = false;

	1514 BlockLabel greedy_loop_label;

	1515 Trace counter_backtrack_trace;

	1516 counter_backtrack_trace.set_backtrack(&greedy_loop_label);

	1517 if (not_at_start()) counter_backtrack_trace.set_at_start(false);

	1518

	1519 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {

	1520 // Here we have special handling for greedy loops containing only text nodes

	1521 // and other simple nodes. These are handled by pushing the current

	1522 // position on the stack and then incrementing the current position each

	1523 // time around the switch. On backtrack we decrement the current position

	1524 // and check it against the pushed value. This avoids pushing backtrack

	1525 // information for each iteration of the loop, which could take up a lot of

	1526 // space.

	1527 greedy_loop = true;

	1528 ASSERT(trace->stop_node() == NULL);

	1529 macro_assembler->PushCurrentPosition();

	1530 current_trace = &counter_backtrack_trace;

	1531 BlockLabel greedy_match_failed;

	1532 Trace greedy_match_trace;

	1533 if (not_at_start()) greedy_match_trace.set_at_start(false);

	1534 greedy_match_trace.set_backtrack(&greedy_match_failed);

	1535 BlockLabel loop_label;

	1536 macro_assembler->BindBlock(&loop_label);

	1537 greedy_match_trace.set_stop_node(this);

	1538 greedy_match_trace.set_loop_label(&loop_label);

	1539 (*alternatives_)[0].node()->Emit(compiler, &greedy_match_trace);

	1540 macro_assembler->BindBlock(&greedy_match_failed);

	1541 }

	1542

	1543 BlockLabel second_choice; // For use in greedy matches.

	1544 macro_assembler->BindBlock(&second_choice);

	1545

	1546 intptr_t first_normal_choice = greedy_loop ? 1 : 0;

	1547

	1548 bool not_at_start = current_trace->at_start() == Trace::FALSE_VALUE;

	1549 const intptr_t kEatsAtLeastNotYetInitialized = -1;

	1550 intptr_t eats_at_least = kEatsAtLeastNotYetInitialized;

	1551

	1552 bool skip_was_emitted = false;

	1553

	1554 if (!greedy_loop && choice_count == 2) {

	1555 GuardedAlternative alt1 = (*alternatives_)[1];

	1556 if (alt1.guards() == NULL \|\| alt1.guards()->length() == 0) {

	1557 RegExpNode* eats_anything_node = alt1.node();

	1558 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==

	1559 this) {

	1560 // At this point we know that we are at a non-greedy loop that will eat

	1561 // any character one at a time. Any non-anchored regexp has such a

	1562 // loop prepended to it in order to find where it starts. We look for

	1563 // a pattern of the form ...abc... where we can look 6 characters ahead

	1564 // and step forwards 3 if the character is not one of abc. Abc need

	1565 // not be atoms, they can be any reasonably limited character class or

	1566 // small alternation.

	1567 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.

	1568 BoyerMooreLookahead* lookahead = bm_info(not_at_start);

	1569 if (lookahead == NULL) {

	1570 eats_at_least = Utils::Minimum(kMaxLookaheadForBoyerMoore,

	1571 EatsAtLeast(kMaxLookaheadForBoyerMoore,

	1572 kRecursionBudget,

	1573 not_at_start));

	1574 if (eats_at_least >= 1) {

	1575 BoyerMooreLookahead* bm =

	1576 new(I) BoyerMooreLookahead(eats_at_least, compiler, I);

	1577 GuardedAlternative alt0 = alternatives_->At(0);

	1578 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, not_at_start);

	1579 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);

	1580 }

	1581 } else {

	1582 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);

	1583 }

	1584 }

	1585 }

	1586 }

	1587

	1588 if (eats_at_least == kEatsAtLeastNotYetInitialized) {

	1589 // Save some time by looking at most one machine word ahead.

	1590 eats_at_least =

	1591 EatsAtLeast(compiler->ascii() ? 4 : 2, kRecursionBudget, not_at_start);

	1592 }

	1593 intptr_t preload_characters =

	1594 CalculatePreloadCharacters(compiler, eats_at_least);

	1595

	1596 bool preload_is_current = !skip_was_emitted &&

	1597 (current_trace->characters_preloaded() == preload_characters);

	1598 bool preload_has_checked_bounds = preload_is_current;

	1599

	1600 AlternativeGenerationList alt_gens(choice_count);

	1601

	1602 // For now we just call all choices one after the other. The idea ultimately

	1603 // is to use the Dispatch table to try only the relevant ones.

	1604 for (intptr_t i = first_normal_choice; i < choice_count; i++) {

	1605 GuardedAlternative alternative = alternatives_->At(i);

	1606 AlternativeGeneration* alt_gen = alt_gens.at(i);

	1607 alt_gen->quick_check_details.set_characters(preload_characters);

	1608 ZoneGrowableArray<Guard> guards = alternative.guards();

	1609 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();

	1610 Trace new_trace(*current_trace);

	1611 new_trace.set_characters_preloaded(preload_is_current ?

	1612 preload_characters :

	1613 0);

	1614 if (preload_has_checked_bounds) {

	1615 new_trace.set_bound_checked_up_to(preload_characters);

	1616 }

	1617 new_trace.quick_check_performed()->Clear();

	1618 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);

	1619 alt_gen->expects_preload = preload_is_current;

	1620 bool generate_full_check_inline = false;

	1621 if (kRegexpOptimization &&

	1622 try_to_emit_quick_check_for_alternative(i) &&

	1623 alternative.node()->EmitQuickCheck(compiler,

	1624 &new_trace,

	1625 preload_has_checked_bounds,

	1626 &alt_gen->possible_success,

	1627 &alt_gen->quick_check_details,

	1628 i < choice_count - 1)) {

	1629 // Quick check was generated for this choice.

	1630 preload_is_current = true;

	1631 preload_has_checked_bounds = true;

	1632 // On the last choice in the ChoiceNode we generated the quick

	1633 // check to fall through on possible success. So now we need to

	1634 // generate the full check inline.

	1635 if (i == choice_count - 1) {

	1636 macro_assembler->BindBlock(&alt_gen->possible_success);

	1637 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);

	1638 new_trace.set_characters_preloaded(preload_characters);

	1639 new_trace.set_bound_checked_up_to(preload_characters);

	1640 generate_full_check_inline = true;

	1641 }

	1642 } else if (alt_gen->quick_check_details.cannot_match()) {

	1643 if (i == choice_count - 1 && !greedy_loop) {

	1644 macro_assembler->GoTo(trace->backtrack());

	1645 }

	1646 continue;

	1647 } else {

	1648 // No quick check was generated. Put the full code here.

	1649 // If this is not the first choice then there could be slow checks from

	1650 // previous cases that go here when they fail. There's no reason to

	1651 // insist that they preload characters since the slow check we are about

	1652 // to generate probably can't use it.

	1653 if (i != first_normal_choice) {

	1654 alt_gen->expects_preload = false;

	1655 new_trace.InvalidateCurrentCharacter();

	1656 }

	1657 if (i < choice_count - 1) {

	1658 new_trace.set_backtrack(&alt_gen->after);

	1659 }

	1660 generate_full_check_inline = true;

	1661 }

	1662 if (generate_full_check_inline) {

	1663 if (new_trace.actions() != NULL) {

	1664 new_trace.set_flush_budget(new_flush_budget);

	1665 }

	1666 for (intptr_t j = 0; j < guard_count; j++) {

	1667 GenerateGuard(macro_assembler, guards->At(j), &new_trace);

	1668 }

	1669 alternative.node()->Emit(compiler, &new_trace);

	1670 preload_is_current = false;

	1671 }

	1672 macro_assembler->BindBlock(&alt_gen->after);

	1673 }

	1674 if (greedy_loop) {

	1675 macro_assembler->BindBlock(&greedy_loop_label);

	1676 // If we have unwound to the bottom then backtrack.

	1677 macro_assembler->CheckGreedyLoop(trace->backtrack());

	1678 // Otherwise try the second priority at an earlier position.

	1679 macro_assembler->AdvanceCurrentPosition(-text_length);

	1680 macro_assembler->GoTo(&second_choice);

	1681 }

	1682

	1683 // At this point we need to generate slow checks for the alternatives where

	1684 // the quick check was inlined. We can recognize these because the associated

	1685 // label was bound.

	1686 for (intptr_t i = first_normal_choice; i < choice_count - 1; i++) {

	1687 AlternativeGeneration* alt_gen = alt_gens.at(i);

	1688 Trace new_trace(*current_trace);

	1689 // If there are actions to be flushed we have to limit how many times

	1690 // they are flushed. Take the budget of the parent trace and distribute

	1691 // it fairly amongst the children.

	1692 if (new_trace.actions() != NULL) {

	1693 new_trace.set_flush_budget(new_flush_budget);

	1694 }

	1695 EmitOutOfLineContinuation(compiler,

	1696 &new_trace,

	1697 alternatives_->At(i),

	1698 alt_gen,

	1699 preload_characters,

	1700 alt_gens.at(i + 1)->expects_preload);

	1701 }

	1702 }

	1703

	1704

	1705 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,

	1706 Trace* trace,

	1707 GuardedAlternative alternative,

	1708 AlternativeGeneration* alt_gen,

	1709 intptr_t preload_characters,

	1710 bool next_expects_preload) {

	1711 if (!alt_gen->possible_success.IsLinked()) return;

	1712

	1713 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1714 macro_assembler->BindBlock(&alt_gen->possible_success);

	1715 Trace out_of_line_trace(*trace);

	1716 out_of_line_trace.set_characters_preloaded(preload_characters);

	1717 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);

	1718 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);

	1719 ZoneGrowableArray<Guard> guards = alternative.guards();

	1720 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();

	1721 if (next_expects_preload) {

	1722 BlockLabel reload_current_char;

	1723 out_of_line_trace.set_backtrack(&reload_current_char);

	1724 for (intptr_t j = 0; j < guard_count; j++) {

	1725 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace);

	1726 }

	1727 alternative.node()->Emit(compiler, &out_of_line_trace);

	1728 macro_assembler->BindBlock(&reload_current_char);

	1729 // Reload the current character, since the next quick check expects that.

	1730 // We don't need to check bounds here because we only get into this

	1731 // code through a quick check which already did the checked load.

	1732 macro_assembler->LoadCurrentCharacter(trace->cp_offset(),

	1733 NULL,

	1734 false,

	1735 preload_characters);

	1736 macro_assembler->GoTo(&(alt_gen->after));

	1737 } else {

	1738 out_of_line_trace.set_backtrack(&(alt_gen->after));

	1739 for (intptr_t j = 0; j < guard_count; j++) {

	1740 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace);

	1741 }

	1742 alternative.node()->Emit(compiler, &out_of_line_trace);

	1743 }

	1744 }

	1745

	1746

	1747 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1748 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1749 LimitResult limit_result = LimitVersions(compiler, trace);

	1750 if (limit_result == DONE) return;

	1751 ASSERT(limit_result == CONTINUE);

	1752

	1753 RecursionCheck rc(compiler);

	1754

	1755 switch (action_type_) {

	1756 case STORE_POSITION: {

	1757 Trace::DeferredCapture

	1758 new_capture(data_.u_position_register.reg,

	1759 data_.u_position_register.is_capture,

	1760 trace);

	1761 Trace new_trace = *trace;

	1762 new_trace.add_action(&new_capture);

	1763 on_success()->Emit(compiler, &new_trace);

	1764 break;

	1765 }

	1766 case INCREMENT_REGISTER: {

	1767 Trace::DeferredIncrementRegister

	1768 new_increment(data_.u_increment_register.reg);

	1769 Trace new_trace = *trace;

	1770 new_trace.add_action(&new_increment);

	1771 on_success()->Emit(compiler, &new_trace);

	1772 break;

	1773 }

	1774 case SET_REGISTER: {

	1775 Trace::DeferredSetRegister

	1776 new_set(data_.u_store_register.reg, data_.u_store_register.value);

	1777 Trace new_trace = *trace;

	1778 new_trace.add_action(&new_set);

	1779 on_success()->Emit(compiler, &new_trace);

	1780 break;

	1781 }

	1782 case CLEAR_CAPTURES: {

	1783 Trace::DeferredClearCaptures

	1784 new_capture(Interval(data_.u_clear_captures.range_from,

	1785 data_.u_clear_captures.range_to));

	1786 Trace new_trace = *trace;

	1787 new_trace.add_action(&new_capture);

	1788 on_success()->Emit(compiler, &new_trace);

	1789 break;

	1790 }

	1791 case BEGIN_SUBMATCH:

	1792 if (!trace->is_trivial()) {

	1793 trace->Flush(compiler, this);

	1794 } else {

	1795 assembler->WriteCurrentPositionToRegister(

	1796 data_.u_submatch.current_position_register, 0);

	1797 assembler->WriteStackPointerToRegister(

	1798 data_.u_submatch.stack_pointer_register);

	1799 on_success()->Emit(compiler, trace);

	1800 }

	1801 break;

	1802 case EMPTY_MATCH_CHECK: {

	1803 intptr_t start_pos_reg = data_.u_empty_match_check.start_register;

	1804 intptr_t stored_pos = 0;

	1805 intptr_t rep_reg = data_.u_empty_match_check.repetition_register;

	1806 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);

	1807 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);

	1808 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {

	1809 // If we know we haven't advanced and there is no minimum we

	1810 // can just backtrack immediately.

	1811 assembler->GoTo(trace->backtrack());

	1812 } else if (know_dist && stored_pos < trace->cp_offset()) {

	1813 // If we know we've advanced we can generate the continuation

	1814 // immediately.

	1815 on_success()->Emit(compiler, trace);

	1816 } else if (!trace->is_trivial()) {

	1817 trace->Flush(compiler, this);

	1818 } else {

	1819 BlockLabel skip_empty_check;

	1820 // If we have a minimum number of repetitions we check the current

	1821 // number first and skip the empty check if it's not enough.

	1822 if (has_minimum) {

	1823 intptr_t limit = data_.u_empty_match_check.repetition_limit;

	1824 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);

	1825 }

	1826 // If the match is empty we bail out, otherwise we fall through

	1827 // to the on-success continuation.

	1828 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,

	1829 trace->backtrack());

	1830 assembler->BindBlock(&skip_empty_check);

	1831 on_success()->Emit(compiler, trace);

	1832 }

	1833 break;

	1834 }

	1835 case POSITIVE_SUBMATCH_SUCCESS: {

	1836 if (!trace->is_trivial()) {

	1837 trace->Flush(compiler, this);

	1838 return;

	1839 }

	1840 assembler->ReadCurrentPositionFromRegister(

	1841 data_.u_submatch.current_position_register);

	1842 assembler->ReadStackPointerFromRegister(

	1843 data_.u_submatch.stack_pointer_register);

	1844 intptr_t clear_register_count = data_.u_submatch.clear_register_count;

	1845 if (clear_register_count == 0) {

	1846 on_success()->Emit(compiler, trace);

	1847 return;

	1848 }

	1849 intptr_t clear_registers_from = data_.u_submatch.clear_register_from;

	1850 BlockLabel clear_registers_backtrack;

	1851 Trace new_trace = *trace;

	1852 new_trace.set_backtrack(&clear_registers_backtrack);

	1853 on_success()->Emit(compiler, &new_trace);

	1854

	1855 assembler->BindBlock(&clear_registers_backtrack);

	1856 intptr_t clear_registers_to =

	1857 clear_registers_from + clear_register_count - 1;

	1858 assembler->ClearRegisters(clear_registers_from, clear_registers_to);

	1859

	1860 ASSERT(trace->backtrack() == NULL);

	1861 assembler->Backtrack();

	1862 return;

	1863 }

	1864 default:

	1865 UNREACHABLE();

	1866 }

	1867 }

	1868

	1869

	1870 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1871 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1872 if (!trace->is_trivial()) {

	1873 trace->Flush(compiler, this);

	1874 return;

	1875 }

	1876

	1877 LimitResult limit_result = LimitVersions(compiler, trace);

	1878 if (limit_result == DONE) return;

	1879 ASSERT(limit_result == CONTINUE);

	1880

	1881 RecursionCheck rc(compiler);

	1882

	1883 ASSERT(start_reg_ + 1 == end_reg_);

	1884 if (compiler->ignore_case()) {

	1885 assembler->CheckNotBackReferenceIgnoreCase(start_reg_,

	1886 trace->backtrack());

	1887 } else {

	1888 assembler->CheckNotBackReference(start_reg_, trace->backtrack());

	1889 }

	1890 on_success()->Emit(compiler, trace);

	1891 }

	1892

	1893

	1894 void ActionNode::FillInBMInfo(intptr_t offset,

	1895 intptr_t budget,

	1896 BoyerMooreLookahead* bm,

	1897 bool not_at_start) {

	1898 if (action_type_ == BEGIN_SUBMATCH) {

	1899 bm->SetRest(offset);

	1900 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {

	1901 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);

	1902 }

	1903 SaveBMInfo(bm, not_at_start, offset);

	1904 }

	1905

	1906

	1907 void AssertionNode::FillInBMInfo(intptr_t offset,

	1908 intptr_t budget,

	1909 BoyerMooreLookahead* bm,

	1910 bool not_at_start) {

	1911 // Match the behaviour of EatsAtLeast on this node.

	1912 if (assertion_type() == AT_START && not_at_start) return;

	1913 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);

	1914 SaveBMInfo(bm, not_at_start, offset);

	1915 }

	1916

	1917

	1918 void BackReferenceNode::FillInBMInfo(intptr_t offset,

	1919 intptr_t budget,

	1920 BoyerMooreLookahead* bm,

	1921 bool not_at_start) {

	1922 // Working out the set of characters that a backreference can match is too

	1923 // hard, so we just say that any character can match.

	1924 bm->SetRest(offset);

	1925 SaveBMInfo(bm, not_at_start, offset);

	1926 }

	1927

	1928

	1929 // Returns the number of characters in the equivalence class, omitting those

	1930 // that cannot occur in the source string because it is ASCII.

	1931 static intptr_t GetCaseIndependentLetters(uint16_t character,

	1932 bool ascii_subject,

	1933 int32_t* letters) {

	1934 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;

	1935 intptr_t length = jsregexp_uncanonicalize.get(character, '\0', letters);

	1936 // Unibrow returns 0 or 1 for characters where case independence is

	1937 // trivial.

	1938 if (length == 0) {

	1939 letters[0] = character;

	1940 length = 1;

	1941 }

	1942 if (!ascii_subject \|\| character <= Symbols::kMaxOneCharCodeSymbol) {

	1943 return length;

	1944 }

	1945 // The standard requires that non-ASCII characters cannot have ASCII

	1946 // character codes in their equivalence class.

	1947 return 0;

	1948 }

	1949

	1950

	1951 void ChoiceNode::FillInBMInfo(intptr_t offset,

	1952 intptr_t budget,

	1953 BoyerMooreLookahead* bm,

	1954 bool not_at_start) {

	1955 ZoneGrowableArray<GuardedAlternative>* alts = alternatives();

	1956 budget = (budget - 1) / alts->length();

	1957 for (intptr_t i = 0; i < alts->length(); i++) {

	1958 GuardedAlternative& alt = (*alts)[i];

	1959 if (alt.guards() != NULL && alt.guards()->length() != 0) {

	1960 bm->SetRest(offset); // Give up trying to fill in info.

	1961 SaveBMInfo(bm, not_at_start, offset);

	1962 return;

	1963 }

	1964 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start);

	1965 }

	1966 SaveBMInfo(bm, not_at_start, offset);

	1967 }

	1968

	1969

	1970 void EndNode::FillInBMInfo(intptr_t offset,

	1971 intptr_t budget,

	1972 BoyerMooreLookahead* bm,

	1973 bool not_at_start) {

	1974 // Returning 0 from EatsAtLeast should ensure we never get here.

	1975 UNREACHABLE();

	1976 }

	1977

	1978

	1979 void LoopChoiceNode::FillInBMInfo(intptr_t offset,

	1980 intptr_t budget,

	1981 BoyerMooreLookahead* bm,

	1982 bool not_at_start) {

	1983 if (body_can_be_zero_length_ \|\| budget <= 0) {

	1984 bm->SetRest(offset);

	1985 SaveBMInfo(bm, not_at_start, offset);

	1986 return;

	1987 }

	1988 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start);

	1989 SaveBMInfo(bm, not_at_start, offset);

	1990 }

	1991

	1992

	1993 void TextNode::FillInBMInfo(intptr_t initial_offset,

	1994 intptr_t budget,

	1995 BoyerMooreLookahead* bm,

	1996 bool not_at_start) {

	1997 if (initial_offset >= bm->length()) return;

	1998 intptr_t offset = initial_offset;

	1999 intptr_t max_char = bm->max_char();

	2000 for (intptr_t i = 0; i < elements()->length(); i++) {

	2001 if (offset >= bm->length()) {

	2002 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	2003 return;

	2004 }

	2005 TextElement text = elements()->At(i);

	2006 if (text.text_type() == TextElement::ATOM) {

	2007 RegExpAtom* atom = text.atom();

	2008 for (intptr_t j = 0; j < atom->length(); j++, offset++) {

	2009 if (offset >= bm->length()) {

	2010 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	2011 return;

	2012 }

	2013 uint16_t character = atom->data()->At(j);

	2014 if (bm->compiler()->ignore_case()) {

	2015 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	2016 intptr_t length = GetCaseIndependentLetters(

	2017 character,

	2018 bm->max_char() == Symbols::kMaxOneCharCodeSymbol,

	2019 chars);

	2020 for (intptr_t j = 0; j < length; j++) {

	2021 bm->Set(offset, chars[j]);

	2022 }

	2023 } else {

	2024 if (character <= max_char) bm->Set(offset, character);

	2025 }

	2026 }

	2027 } else {

	2028 ASSERT(text.text_type() == TextElement::CHAR_CLASS);

	2029 RegExpCharacterClass* char_class = text.char_class();

	2030 ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges();

	2031 if (char_class->is_negated()) {

	2032 bm->SetAll(offset);

	2033 } else {

	2034 for (intptr_t k = 0; k < ranges->length(); k++) {

	2035 CharacterRange& range = (*ranges)[k];

	2036 if (range.from() > max_char) continue;

	2037 intptr_t to = Utils::Minimum(max_char,

	2038 static_cast<intptr_t>(range.to()));

	2039 bm->SetInterval(offset, Interval(range.from(), to));

	2040 }

	2041 }

	2042 offset++;

	2043 }

	2044 }

	2045 if (offset >= bm->length()) {

	2046 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	2047 return;

	2048 }

	2049 on_success()->FillInBMInfo(offset,

	2050 budget - 1,

	2051 bm,

	2052 true); // Not at start after a text node.

	2053 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	2054 }

	2055

	2056

	2057 // Check for [0-9A-Z_a-z].

	2058 static void EmitWordCheck(RegExpMacroAssembler* assembler,

	2059 BlockLabel* word,

	2060 BlockLabel* non_word,

	2061 bool fall_through_on_word) {

	2062 if (assembler->CheckSpecialCharacterClass(

	2063 fall_through_on_word ? 'w' : 'W',

	2064 fall_through_on_word ? non_word : word)) {

	2065 // Optimized implementation available.

	2066 return;

	2067 }

	2068 assembler->CheckCharacterGT('z', non_word);

	2069 assembler->CheckCharacterLT('0', non_word);

	2070 assembler->CheckCharacterGT('a' - 1, word);

	2071 assembler->CheckCharacterLT('9' + 1, word);

	2072 assembler->CheckCharacterLT('A', non_word);

	2073 assembler->CheckCharacterLT('Z' + 1, word);

	2074 if (fall_through_on_word) {

	2075 assembler->CheckNotCharacter('_', non_word);

	2076 } else {

	2077 assembler->CheckCharacter('_', word);

	2078 }

	2079 }

	2080

	2081

	2082 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).

	2083 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {

	2084 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	2085 Trace::TriBool next_is_word_character = Trace::UNKNOWN;

	2086 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);

	2087 BoyerMooreLookahead* lookahead = bm_info(not_at_start);

	2088 if (lookahead == NULL) {

	2089 intptr_t eats_at_least =

	2090 Utils::Minimum(kMaxLookaheadForBoyerMoore,

	2091 EatsAtLeast(kMaxLookaheadForBoyerMoore,

	2092 kRecursionBudget,

	2093 not_at_start));

	2094 if (eats_at_least >= 1) {

	2095 BoyerMooreLookahead* bm =

	2096 new(I) BoyerMooreLookahead(eats_at_least, compiler, I);

	2097 FillInBMInfo(0, kRecursionBudget, bm, not_at_start);

	2098 if (bm->at(0)->is_non_word())

	2099 next_is_word_character = Trace::FALSE_VALUE;

	2100 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;

	2101 }

	2102 } else {

	2103 if (lookahead->at(0)->is_non_word())

	2104 next_is_word_character = Trace::FALSE_VALUE;

	2105 if (lookahead->at(0)->is_word())

	2106 next_is_word_character = Trace::TRUE_VALUE;

	2107 }

	2108 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);

	2109 if (next_is_word_character == Trace::UNKNOWN) {

	2110 BlockLabel before_non_word;

	2111 BlockLabel before_word;

	2112 if (trace->characters_preloaded() != 1) {

	2113 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);

	2114 }

	2115 // Fall through on non-word.

	2116 EmitWordCheck(assembler, &before_word, &before_non_word, false);

	2117 // Next character is not a word character.

	2118 assembler->BindBlock(&before_non_word);

	2119 BlockLabel ok;

	2120 // Backtrack on \B (non-boundary check) if previous is a word,

	2121 // since we know next is not a word and this would be a boundary.

	2122 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);

	2123

	2124 if (!assembler->IsClosed()) {

	2125 assembler->GoTo(&ok);

	2126 }

	2127

	2128 assembler->BindBlock(&before_word);

	2129 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);

	2130 assembler->BindBlock(&ok);

	2131 } else if (next_is_word_character == Trace::TRUE_VALUE) {

	2132 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);

	2133 } else {

	2134 ASSERT(next_is_word_character == Trace::FALSE_VALUE);

	2135 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);

	2136 }

	2137 }

	2138

	2139

	2140 void AssertionNode::BacktrackIfPrevious(

	2141 RegExpCompiler* compiler,

	2142 Trace* trace,

	2143 AssertionNode::IfPrevious backtrack_if_previous) {

	2144 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	2145 Trace new_trace(*trace);

	2146 new_trace.InvalidateCurrentCharacter();

	2147

	2148 BlockLabel fall_through, dummy;

	2149

	2150 BlockLabel* non_word = backtrack_if_previous == kIsNonWord ?

	2151 new_trace.backtrack() :

	2152 &fall_through;

	2153 BlockLabel* word = backtrack_if_previous == kIsNonWord ?

	2154 &fall_through :

	2155 new_trace.backtrack();

	2156

	2157 if (new_trace.cp_offset() == 0) {

	2158 // The start of input counts as a non-word character, so the question is

	2159 // decided if we are at the start.

	2160 assembler->CheckAtStart(non_word);

	2161 }

	2162 // We already checked that we are not at the start of input so it must be

	2163 // OK to load the previous character.

	2164 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);

	2165 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);

	2166

	2167 assembler->BindBlock(&fall_through);

	2168 on_success()->Emit(compiler, &new_trace);

	2169 }

	2170

	2171

	2172 static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) {

	2173 if (quick_check == NULL) return false;

	2174 if (offset >= quick_check->characters()) return false;

	2175 return quick_check->positions(offset)->determines_perfectly;

	2176 }

	2177

	2178

	2179 static void UpdateBoundsCheck(intptr_t index, intptr_t* checked_up_to) {

	2180 if (index > *checked_up_to) {

	2181 *checked_up_to = index;

	2182 }

	2183 }

	2184

	2185

	2186 typedef bool EmitCharacterFunction(Isolate* isolate,

	2187 RegExpCompiler* compiler,

	2188 uint16_t c,

	2189 BlockLabel* on_failure,

	2190 intptr_t cp_offset,

	2191 bool check,

	2192 bool preloaded);

	2193

	2194

	2195 static inline bool EmitSimpleCharacter(Isolate* isolate,

	2196 RegExpCompiler* compiler,

	2197 uint16_t c,

	2198 BlockLabel* on_failure,

	2199 intptr_t cp_offset,

	2200 bool check,

	2201 bool preloaded) {

	2202 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	2203 bool bound_checked = false;

	2204 if (!preloaded) {

	2205 assembler->LoadCurrentCharacter(

	2206 cp_offset,

	2207 on_failure,

	2208 check);

	2209 bound_checked = true;

	2210 }

	2211 assembler->CheckNotCharacter(c, on_failure);

	2212 return bound_checked;

	2213 }

	2214

	2215

	2216 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,

	2217 bool ascii,

	2218 uint16_t c1,

	2219 uint16_t c2,

	2220 BlockLabel* on_failure) {

	2221 uint16_t char_mask;

	2222 if (ascii) {

	2223 char_mask = Symbols::kMaxOneCharCodeSymbol;

	2224 } else {

	2225 char_mask = Utf16::kMaxCodeUnit;

	2226 }

	2227 uint16_t exor = c1 ^ c2;

	2228 // Check whether exor has only one bit set.

	2229 if (((exor - 1) & exor) == 0) {

	2230 // If c1 and c2 differ only by one bit.

	2231 // Ecma262UnCanonicalize always gives the highest number last.

	2232 ASSERT(c2 > c1);

	2233 uint16_t mask = char_mask ^ exor;

	2234 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);

	2235 return true;

	2236 }

	2237 ASSERT(c2 > c1);

	2238 uint16_t diff = c2 - c1;

	2239 if (((diff - 1) & diff) == 0 && c1 >= diff) {

	2240 // If the characters differ by 2^n but don't differ by one bit then

	2241 // subtract the difference from the found character, then do the or

	2242 // trick. We avoid the theoretical case where negative numbers are

	2243 // involved in order to simplify code generation.

	2244 uint16_t mask = char_mask ^ diff;

	2245 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,

	2246 diff,

	2247 mask,

	2248 on_failure);

	2249 return true;

	2250 }

	2251 return false;

	2252 }

	2253

	2254 // Only emits letters (things that have case). Only used for case independent

	2255 // matches.

	2256 static inline bool EmitAtomLetter(Isolate* isolate,

	2257 RegExpCompiler* compiler,

	2258 uint16_t c,

	2259 BlockLabel* on_failure,

	2260 intptr_t cp_offset,

	2261 bool check,

	2262 bool preloaded) {

	2263 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	2264 bool ascii = compiler->ascii();

	2265 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	2266 intptr_t length = GetCaseIndependentLetters(c, ascii, chars);

	2267 if (length <= 1) return false;

	2268 // We may not need to check against the end of the input string

	2269 // if this character lies before a character that matched.

	2270 if (!preloaded) {

	2271 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

	2272 }

	2273 BlockLabel ok;

	2274 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);

	2275 switch (length) {

	2276 case 2: {

	2277 if (ShortCutEmitCharacterPair(macro_assembler,

	2278 ascii,

	2279 chars[0],

	2280 chars[1],

	2281 on_failure)) {

	2282 } else {

	2283 macro_assembler->CheckCharacter(chars[0], &ok);

	2284 macro_assembler->CheckNotCharacter(chars[1], on_failure);

	2285 macro_assembler->BindBlock(&ok);

	2286 }

	2287 break;

	2288 }

	2289 case 4:

	2290 macro_assembler->CheckCharacter(chars[3], &ok);

	2291 // Fall through!

	2292 case 3:

	2293 macro_assembler->CheckCharacter(chars[0], &ok);

	2294 macro_assembler->CheckCharacter(chars[1], &ok);

	2295 macro_assembler->CheckNotCharacter(chars[2], on_failure);

	2296 macro_assembler->BindBlock(&ok);

	2297 break;

	2298 default:

	2299 UNREACHABLE();

	2300 break;

	2301 }

	2302 return true;

	2303 }

	2304

	2305

	2306 // Only emits non-letters (things that don't have case). Only used for case

	2307 // independent matches.

	2308 static inline bool EmitAtomNonLetter(Isolate* isolate,

	2309 RegExpCompiler* compiler,

	2310 uint16_t c,

	2311 BlockLabel* on_failure,

	2312 intptr_t cp_offset,

	2313 bool check,

	2314 bool preloaded) {

	2315 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	2316 bool ascii = compiler->ascii();

	2317 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	2318 intptr_t length = GetCaseIndependentLetters(c, ascii, chars);

	2319 if (length < 1) {

	2320 // This can't match. Must be an ASCII subject and a non-ASCII character.

	2321 // We do not need to do anything since the ASCII pass already handled this.

	2322 return false; // Bounds not checked.

	2323 }

	2324 bool checked = false;

	2325 // We handle the length > 1 case in a later pass.

	2326 if (length == 1) {

	2327 if (ascii && c > Symbols::kMaxOneCharCodeSymbol) {

	2328 // Can't match - see above.

	2329 return false; // Bounds not checked.

	2330 }

	2331 if (!preloaded) {

	2332 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

	2333 checked = check;

	2334 }

	2335 macro_assembler->CheckNotCharacter(c, on_failure);

	2336 }

	2337 return checked;

	2338 }

	2339

	2340

	2341 static void EmitBoundaryTest(RegExpMacroAssembler* masm,

	2342 intptr_t border,

	2343 BlockLabel* fall_through,

	2344 BlockLabel* above_or_equal,

	2345 BlockLabel* below) {

	2346 if (below != fall_through) {

	2347 masm->CheckCharacterLT(border, below);

	2348 if (above_or_equal != fall_through) masm->GoTo(above_or_equal);

	2349 } else {

	2350 masm->CheckCharacterGT(border - 1, above_or_equal);

	2351 }

	2352 }

	2353

	2354

	2355 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,

	2356 intptr_t first,

	2357 intptr_t last,

	2358 BlockLabel* fall_through,

	2359 BlockLabel* in_range,

	2360 BlockLabel* out_of_range) {

	2361 if (in_range == fall_through) {

	2362 if (first == last) {

	2363 masm->CheckNotCharacter(first, out_of_range);

	2364 } else {

	2365 masm->CheckCharacterNotInRange(first, last, out_of_range);

	2366 }

	2367 } else {

	2368 if (first == last) {

	2369 masm->CheckCharacter(first, in_range);

	2370 } else {

	2371 masm->CheckCharacterInRange(first, last, in_range);

	2372 }

	2373 if (out_of_range != fall_through) masm->GoTo(out_of_range);

	2374 }

	2375 }

	2376

	2377

	2378 static void CutOutRange(RegExpMacroAssembler* masm,

	2379 ZoneGrowableArray<int>* ranges,

	2380 intptr_t start_index,

	2381 intptr_t end_index,

	2382 intptr_t cut_index,

	2383 BlockLabel* even_label,

	2384 BlockLabel* odd_label) {

	2385 bool odd = (((cut_index - start_index) & 1) == 1);

	2386 BlockLabel* in_range_label = odd ? odd_label : even_label;

	2387 BlockLabel dummy;

	2388 EmitDoubleBoundaryTest(masm,

	2389 ranges->At(cut_index),

	2390 ranges->At(cut_index + 1) - 1,

	2391 &dummy,

	2392 in_range_label,

	2393 &dummy);

	2394 ASSERT(!dummy.IsLinked());

	2395 // Cut out the single range by rewriting the array. This creates a new

	2396 // range that is a merger of the two ranges on either side of the one we

	2397 // are cutting out. The oddity of the labels is preserved.

	2398 for (intptr_t j = cut_index; j > start_index; j--) {

	2399 (*ranges)[j] = ranges->At(j - 1);

	2400 }

	2401 for (intptr_t j = cut_index + 1; j < end_index; j++) {

	2402 (*ranges)[j] = ranges->At(j + 1);

	2403 }

	2404 }

	2405

	2406

	2407 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.

	2408 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.

	2409 static void EmitUseLookupTable(

	2410 RegExpMacroAssembler* masm,

	2411 ZoneGrowableArray<int>* ranges,

	2412 intptr_t start_index,

	2413 intptr_t end_index,

	2414 intptr_t min_char,

	2415 BlockLabel* fall_through,

	2416 BlockLabel* even_label,

	2417 BlockLabel* odd_label) {

	2418 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	2419 static const intptr_t kMask = RegExpMacroAssembler::kTableMask;

	2420

	2421 intptr_t base = (min_char & ~kMask);

	2422

	2423 // Assert that everything is on one kTableSize page.

	2424 for (intptr_t i = start_index; i <= end_index; i++) {

	2425 ASSERT((ranges->At(i) & ~kMask) == base);

	2426 }

	2427 ASSERT(start_index == 0 \|\| (ranges->At(start_index - 1) & ~kMask) <= base);

	2428

	2429 char templ[kSize];

	2430 BlockLabel* on_bit_set;

	2431 BlockLabel* on_bit_clear;

	2432 intptr_t bit;

	2433 if (even_label == fall_through) {

	2434 on_bit_set = odd_label;

	2435 on_bit_clear = even_label;

	2436 bit = 1;

	2437 } else {

	2438 on_bit_set = even_label;

	2439 on_bit_clear = odd_label;

	2440 bit = 0;

	2441 }

	2442 for (intptr_t i = 0; i < (ranges->At(start_index) & kMask) && i < kSize;

	2443 i++) {

	2444 templ[i] = bit;

	2445 }

	2446 intptr_t j = 0;

	2447 bit ^= 1;

	2448 for (intptr_t i = start_index; i < end_index; i++) {

	2449 for (j = (ranges->At(i) & kMask); j < (ranges->At(i + 1) & kMask); j++) {

	2450 templ[j] = bit;

	2451 }

	2452 bit ^= 1;

	2453 }

	2454 for (intptr_t i = j; i < kSize; i++) {

	2455 templ[i] = bit;

	2456 }

	2457 // TODO(erikcorry): Cache these.

	2458 const TypedData& ba = TypedData::ZoneHandle(

	2459 masm->isolate(),

	2460 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld));

	2461 for (intptr_t i = 0; i < kSize; i++) {

	2462 ba.SetUint8(i, templ[i]);

	2463 }

	2464 masm->CheckBitInTable(ba, on_bit_set);

	2465 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);

	2466 }

	2467

	2468

	2469 // Unicode case. Split the search space into kSize spaces that are handled

	2470 // with recursion.

	2471 static void SplitSearchSpace(ZoneGrowableArray<int>* ranges,

	2472 intptr_t start_index,

	2473 intptr_t end_index,

	2474 intptr_t* new_start_index,

	2475 intptr_t* new_end_index,

	2476 intptr_t* border) {

	2477 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	2478 static const intptr_t kMask = RegExpMacroAssembler::kTableMask;

	2479

	2480 intptr_t first = ranges->At(start_index);

	2481 intptr_t last = ranges->At(end_index) - 1;

	2482

	2483 *new_start_index = start_index;

	2484 *border = (ranges->At(start_index) & ~kMask) + kSize;

	2485 while (*new_start_index < end_index) {

	2486 if (ranges->At(new_start_index) > border) break;

	2487 (*new_start_index)++;

	2488 }

	2489 // new_start_index is the index of the first edge that is beyond the

	2490 // current kSize space.

	2491

	2492 // For very large search spaces we do a binary chop search of the non-ASCII

	2493 // space instead of just going to the end of the current kSize space. The

	2494 // heuristics are complicated a little by the fact that any 128-character

	2495 // encoding space can be quickly tested with a table lookup, so we don't

	2496 // wish to do binary chop search at a smaller granularity than that. A

	2497 // 128-character space can take up a lot of space in the ranges array if,

	2498 // for example, we only want to match every second character (eg. the lower

	2499 // case characters on some Unicode pages).

	2500 intptr_t binary_chop_index = (end_index + start_index) / 2;

	2501 // The first test ensures that we get to the code that handles the ASCII

	2502 // range with a single not-taken branch, speeding up this important

	2503 // character range (even non-ASCII charset-based text has spaces and

	2504 // punctuation).

	2505 if (*border - 1 > Symbols::kMaxOneCharCodeSymbol && // ASCII case.

	2506 end_index - start_index > (new_start_index - start_index) 2 &&

	2507 last - first > kSize * 2 &&

	2508 binary_chop_index > *new_start_index &&

	2509 ranges->At(binary_chop_index) >= first + 2 * kSize) {

	2510 intptr_t scan_forward_for_section_border = binary_chop_index;;

	2511 intptr_t new_border = (ranges->At(binary_chop_index) \| kMask) + 1;

	2512

	2513 while (scan_forward_for_section_border < end_index) {

	2514 if (ranges->At(scan_forward_for_section_border) > new_border) {

	2515 *new_start_index = scan_forward_for_section_border;

	2516 *border = new_border;

	2517 break;

	2518 }

	2519 scan_forward_for_section_border++;

	2520 }

	2521 }

	2522

	2523 ASSERT(*new_start_index > start_index);

	2524 new_end_index = new_start_index - 1;

	2525 if (ranges->At(new_end_index) == border) {

	2526 (*new_end_index)--;

	2527 }

	2528 if (*border >= ranges->At(end_index)) {

	2529 *border = ranges->At(end_index);

	2530 *new_start_index = end_index; // Won't be used.

	2531 *new_end_index = end_index - 1;

	2532 }

	2533 }

	2534

	2535

	2536 // Gets a series of segment boundaries representing a character class. If the

	2537 // character is in the range between an even and an odd boundary (counting from

	2538 // start_index) then go to even_label, otherwise go to odd_label. We already

	2539 // know that the character is in the range of min_char to max_char inclusive.

	2540 // Either label can be NULL indicating backtracking. Either label can also be

	2541 // equal to the fall_through label.

	2542 static void GenerateBranches(RegExpMacroAssembler* masm,

	2543 ZoneGrowableArray<int>* ranges,

	2544 intptr_t start_index,

	2545 intptr_t end_index,

	2546 uint16_t min_char,

	2547 uint16_t max_char,

	2548 BlockLabel* fall_through,

	2549 BlockLabel* even_label,

	2550 BlockLabel* odd_label) {

	2551 intptr_t first = ranges->At(start_index);

	2552 intptr_t last = ranges->At(end_index) - 1;

	2553

	2554 ASSERT(min_char < first);

	2555

	2556 // Just need to test if the character is before or on-or-after

	2557 // a particular character.

	2558 if (start_index == end_index) {

	2559 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);

	2560 return;

	2561 }

	2562

	2563 // Another almost trivial case: There is one interval in the middle that is

	2564 // different from the end intervals.

	2565 if (start_index + 1 == end_index) {

	2566 EmitDoubleBoundaryTest(

	2567 masm, first, last, fall_through, even_label, odd_label);

	2568 return;

	2569 }

	2570

	2571 // It's not worth using table lookup if there are very few intervals in the

	2572 // character class.

	2573 if (end_index - start_index <= 6) {

	2574 // It is faster to test for individual characters, so we look for those

	2575 // first, then try arbitrary ranges in the second round.

	2576 static intptr_t kNoCutIndex = -1;

	2577 intptr_t cut = kNoCutIndex;

	2578 for (intptr_t i = start_index; i < end_index; i++) {

	2579 if (ranges->At(i) == ranges->At(i + 1) - 1) {

	2580 cut = i;

	2581 break;

	2582 }

	2583 }

	2584 if (cut == kNoCutIndex) cut = start_index;

	2585 CutOutRange(

	2586 masm, ranges, start_index, end_index, cut, even_label, odd_label);

	2587 ASSERT(end_index - start_index >= 2);

	2588 GenerateBranches(masm,

	2589 ranges,

	2590 start_index + 1,

	2591 end_index - 1,

	2592 min_char,

	2593 max_char,

	2594 fall_through,

	2595 even_label,

	2596 odd_label);

	2597 return;

	2598 }

	2599

	2600 // If there are a lot of intervals in the regexp, then we will use tables to

	2601 // determine whether the character is inside or outside the character class.

	2602 static const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits;

	2603

	2604 if ((max_char >> kBits) == (min_char >> kBits)) {

	2605 EmitUseLookupTable(masm,

	2606 ranges,

	2607 start_index,

	2608 end_index,

	2609 min_char,

	2610 fall_through,

	2611 even_label,

	2612 odd_label);

	2613 return;

	2614 }

	2615

	2616 if ((min_char >> kBits) != (first >> kBits)) {

	2617 masm->CheckCharacterLT(first, odd_label);

	2618 GenerateBranches(masm,

	2619 ranges,

	2620 start_index + 1,

	2621 end_index,

	2622 first,

	2623 max_char,

	2624 fall_through,

	2625 odd_label,

	2626 even_label);

	2627 return;

	2628 }

	2629

	2630 intptr_t new_start_index = 0;

	2631 intptr_t new_end_index = 0;

	2632 intptr_t border = 0;

	2633

	2634 SplitSearchSpace(ranges,

	2635 start_index,

	2636 end_index,

	2637 &new_start_index,

	2638 &new_end_index,

	2639 &border);

	2640

	2641 BlockLabel handle_rest;

	2642 BlockLabel* above = &handle_rest;

	2643 if (border == last + 1) {

	2644 // We didn't find any section that started after the limit, so everything

	2645 // above the border is one of the terminal labels.

	2646 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;

	2647 ASSERT(new_end_index == end_index - 1);

	2648 }

	2649

	2650 ASSERT(start_index <= new_end_index);

	2651 ASSERT(new_start_index <= end_index);

	2652 ASSERT(start_index < new_start_index);

	2653 ASSERT(new_end_index < end_index);

	2654 ASSERT(new_end_index + 1 == new_start_index \|\|

	2655 (new_end_index + 2 == new_start_index &&

	2656 border == ranges->At(new_end_index + 1)));

	2657 ASSERT(min_char < border - 1);

	2658 ASSERT(border < max_char);

	2659 ASSERT(ranges->At(new_end_index) < border);

	2660 ASSERT(border < ranges->At(new_start_index) \|\|

	2661 (border == ranges->At(new_start_index) &&

	2662 new_start_index == end_index &&

	2663 new_end_index == end_index - 1 &&

	2664 border == last + 1));

	2665 ASSERT(new_start_index == 0 \|\| border >= ranges->At(new_start_index - 1));

	2666

	2667 masm->CheckCharacterGT(border - 1, above);

	2668 BlockLabel dummy;

	2669 GenerateBranches(masm,

	2670 ranges,

	2671 start_index,

	2672 new_end_index,

	2673 min_char,

	2674 border - 1,

	2675 &dummy,

	2676 even_label,

	2677 odd_label);

	2678

	2679 if (handle_rest.IsLinked()) {

	2680 masm->BindBlock(&handle_rest);

	2681 bool flip = (new_start_index & 1) != (start_index & 1);

	2682 GenerateBranches(masm,

	2683 ranges,

	2684 new_start_index,

	2685 end_index,

	2686 border,

	2687 max_char,

	2688 &dummy,

	2689 flip ? odd_label : even_label,

	2690 flip ? even_label : odd_label);

	2691 }

	2692 }

	2693

	2694

	2695 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,

	2696 RegExpCharacterClass* cc,

	2697 bool ascii,

	2698 BlockLabel* on_failure,

	2699 intptr_t cp_offset,

	2700 bool check_offset,

	2701 bool preloaded,

	2702 Isolate* isolate) {

	2703 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();

	2704 if (!CharacterRange::IsCanonical(ranges)) {

	2705 CharacterRange::Canonicalize(ranges);

	2706 }

	2707

	2708 intptr_t max_char;

	2709 if (ascii) {

	2710 max_char = Symbols::kMaxOneCharCodeSymbol;

	2711 } else {

	2712 max_char = Utf16::kMaxCodeUnit;

	2713 }

	2714

	2715 intptr_t range_count = ranges->length();

	2716

	2717 intptr_t last_valid_range = range_count - 1;

	2718 while (last_valid_range >= 0) {

	2719 CharacterRange& range = (*ranges)[last_valid_range];

	2720 if (range.from() <= max_char) {

	2721 break;

	2722 }

	2723 last_valid_range--;

	2724 }

	2725

	2726 if (last_valid_range < 0) {

	2727 if (!cc->is_negated()) {

	2728 macro_assembler->GoTo(on_failure);

	2729 }

	2730 if (check_offset) {

	2731 macro_assembler->CheckPosition(cp_offset, on_failure);

	2732 }

	2733 return;

	2734 }

	2735

	2736 if (last_valid_range == 0 &&

	2737 ranges->At(0).IsEverything(max_char)) {

	2738 if (cc->is_negated()) {

	2739 macro_assembler->GoTo(on_failure);

	2740 } else {

	2741 // This is a common case hit by non-anchored expressions.

	2742 if (check_offset) {

	2743 macro_assembler->CheckPosition(cp_offset, on_failure);

	2744 }

	2745 }

	2746 return;

	2747 }

	2748 if (last_valid_range == 0 &&

	2749 !cc->is_negated() &&

	2750 ranges->At(0).IsEverything(max_char)) {

	2751 // This is a common case hit by non-anchored expressions.

	2752 if (check_offset) {

	2753 macro_assembler->CheckPosition(cp_offset, on_failure);

	2754 }

	2755 return;

	2756 }

	2757

	2758 if (!preloaded) {

	2759 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);

	2760 }

	2761

	2762 if (cc->is_standard() &&

	2763 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),

	2764 on_failure)) {

	2765 return;

	2766 }

	2767

	2768

	2769 // A new list with ascending entries. Each entry is a code unit

	2770 // where there is a boundary between code units that are part of

	2771 // the class and code units that are not. Normally we insert an

	2772 // entry at zero which goes to the failure label, but if there

	2773 // was already one there we fall through for success on that entry.

	2774 // Subsequent entries have alternating meaning (success/failure).

	2775 ZoneGrowableArray<int>* range_boundaries =

	2776 new(isolate) ZoneGrowableArray<int>(last_valid_range);

	2777

	2778 bool zeroth_entry_is_failure = !cc->is_negated();

	2779

	2780 for (intptr_t i = 0; i <= last_valid_range; i++) {

	2781 CharacterRange& range = (*ranges)[i];

	2782 if (range.from() == 0) {

	2783 ASSERT(i == 0);

	2784 zeroth_entry_is_failure = !zeroth_entry_is_failure;

	2785 } else {

	2786 range_boundaries->Add(range.from());

	2787 }

	2788 range_boundaries->Add(range.to() + 1);

	2789 }

	2790 intptr_t end_index = range_boundaries->length() - 1;

	2791 if (range_boundaries->At(end_index) > max_char) {

	2792 end_index--;

	2793 }

	2794

	2795 BlockLabel fall_through;

	2796 GenerateBranches(macro_assembler,

	2797 range_boundaries,

	2798 0, // start_index.

	2799 end_index,

	2800 0, // min_char.

	2801 max_char,

	2802 &fall_through,

	2803 zeroth_entry_is_failure ? &fall_through : on_failure,

	2804 zeroth_entry_is_failure ? on_failure : &fall_through);

	2805 macro_assembler->BindBlock(&fall_through);

	2806 }

	2807

	2808

	2809 // We call this repeatedly to generate code for each pass over the text node.

	2810 // The passes are in increasing order of difficulty because we hope one

	2811 // of the first passes will fail in which case we are saved the work of the

	2812 // later passes. for example for the case independent regexp /%[asdfghjkl]a/

	2813 // we will check the '%' in the first pass, the case independent 'a' in the

	2814 // second pass and the character class in the last pass.

	2815 //

	2816 // The passes are done from right to left, so for example to test for /bar/

	2817 // we will first test for an 'r' with offset 2, then an 'a' with offset 1

	2818 // and then a 'b' with offset 0. This means we can avoid the end-of-input

	2819 // bounds check most of the time. In the example we only need to check for

	2820 // end-of-input when loading the putative 'r'.

	2821 //

	2822 // A slight complication involves the fact that the first character may already

	2823 // be fetched into a register by the previous node. In this case we want to

	2824 // do the test for that character first. We do this in separate passes. The

	2825 // 'preloaded' argument indicates that we are doing such a 'pass'. If such a

	2826 // pass has been performed then subsequent passes will have true in

	2827 // first_element_checked to indicate that that character does not need to be

	2828 // checked again.

	2829 //

	2830 // In addition to all this we are passed a Trace, which can

	2831 // contain an AlternativeGeneration object. In this AlternativeGeneration

	2832 // object we can see details of any quick check that was already passed in

	2833 // order to get to the code we are now generating. The quick check can involve

	2834 // loading characters, which means we do not need to recheck the bounds

	2835 // up to the limit the quick check already checked. In addition the quick

	2836 // check can have involved a mask and compare operation which may simplify

	2837 // or obviate the need for further checks at some character positions.

	2838 void TextNode::TextEmitPass(RegExpCompiler* compiler,

	2839 TextEmitPassType pass,

	2840 bool preloaded,

	2841 Trace* trace,

	2842 bool first_element_checked,

	2843 intptr_t* checked_up_to) {

	2844 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	2845 bool ascii = compiler->ascii();

	2846 BlockLabel* backtrack = trace->backtrack();

	2847 QuickCheckDetails* quick_check = trace->quick_check_performed();

	2848 intptr_t element_count = elms_->length();

	2849 for (intptr_t i = preloaded ? 0 : element_count - 1; i >= 0; i--) {

	2850 TextElement elm = elms_->At(i);

	2851 intptr_t cp_offset = trace->cp_offset() + elm.cp_offset();

	2852 if (elm.text_type() == TextElement::ATOM) {

	2853 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();

	2854 for (intptr_t j = preloaded ? 0 : quarks->length() - 1; j >= 0; j--) {

	2855 if (first_element_checked && i == 0 && j == 0) continue;

	2856 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;

	2857 EmitCharacterFunction* emit_function = NULL;

	2858 switch (pass) {

	2859 case NON_ASCII_MATCH:

	2860 ASSERT(ascii);

	2861 if (quarks->At(j) > Symbols::kMaxOneCharCodeSymbol) {

	2862 assembler->GoTo(backtrack);

	2863 return;

	2864 }

	2865 break;

	2866 case NON_LETTER_CHARACTER_MATCH:

	2867 emit_function = &EmitAtomNonLetter;

	2868 break;

	2869 case SIMPLE_CHARACTER_MATCH:

	2870 emit_function = &EmitSimpleCharacter;

	2871 break;

	2872 case CASE_CHARACTER_MATCH:

	2873 emit_function = &EmitAtomLetter;

	2874 break;

	2875 default:

	2876 break;

	2877 }

	2878 if (emit_function != NULL) {

	2879 bool bound_checked = emit_function(I,

	2880 compiler,

	2881 quarks->At(j),

	2882 backtrack,

	2883 cp_offset + j,

	2884 *checked_up_to < cp_offset + j,

	2885 preloaded);

	2886 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);

	2887 }

	2888 }

	2889 } else {

	2890 ASSERT(elm.text_type() == TextElement::CHAR_CLASS);

	2891 if (pass == CHARACTER_CLASS_MATCH) {

	2892 if (first_element_checked && i == 0) continue;

	2893 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;

	2894 RegExpCharacterClass* cc = elm.char_class();

	2895 EmitCharClass(assembler,

	2896 cc,

	2897 ascii,

	2898 backtrack,

	2899 cp_offset,

	2900 *checked_up_to < cp_offset,

	2901 preloaded,

	2902 I);

	2903 UpdateBoundsCheck(cp_offset, checked_up_to);

	2904 }

	2905 }

	2906 }

	2907 }

	2908

	2909

	2910 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,

	2911 intptr_t eats_at_least) {

	2912 intptr_t preload_characters = Utils::Minimum(static_cast<intptr_t>(4),

	2913 eats_at_least);

	2914 if (compiler->macro_assembler()->CanReadUnaligned()) {

	2915 bool ascii = compiler->ascii();

	2916 if (ascii) {

	2917 if (preload_characters > 4) preload_characters = 4;

	2918 // We can't preload 3 characters because there is no machine instruction

	2919 // to do that. We can't just load 4 because we could be reading

	2920 // beyond the end of the string, which could cause a memory fault.

	2921 if (preload_characters == 3) preload_characters = 2;

	2922 } else {

	2923 if (preload_characters > 2) preload_characters = 2;

	2924 }

	2925 } else {

	2926 if (preload_characters > 1) preload_characters = 1;

	2927 }

	2928 return preload_characters;

	2929 }

	2930

	2931

	2932 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find,

	2933 intptr_t budget,

	2934 bool not_at_start) {

	2935 intptr_t answer = Length();

	2936 if (answer >= still_to_find) return answer;

	2937 if (budget <= 0) return answer;

	2938 // We are not at start after this node so we set the last argument to 'true'.

	2939 return answer + on_success()->EatsAtLeast(still_to_find - answer,

	2940 budget - 1,

	2941 true);

	2942 }

	2943

	2944

	2945 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find,

	2946 intptr_t budget,

	2947 bool not_at_start) {

	2948 if (budget <= 0) return 0;

	2949 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!

	2950 return on_success()->EatsAtLeast(still_to_find,

	2951 budget - 1,

	2952 not_at_start);

	2953 }

	2954

	2955

	2956 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find,

	2957 intptr_t budget,

	2958 bool not_at_start) {

	2959 if (budget <= 0) return 0;

	2960 // If we know we are not at the start and we are asked "how many characters

	2961 // will you match if you succeed?" then we can answer anything since false

	2962 // implies false. So lets just return the max answer (still_to_find) since

	2963 // that won't prevent us from preloading a lot of characters for the other

	2964 // branches in the node graph.

	2965 if (assertion_type() == AT_START && not_at_start) return still_to_find;

	2966 return on_success()->EatsAtLeast(still_to_find,

	2967 budget - 1,

	2968 not_at_start);

	2969 }

	2970

	2971

	2972 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find,

	2973 intptr_t budget,

	2974 bool not_at_start) {

	2975 if (budget <= 0) return 0;

	2976 return on_success()->EatsAtLeast(still_to_find,

	2977 budget - 1,

	2978 not_at_start);

	2979 }

	2980

	2981

	2982 intptr_t ChoiceNode::EatsAtLeastHelper(intptr_t still_to_find,

	2983 intptr_t budget,

	2984 RegExpNode* ignore_this_node,

	2985 bool not_at_start) {

	2986 if (budget <= 0) return 0;

	2987 intptr_t min = 100;

	2988 intptr_t choice_count = alternatives_->length();

	2989 budget = (budget - 1) / choice_count;

	2990 for (intptr_t i = 0; i < choice_count; i++) {

	2991 RegExpNode* node = (*alternatives_)[i].node();

	2992 if (node == ignore_this_node) continue;

	2993 intptr_t node_eats_at_least =

	2994 node->EatsAtLeast(still_to_find, budget, not_at_start);

	2995 if (node_eats_at_least < min) min = node_eats_at_least;

	2996 if (min == 0) return 0;

	2997 }

	2998 return min;

	2999 }

	3000

	3001

	3002 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find,

	3003 intptr_t budget,

	3004 bool not_at_start) {

	3005 return EatsAtLeastHelper(still_to_find,

	3006 budget,

	3007 NULL,

	3008 not_at_start);

	3009 }

	3010

	3011

	3012 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find,

	3013 intptr_t budget,

	3014 bool not_at_start) {

	3015 if (budget <= 0) return 0;

	3016 // Alternative 0 is the negative lookahead, alternative 1 is what comes

	3017 // afterwards.

	3018 RegExpNode* node = (*alternatives_)[1].node();

	3019 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);

	3020 }

	3021

	3022

	3023 // Takes the left-most 1-bit and smears it out, setting all bits to its right.

	3024 static inline uint32_t SmearBitsRight(uint32_t v) {

	3025 v \|= v >> 1;

	3026 v \|= v >> 2;

	3027 v \|= v >> 4;

	3028 v \|= v >> 8;

	3029 v \|= v >> 16;

	3030 return v;

	3031 }

	3032

	3033

	3034 // Here is the meat of GetQuickCheckDetails (see also the comment on the

	3035 // super-class in the .h file).

	3036 //

	3037 // We iterate along the text object, building up for each character a

	3038 // mask and value that can be used to test for a quick failure to match.

	3039 // The masks and values for the positions will be combined into a single

	3040 // machine word for the current character width in order to be used in

	3041 // generating a quick check.

	3042 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,

	3043 RegExpCompiler* compiler,

	3044 intptr_t characters_filled_in,

	3045 bool not_at_start) {

	3046 ASSERT(characters_filled_in < details->characters());

	3047 intptr_t characters = details->characters();

	3048 intptr_t char_mask;

	3049 if (compiler->ascii()) {

	3050 char_mask = Symbols::kMaxOneCharCodeSymbol;

	3051 } else {

	3052 char_mask = Utf16::kMaxCodeUnit;

	3053 }

	3054 for (intptr_t k = 0; k < elms_->length(); k++) {

	3055 TextElement elm = elms_->At(k);

	3056 if (elm.text_type() == TextElement::ATOM) {

	3057 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();

	3058 for (intptr_t i = 0; i < characters && i < quarks->length(); i++) {

	3059 QuickCheckDetails::Position* pos =

	3060 details->positions(characters_filled_in);

	3061 uint16_t c = quarks->At(i);

	3062 if (c > char_mask) {

	3063 // If we expect a non-ASCII character from an ASCII string,

	3064 // there is no way we can match. Not even case independent

	3065 // matching can turn an ASCII character into non-ASCII or

	3066 // vice versa.

	3067 details->set_cannot_match();

	3068 pos->determines_perfectly = false;

	3069 return;

	3070 }

	3071 if (compiler->ignore_case()) {

	3072 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	3073 intptr_t length =

	3074 GetCaseIndependentLetters(c, compiler->ascii(), chars);

	3075 ASSERT(length != 0); // Can only happen if c > char_mask (see above).

	3076 if (length == 1) {

	3077 // This letter has no case equivalents, so it's nice and simple

	3078 // and the mask-compare will determine definitely whether we have

	3079 // a match at this character position.

	3080 pos->mask = char_mask;

	3081 pos->value = c;

	3082 pos->determines_perfectly = true;

	3083 } else {

	3084 uint32_t common_bits = char_mask;

	3085 uint32_t bits = chars[0];

	3086 for (intptr_t j = 1; j < length; j++) {

	3087 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);

	3088 common_bits ^= differing_bits;

	3089 bits &= common_bits;

	3090 }

	3091 // If length is 2 and common bits has only one zero in it then

	3092 // our mask and compare instruction will determine definitely

	3093 // whether we have a match at this character position. Otherwise

	3094 // it can only be an approximate check.

	3095 uint32_t one_zero = (common_bits \| ~char_mask);

	3096 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {

	3097 pos->determines_perfectly = true;

	3098 }

	3099 pos->mask = common_bits;

	3100 pos->value = bits;

	3101 }

	3102 } else {

	3103 // Don't ignore case. Nice simple case where the mask-compare will

	3104 // determine definitely whether we have a match at this character

	3105 // position.

	3106 pos->mask = char_mask;

	3107 pos->value = c;

	3108 pos->determines_perfectly = true;

	3109 }

	3110 characters_filled_in++;

	3111 ASSERT(characters_filled_in <= details->characters());

	3112 if (characters_filled_in == details->characters()) {

	3113 return;

	3114 }

	3115 }

	3116 } else {

	3117 QuickCheckDetails::Position* pos =

	3118 details->positions(characters_filled_in);

	3119 RegExpCharacterClass* tree = elm.char_class();

	3120 ZoneGrowableArray<CharacterRange>* ranges = tree->ranges();

	3121 if (tree->is_negated()) {

	3122 // A quick check uses multi-character mask and compare. There is no

	3123 // useful way to incorporate a negative char class into this scheme

	3124 // so we just conservatively create a mask and value that will always

	3125 // succeed.

	3126 pos->mask = 0;

	3127 pos->value = 0;

	3128 } else {

	3129 intptr_t first_range = 0;

	3130 while (ranges->At(first_range).from() > char_mask) {

	3131 first_range++;

	3132 if (first_range == ranges->length()) {

	3133 details->set_cannot_match();

	3134 pos->determines_perfectly = false;

	3135 return;

	3136 }

	3137 }

	3138 CharacterRange range = ranges->At(first_range);

	3139 uint16_t from = range.from();

	3140 uint16_t to = range.to();

	3141 if (to > char_mask) {

	3142 to = char_mask;

	3143 }

	3144 uint32_t differing_bits = (from ^ to);

	3145 // A mask and compare is only perfect if the differing bits form a

	3146 // number like 00011111 with one single block of trailing 1s.

	3147 if ((differing_bits & (differing_bits + 1)) == 0 &&

	3148 from + differing_bits == to) {

	3149 pos->determines_perfectly = true;

	3150 }

	3151 uint32_t common_bits = ~SmearBitsRight(differing_bits);

	3152 uint32_t bits = (from & common_bits);

	3153 for (intptr_t i = first_range + 1; i < ranges->length(); i++) {

	3154 CharacterRange range = ranges->At(i);

	3155 uint16_t from = range.from();

	3156 uint16_t to = range.to();

	3157 if (from > char_mask) continue;

	3158 if (to > char_mask) to = char_mask;

	3159 // Here we are combining more ranges into the mask and compare

	3160 // value. With each new range the mask becomes more sparse and

	3161 // so the chances of a false positive rise. A character class

	3162 // with multiple ranges is assumed never to be equivalent to a

	3163 // mask and compare operation.

	3164 pos->determines_perfectly = false;

	3165 uint32_t new_common_bits = (from ^ to);

	3166 new_common_bits = ~SmearBitsRight(new_common_bits);

	3167 common_bits &= new_common_bits;

	3168 bits &= new_common_bits;

	3169 uint32_t differing_bits = (from & common_bits) ^ bits;

	3170 common_bits ^= differing_bits;

	3171 bits &= common_bits;

	3172 }

	3173 pos->mask = common_bits;

	3174 pos->value = bits;

	3175 }

	3176 characters_filled_in++;

	3177 ASSERT(characters_filled_in <= details->characters());

	3178 if (characters_filled_in == details->characters()) {

	3179 return;

	3180 }

	3181 }

	3182 }

	3183 ASSERT(characters_filled_in != details->characters());

	3184 if (!details->cannot_match()) {

	3185 on_success()-> GetQuickCheckDetails(details,

	3186 compiler,

	3187 characters_filled_in,

	3188 true);

	3189 }

	3190 }

	3191

	3192

	3193 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,

	3194 RegExpCompiler* compiler,

	3195 intptr_t characters_filled_in,

	3196 bool not_at_start) {

	3197 if (body_can_be_zero_length_ \|\| info()->visited) return;

	3198 VisitMarker marker(info());

	3199 return ChoiceNode::GetQuickCheckDetails(details,

	3200 compiler,

	3201 characters_filled_in,

	3202 not_at_start);

	3203 }

	3204

	3205

	3206 intptr_t TextNode::Length() {

	3207 TextElement elm = elms_->Last();

	3208 ASSERT(elm.cp_offset() >= 0);

	3209 return elm.cp_offset() + elm.length();

	3210 }

	3211

	3212

	3213 bool TextNode::SkipPass(intptr_t intptr_t_pass, bool ignore_case) {

	3214 TextEmitPassType pass = static_cast<TextEmitPassType>(intptr_t_pass);

	3215 if (ignore_case) {

	3216 return pass == SIMPLE_CHARACTER_MATCH;

	3217 } else {

	3218 return pass == NON_LETTER_CHARACTER_MATCH \|\| pass == CASE_CHARACTER_MATCH;

	3219 }

	3220 }

	3221

	3222

	3223 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges,

	3224 const intptr_t* special_class,

	3225 intptr_t length) {

	3226 length--; // Remove final 0x10000.

	3227 ASSERT(special_class[length] == 0x10000);

	3228 ASSERT(ranges->length() != 0);

	3229 ASSERT(length != 0);

	3230 ASSERT(special_class[0] != 0);

	3231 if (ranges->length() != (length >> 1) + 1) {

	3232 return false;

	3233 }

	3234 CharacterRange range = ranges->At(0);

	3235 if (range.from() != 0) {

	3236 return false;

	3237 }

	3238 for (intptr_t i = 0; i < length; i += 2) {

	3239 if (special_class[i] != (range.to() + 1)) {

	3240 return false;

	3241 }

	3242 range = ranges->At((i >> 1) + 1);

	3243 if (special_class[i+1] != range.from()) {

	3244 return false;

	3245 }

	3246 }

	3247 if (range.to() != 0xffff) {

	3248 return false;

	3249 }

	3250 return true;

	3251 }

	3252

	3253

	3254 static bool CompareRanges(ZoneGrowableArray<CharacterRange>* ranges,

	3255 const intptr_t* special_class,

	3256 intptr_t length) {

	3257 length--; // Remove final 0x10000.

	3258 ASSERT(special_class[length] == 0x10000);

	3259 if (ranges->length() * 2 != length) {

	3260 return false;

	3261 }

	3262 for (intptr_t i = 0; i < length; i += 2) {

	3263 CharacterRange range = ranges->At(i >> 1);

	3264 if (range.from() != special_class[i] \|\|

	3265 range.to() != special_class[i + 1] - 1) {

	3266 return false;

	3267 }

	3268 }

	3269 return true;

	3270 }

	3271

	3272

	3273 bool RegExpCharacterClass::is_standard() {

	3274 // TODO(lrn): Remove need for this function, by not throwing away information

	3275 // along the way.

	3276 if (is_negated_) {

	3277 return false;

	3278 }

	3279 if (set_.is_standard()) {

	3280 return true;

	3281 }

	3282 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {

	3283 set_.set_standard_set_type('s');

	3284 return true;

	3285 }

	3286 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {

	3287 set_.set_standard_set_type('S');

	3288 return true;

	3289 }

	3290 if (CompareInverseRanges(set_.ranges(),

	3291 kLineTerminatorRanges,

	3292 kLineTerminatorRangeCount)) {

	3293 set_.set_standard_set_type('.');

	3294 return true;

	3295 }

	3296 if (CompareRanges(set_.ranges(),

	3297 kLineTerminatorRanges,

	3298 kLineTerminatorRangeCount)) {

	3299 set_.set_standard_set_type('n');

	3300 return true;

	3301 }

	3302 if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) {

	3303 set_.set_standard_set_type('w');

	3304 return true;

	3305 }

	3306 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) {

	3307 set_.set_standard_set_type('W');

	3308 return true;

	3309 }

	3310 return false;

	3311 }

	3312

	3313

	3314 void TextNode::MakeCaseIndependent(bool is_ascii) {

	3315 intptr_t element_count = elms_->length();

	3316 for (intptr_t i = 0; i < element_count; i++) {

	3317 TextElement elm = elms_->At(i);

	3318 if (elm.text_type() == TextElement::CHAR_CLASS) {

	3319 RegExpCharacterClass* cc = elm.char_class();

	3320 // None of the standard character classes is different in the case

	3321 // independent case and it slows us down if we don't know that.

	3322 if (cc->is_standard()) continue;

	3323 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();

	3324 intptr_t range_count = ranges->length();

	3325 for (intptr_t j = 0; j < range_count; j++) {

	3326 (*ranges)[j].AddCaseEquivalents(ranges, is_ascii, I);

	3327 }

	3328 }

	3329 }

	3330 }

	3331

	3332

	3333 intptr_t TextNode::GreedyLoopTextLength() {

	3334 TextElement elm = elms_->At(elms_->length() - 1);

	3335 return elm.cp_offset() + elm.length();

	3336 }

	3337

	3338

	3339 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(

	3340 RegExpCompiler* compiler) {

	3341 if (elms_->length() != 1) return NULL;

	3342 TextElement elm = elms_->At(0);

	3343 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;

	3344 RegExpCharacterClass* node = elm.char_class();

	3345 ZoneGrowableArray<CharacterRange>* ranges = node->ranges();

	3346 if (!CharacterRange::IsCanonical(ranges)) {

	3347 CharacterRange::Canonicalize(ranges);

	3348 }

	3349 if (node->is_negated()) {

	3350 return ranges->length() == 0 ? on_success() : NULL;

	3351 }

	3352 if (ranges->length() != 1) return NULL;

	3353 uint32_t max_char;

	3354 if (compiler->ascii()) {

	3355 max_char = Symbols::kMaxOneCharCodeSymbol;

	3356 } else {

	3357 max_char = Utf16::kMaxCodeUnit;

	3358 }

	3359 return ranges->At(0).IsEverything(max_char) ? on_success() : NULL;

	3360 }

	3361

	3362

	3363 ActionNode* ActionNode::SetRegister(intptr_t reg,

	3364 intptr_t val,

	3365 RegExpNode* on_success) {

	3366 ActionNode* result =

	3367 new(on_success->isolate()) ActionNode(SET_REGISTER, on_success);

	3368 result->data_.u_store_register.reg = reg;

	3369 result->data_.u_store_register.value = val;

	3370 return result;

	3371 }

	3372

	3373

	3374 ActionNode* ActionNode::IncrementRegister(intptr_t reg,

	3375 RegExpNode* on_success) {

	3376 ActionNode* result =

	3377 new(on_success->isolate()) ActionNode(INCREMENT_REGISTER, on_success);

	3378 result->data_.u_increment_register.reg = reg;

	3379 return result;

	3380 }

	3381

	3382

	3383 ActionNode* ActionNode::StorePosition(intptr_t reg,

	3384 bool is_capture,

	3385 RegExpNode* on_success) {

	3386 ActionNode* result =

	3387 new(on_success->isolate()) ActionNode(STORE_POSITION, on_success);

	3388 result->data_.u_position_register.reg = reg;

	3389 result->data_.u_position_register.is_capture = is_capture;

	3390 return result;

	3391 }

	3392

	3393

	3394 ActionNode* ActionNode::ClearCaptures(Interval range,

	3395 RegExpNode* on_success) {

	3396 ActionNode* result =

	3397 new(on_success->isolate()) ActionNode(CLEAR_CAPTURES, on_success);

	3398 result->data_.u_clear_captures.range_from = range.from();

	3399 result->data_.u_clear_captures.range_to = range.to();

	3400 return result;

	3401 }

	3402

	3403

	3404 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg,

	3405 intptr_t position_reg,

	3406 RegExpNode* on_success) {

	3407 ActionNode* result =

	3408 new(on_success->isolate()) ActionNode(BEGIN_SUBMATCH, on_success);

	3409 result->data_.u_submatch.stack_pointer_register = stack_reg;

	3410 result->data_.u_submatch.current_position_register = position_reg;

	3411 return result;

	3412 }

	3413

	3414

	3415 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg,

	3416 intptr_t position_reg,

	3417 intptr_t clear_register_count,

	3418 intptr_t clear_register_from,

	3419 RegExpNode* on_success) {

	3420 ActionNode* result =

	3421 new(on_success->isolate()) ActionNode(POSITIVE_SUBMATCH_SUCCESS,

	3422 on_success);

	3423 result->data_.u_submatch.stack_pointer_register = stack_reg;

	3424 result->data_.u_submatch.current_position_register = position_reg;

	3425 result->data_.u_submatch.clear_register_count = clear_register_count;

	3426 result->data_.u_submatch.clear_register_from = clear_register_from;

	3427 return result;

	3428 }

	3429

	3430

	3431 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register,

	3432 intptr_t repetition_register,

	3433 intptr_t repetition_limit,

	3434 RegExpNode* on_success) {

	3435 ActionNode* result =

	3436 new(on_success->isolate()) ActionNode(EMPTY_MATCH_CHECK, on_success);

	3437 result->data_.u_empty_match_check.start_register = start_register;

	3438 result->data_.u_empty_match_check.repetition_register = repetition_register;

	3439 result->data_.u_empty_match_check.repetition_limit = repetition_limit;

	3440 return result;

	3441 }

	3442

	3443

	3444 TextElement TextElement::Atom(RegExpAtom* atom) {

	3445 return TextElement(ATOM, atom);

	3446 }

	3447

	3448

	3449 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {

	3450 return TextElement(CHAR_CLASS, char_class);

	3451 }

	3452

	3453

	3454 intptr_t TextElement::length() const {

	3455 switch (text_type()) {

	3456 case ATOM:

	3457 return atom()->length();

	3458

	3459 case CHAR_CLASS:

	3460 return 1;

	3461 }

	3462 UNREACHABLE();

	3463 return 0;

	3464 }

	3465

	3466

	3467 static void AddClass(const intptr_t* elmv,

	3468 intptr_t elmc,

	3469 ZoneGrowableArray<CharacterRange>* ranges) {

	3470 elmc--;

	3471 ASSERT(elmv[elmc] == 0x10000);

	3472 for (intptr_t i = 0; i < elmc; i += 2) {

	3473 ASSERT(elmv[i] < elmv[i + 1]);

	3474 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1));

	3475 }

	3476 }

	3477

	3478

	3479 static void AddClassNegated(const intptr_t *elmv,

	3480 intptr_t elmc,

	3481 ZoneGrowableArray<CharacterRange>* ranges) {

	3482 elmc--;

	3483 ASSERT(elmv[elmc] == 0x10000);

	3484 ASSERT(elmv[0] != 0x0000);

	3485 ASSERT(elmv[elmc-1] != Utf16::kMaxCodeUnit);

	3486 uint16_t last = 0x0000;

	3487 for (intptr_t i = 0; i < elmc; i += 2) {

	3488 ASSERT(last <= elmv[i] - 1);

	3489 ASSERT(elmv[i] < elmv[i + 1]);

	3490 ranges->Add(CharacterRange(last, elmv[i] - 1));

	3491 last = elmv[i + 1];

	3492 }

	3493 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit));

	3494 }

	3495

	3496

	3497 void CharacterRange::AddClassEscape(uint16_t type,

	3498 ZoneGrowableArray<CharacterRange>* ranges) {

	3499 switch (type) {

	3500 case 's':

	3501 AddClass(kSpaceRanges, kSpaceRangeCount, ranges);

	3502 break;

	3503 case 'S':

	3504 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges);

	3505 break;

	3506 case 'w':

	3507 AddClass(kWordRanges, kWordRangeCount, ranges);

	3508 break;

	3509 case 'W':

	3510 AddClassNegated(kWordRanges, kWordRangeCount, ranges);

	3511 break;

	3512 case 'd':

	3513 AddClass(kDigitRanges, kDigitRangeCount, ranges);

	3514 break;

	3515 case 'D':

	3516 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges);

	3517 break;

	3518 case '.':

	3519 AddClassNegated(kLineTerminatorRanges,

	3520 kLineTerminatorRangeCount,

	3521 ranges);

	3522 break;

	3523 // This is not a character range as defined by the spec but a

	3524 // convenient shorthand for a character class that matches any

	3525 // character.

	3526 case '*':

	3527 ranges->Add(CharacterRange::Everything());

	3528 break;

	3529 // This is the set of characters matched by the $ and ^ symbols

	3530 // in multiline mode.

	3531 case 'n':

	3532 AddClass(kLineTerminatorRanges,

	3533 kLineTerminatorRangeCount,

	3534 ranges);

	3535 break;

	3536 default:

	3537 UNREACHABLE();

	3538 }

	3539 }

	3540

	3541

	3542 // Move a number of elements in a zonelist to another position

	3543 // in the same list. Handles overlapping source and target areas.

	3544 static void MoveRanges(ZoneGrowableArray<CharacterRange>* list,

	3545 intptr_t from,

	3546 intptr_t to,

	3547 intptr_t count) {

	3548 // Ranges are potentially overlapping.

	3549 if (from < to) {

	3550 for (intptr_t i = count - 1; i >= 0; i--) {

	3551 (*list)[to + i] = list->At(from + i);

	3552 }

	3553 } else {

	3554 for (intptr_t i = 0; i < count; i++) {

	3555 (*list)[to + i] = list->At(from + i);

	3556 }

	3557 }

	3558 }

	3559

	3560

	3561 static intptr_t InsertRangeInCanonicalList(

	3562 ZoneGrowableArray<CharacterRange>* list,

	3563 intptr_t count,

	3564 CharacterRange insert) {

	3565 // Inserts a range into list[0..count[, which must be sorted

	3566 // by from value and non-overlapping and non-adjacent, using at most

	3567 // list[0..count] for the result. Returns the number of resulting

	3568 // canonicalized ranges. Inserting a range may collapse existing ranges into

	3569 // fewer ranges, so the return value can be anything in the range 1..count+1.

	3570 uint16_t from = insert.from();

	3571 uint16_t to = insert.to();

	3572 intptr_t start_pos = 0;

	3573 intptr_t end_pos = count;

	3574 for (intptr_t i = count - 1; i >= 0; i--) {

	3575 CharacterRange current = list->At(i);

	3576 if (current.from() > to + 1) {

	3577 end_pos = i;

	3578 } else if (current.to() + 1 < from) {

	3579 start_pos = i + 1;

	3580 break;

	3581 }

	3582 }

	3583

	3584 // Inserted range overlaps, or is adjacent to, ranges at positions

	3585 // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are

	3586 // not affected by the insertion.

	3587 // If start_pos == end_pos, the range must be inserted before start_pos.

	3588 // if start_pos < end_pos, the entire range from start_pos to end_pos

	3589 // must be merged with the insert range.

	3590

	3591 if (start_pos == end_pos) {

	3592 // Insert between existing ranges at position start_pos.

	3593 if (start_pos < count) {

	3594 MoveRanges(list, start_pos, start_pos + 1, count - start_pos);

	3595 }

	3596 (*list)[start_pos] = insert;

	3597 return count + 1;

	3598 }

	3599 if (start_pos + 1 == end_pos) {

	3600 // Replace single existing range at position start_pos.

	3601 CharacterRange to_replace = list->At(start_pos);

	3602 intptr_t new_from = Utils::Minimum(to_replace.from(), from);

	3603 intptr_t new_to = Utils::Maximum(to_replace.to(), to);

	3604 (*list)[start_pos] = CharacterRange(new_from, new_to);

	3605 return count;

	3606 }

	3607 // Replace a number of existing ranges from start_pos to end_pos - 1.

	3608 // Move the remaining ranges down.

	3609

	3610 intptr_t new_from = Utils::Minimum(list->At(start_pos).from(), from);

	3611 intptr_t new_to = Utils::Maximum(list->At(end_pos - 1).to(), to);

	3612 if (end_pos < count) {

	3613 MoveRanges(list, end_pos, start_pos + 1, count - end_pos);

	3614 }

	3615 (*list)[start_pos] = CharacterRange(new_from, new_to);

	3616 return count - (end_pos - start_pos) + 1;

	3617 }

	3618

	3619

	3620 bool CharacterRange::IsCanonical(ZoneGrowableArray<CharacterRange>* ranges) {

	3621 ASSERT(ranges != NULL);

	3622 intptr_t n = ranges->length();

	3623 if (n <= 1) return true;

	3624 intptr_t max = ranges->At(0).to();

	3625 for (intptr_t i = 1; i < n; i++) {

	3626 CharacterRange next_range = ranges->At(i);

	3627 if (next_range.from() <= max + 1) return false;

	3628 max = next_range.to();

	3629 }

	3630 return true;

	3631 }

	3632

	3633

	3634 void CharacterRange::Canonicalize(

	3635 ZoneGrowableArray<CharacterRange>* character_ranges) {

	3636 if (character_ranges->length() <= 1) return;

	3637 // Check whether ranges are already canonical (increasing, non-overlapping,

	3638 // non-adjacent).

	3639 intptr_t n = character_ranges->length();

	3640 intptr_t max = character_ranges->At(0).to();

	3641 intptr_t i = 1;

	3642 while (i < n) {

	3643 CharacterRange current = character_ranges->At(i);

	3644 if (current.from() <= max + 1) {

	3645 break;

	3646 }

	3647 max = current.to();

	3648 i++;

	3649 }

	3650 // Canonical until the i'th range. If that's all of them, we are done.

	3651 if (i == n) return;

	3652

	3653 // The ranges at index i and forward are not canonicalized. Make them so by

	3654 // doing the equivalent of insertion sort (inserting each into the previous

	3655 // list, in order).

	3656 // Notice that inserting a range can reduce the number of ranges in the

	3657 // result due to combining of adjacent and overlapping ranges.

	3658 intptr_t read = i; // Range to insert.

	3659 intptr_t num_canonical = i; // Length of canonicalized part of list.

	3660 do {

	3661 num_canonical = InsertRangeInCanonicalList(character_ranges,

	3662 num_canonical,

	3663 character_ranges->At(read));

	3664 read++;

	3665 } while (read < n);

	3666 character_ranges->TruncateTo(num_canonical);

	3667

	3668 ASSERT(CharacterRange::IsCanonical(character_ranges));

	3669 }

	3670

	3671

	3672 void CharacterSet::Canonicalize() {

	3673 // Special/default classes are always considered canonical. The result

	3674 // of calling ranges() will be sorted.

	3675 if (ranges_ == NULL) return;

	3676 CharacterRange::Canonicalize(ranges_);

	3677 }

	3678

	3679

	3680 ZoneGrowableArray<CharacterRange>* CharacterSet::ranges() {

	3681 if (ranges_ == NULL) {

	3682 ranges_ = new ZoneGrowableArray<CharacterRange>(2);

	3683 CharacterRange::AddClassEscape(standard_set_type_, ranges_);

	3684 }

	3685 return ranges_;

	3686 }

	3687

	3688

	3689 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges,

	3690 ZoneGrowableArray<CharacterRange>* negated_ranges) {

	3691 ASSERT(CharacterRange::IsCanonical(ranges));

	3692 ASSERT(negated_ranges->length() == 0);

	3693 intptr_t range_count = ranges->length();

	3694 uint16_t from = 0;

	3695 intptr_t i = 0;

	3696 if (range_count > 0 && ranges->At(0).from() == 0) {

	3697 from = ranges->At(0).to();

	3698 i = 1;

	3699 }

	3700 while (i < range_count) {

	3701 CharacterRange range = ranges->At(i);

	3702 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1));

	3703 from = range.to();

	3704 i++;

	3705 }

	3706 if (from < Utf16::kMaxCodeUnit) {

	3707 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit));

	3708 }

	3709 }

	3710

	3711

	3712 void CharacterRange::AddCaseEquivalents(

	3713 ZoneGrowableArray<CharacterRange>* ranges,

	3714 bool is_ascii,

	3715 Isolate* isolate) {

	3716 uint16_t bottom = from();

	3717 uint16_t top = to();

	3718 if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {

	3719 if (bottom > Symbols::kMaxOneCharCodeSymbol) return;

	3720 if (top > Symbols::kMaxOneCharCodeSymbol) {

	3721 top = Symbols::kMaxOneCharCodeSymbol;

	3722 }

	3723 }

	3724

	3725 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;

	3726 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange;

	3727 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	3728 if (top == bottom) {

	3729 // If this is a singleton we just expand the one character.

	3730 intptr_t length = jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLIN T

	3731 for (intptr_t i = 0; i < length; i++) {

	3732 uint32_t chr = chars[i];

	3733 if (chr != bottom) {

	3734 ranges->Add(CharacterRange::Singleton(chars[i]));

	3735 }

	3736 }

	3737 } else {

	3738 // If this is a range we expand the characters block by block,

	3739 // expanding contiguous subranges (blocks) one at a time.

	3740 // The approach is as follows. For a given start character we

	3741 // look up the remainder of the block that contains it (represented

	3742 // by the end point), for instance we find 'z' if the character

	3743 // is 'c'. A block is characterized by the property

	3744 // that all characters uncanonicalize in the same way, except that

	3745 // each entry in the result is incremented by the distance from the first

	3746 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and // NOLINT

	3747 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].

	3748 // Once we've found the end point we look up its uncanonicalization

	3749 // and produce a range for each element. For instance for [c-f]

	3750 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only

	3751 // add a range if it is not already contained in the input, so [c-f]

	3752 // will be skipped but [C-F] will be added. If this range is not

	3753 // completely contained in a block we do this for all the blocks

	3754 // covered by the range (handling characters that is not in a block

	3755 // as a "singleton block").

	3756 int32_t range[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	3757 intptr_t pos = bottom;

	3758 while (pos <= top) {

	3759 intptr_t length = jsregexp_canonrange.get(pos, '\0', range);

	3760 uint16_t block_end;

	3761 if (length == 0) {

	3762 block_end = pos;

	3763 } else {

	3764 ASSERT(length == 1);

	3765 block_end = range[0];

	3766 }

	3767 intptr_t end = (block_end > top) ? top : block_end;

	3768 length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT

	3769 for (intptr_t i = 0; i < length; i++) {

	3770 uint32_t c = range[i];

	3771 uint16_t range_from = c - (block_end - pos);

	3772 uint16_t range_to = c - (block_end - end);

	3773 if (!(bottom <= range_from && range_to <= top)) {

	3774 ranges->Add(CharacterRange(range_from, range_to));

	3775 }

	3776 }

	3777 pos = end + 1;

	3778 }

	3779 }

	3780 }

	3781

	3782

	3783 // -------------------------------------------------------------------

	3784 // Splay tree

	3785

	3786

	3787 // Workaround for the fact that ZoneGrowableArray does not have contains().

	3788 static bool ArrayContains(ZoneGrowableArray<unsigned>* array,

	3789 unsigned value) {

	3790 for (intptr_t i = 0; i < array->length(); i++) {

	3791 if (array->At(i) == value) {

	3792 return true;

	3793 }

	3794 }

	3795 return false;

	3796 }

	3797

	3798

	3799 void OutSet::Set(unsigned value, Isolate* isolate) {

	3800 if (value < kFirstLimit) {

	3801 first_ \|= (1 << value);

	3802 } else {

	3803 if (remaining_ == NULL)

	3804 remaining_ = new(isolate) ZoneGrowableArray<unsigned>(1);

	3805

	3806 bool remaining_contains_value = ArrayContains(remaining_, value);

	3807 if (remaining_->is_empty() \|\| !remaining_contains_value) {

	3808 remaining_->Add(value);

	3809 }

	3810 }

	3811 }

	3812

	3813

	3814 bool OutSet::Get(unsigned value) const {

	3815 if (value < kFirstLimit) {

	3816 return (first_ & (1 << value)) != 0;

	3817 } else if (remaining_ == NULL) {

	3818 return false;

	3819 } else {

	3820 return ArrayContains(remaining_, value);

	3821 }

	3822 }

	3823

	3824 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,

	3825 RegExpCompiler* compiler,

	3826 intptr_t filled_in,

	3827 bool not_at_start) {

	3828 if (assertion_type_ == AT_START && not_at_start) {

	3829 details->set_cannot_match();

	3830 return;

	3831 }

	3832 return on_success()->GetQuickCheckDetails(details,

	3833 compiler,

	3834 filled_in,

	3835 not_at_start);

	3836 }

	3837

	3838

	3839 void GuardedAlternative::AddGuard(Guard* guard, Isolate* isolate) {

	3840 if (guards_ == NULL)

	3841 guards_ = new(isolate) ZoneGrowableArray<Guard*>(1);

	3842 guards_->Add(guard);

	3843 }

	3844

	3845

	3846 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {

	3847 ASSERT(loop_node_ == NULL);

	3848 AddAlternative(alt);

	3849 loop_node_ = alt.node();

	3850 }

	3851

	3852

	3853 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {

	3854 ASSERT(continue_node_ == NULL);

	3855 AddAlternative(alt);

	3856 continue_node_ = alt.node();

	3857 }

	3858

	3859

	3860 void LoopChoiceNode::Accept(NodeVisitor* visitor) {

	3861 visitor->VisitLoopChoice(this);

	3862 }

	3863

	3864

	3865 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find,

	3866 intptr_t budget,

	3867 bool not_at_start) {

	3868 return EatsAtLeastHelper(still_to_find,

	3869 budget - 1,

	3870 loop_node_,

	3871 not_at_start);

	3872 }

	3873

	3874

	3875 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,

	3876 RegExpCompiler* compiler,

	3877 intptr_t characters_filled_in,

	3878 bool not_at_start) {

	3879 not_at_start = (not_at_start \|\| not_at_start_);

	3880 intptr_t choice_count = alternatives_->length();

	3881 ASSERT(choice_count > 0);

	3882 (*alternatives_)[0].node()->GetQuickCheckDetails(details,

	3883 compiler,

	3884 characters_filled_in,

	3885 not_at_start);

	3886 for (intptr_t i = 1; i < choice_count; i++) {

	3887 QuickCheckDetails new_details(details->characters());

	3888 RegExpNode* node = (*alternatives_)[i].node();

	3889 node->GetQuickCheckDetails(&new_details, compiler,

	3890 characters_filled_in,

	3891 not_at_start);

	3892 // Here we merge the quick match details of the two branches.

	3893 details->Merge(&new_details, characters_filled_in);

	3894 }

	3895 }

	3896

	3897

	3898 void QuickCheckDetails::Clear() {

	3899 for (intptr_t i = 0; i < characters_; i++) {

	3900 positions_[i].mask = 0;

	3901 positions_[i].value = 0;

	3902 positions_[i].determines_perfectly = false;

	3903 }

	3904 characters_ = 0;

	3905 }

	3906

	3907

	3908 void QuickCheckDetails::Advance(intptr_t by, bool ascii) {

	3909 ASSERT(by >= 0);

	3910 if (by >= characters_) {

	3911 Clear();

	3912 return;

	3913 }

	3914 for (intptr_t i = 0; i < characters_ - by; i++) {

	3915 positions_[i] = positions_[by + i];

	3916 }

	3917 for (intptr_t i = characters_ - by; i < characters_; i++) {

	3918 positions_[i].mask = 0;

	3919 positions_[i].value = 0;

	3920 positions_[i].determines_perfectly = false;

	3921 }

	3922 characters_ -= by;

	3923 // We could change mask_ and value_ here but we would never advance unless

	3924 // they had already been used in a check and they won't be used again because

	3925 // it would gain us nothing. So there's no point.

	3926 }

	3927

	3928

	3929 void QuickCheckDetails::Merge(QuickCheckDetails* other, intptr_t from_index) {

	3930 ASSERT(characters_ == other->characters_);

	3931 if (other->cannot_match_) {

	3932 return;

	3933 }

	3934 if (cannot_match_) {

	3935 this = other;

	3936 return;

	3937 }

	3938 for (intptr_t i = from_index; i < characters_; i++) {

	3939 QuickCheckDetails::Position* pos = positions(i);

	3940 QuickCheckDetails::Position* other_pos = other->positions(i);

	3941 if (pos->mask != other_pos->mask \|\|

	3942 pos->value != other_pos->value \|\|

	3943 !other_pos->determines_perfectly) {

	3944 // Our mask-compare operation will be approximate unless we have the

	3945 // exact same operation on both sides of the alternation.

	3946 pos->determines_perfectly = false;

	3947 }

	3948 pos->mask &= other_pos->mask;

	3949 pos->value &= pos->mask;

	3950 other_pos->value &= pos->mask;

	3951 uint16_t differing_bits = (pos->value ^ other_pos->value);

	3952 pos->mask &= ~differing_bits;

	3953 pos->value &= pos->mask;

	3954 }

	3955 }

	3956

	3957

	3958 // Finds the fixed match length of a sequence of nodes that goes from

	3959 // this alternative and back to this choice node. If there are variable

	3960 // length nodes or other complications in the way then return a sentinel

	3961 // value indicating that a greedy loop cannot be constructed.

	3962 intptr_t ChoiceNode::GreedyLoopTextLengthForAlternative(

	3963 GuardedAlternative* alternative) {

	3964 intptr_t length = 0;

	3965 RegExpNode* node = alternative->node();

	3966 // Later we will generate code for all these text nodes using recursion

	3967 // so we have to limit the max number.

	3968 intptr_t recursion_depth = 0;

	3969 while (node != this) {

	3970 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {

	3971 return kNodeIsTooComplexForGreedyLoops;

	3972 }

	3973 intptr_t node_length = node->GreedyLoopTextLength();

	3974 if (node_length == kNodeIsTooComplexForGreedyLoops) {

	3975 return kNodeIsTooComplexForGreedyLoops;

	3976 }

	3977 length += node_length;

	3978 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);

	3979 node = seq_node->on_success();

	3980 }

	3981 return length;

	3982 }

	3983

	3984

	3985 void NegativeLookaheadChoiceNode::GetQuickCheckDetails(

	3986 QuickCheckDetails* details,

	3987 RegExpCompiler* compiler,

	3988 intptr_t filled_in,

	3989 bool not_at_start) {

	3990 // Alternative 0 is the negative lookahead, alternative 1 is what comes

	3991 // afterwards.

	3992 RegExpNode* node = (*alternatives_)[1].node();

	3993 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);

	3994 }

	3995

	3996

	3997 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {

	3998 return RegExpEngine::CompilationResult("RegExp too big");

	3999 }

	4000

	4001

	4002 // Attempts to compile the regexp using an Irregexp code generator. Returns

	4003 // a fixed array or a null handle depending on whether it succeeded.

	4004 RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool ignore_case,

	4005 intptr_t specialization_cid)

	4006 : next_register_(2 * (capture_count + 1)),

	4007 work_list_(NULL),

	4008 recursion_depth_(0),

	4009 ignore_case_(ignore_case),

	4010 specialization_cid_(specialization_cid),

	4011 reg_exp_too_big_(false),

	4012 current_expansion_factor_(1),

	4013 isolate_(Isolate::Current()) {

	4014 accept_ = new(I) EndNode(EndNode::ACCEPT, I);

	4015 }

	4016

	4017

	4018 RegExpEngine::CompilationResult RegExpCompiler::Assemble(

	4019 IRRegExpMacroAssembler* macro_assembler,

	4020 RegExpNode* start,

	4021 intptr_t capture_count,

	4022 const String& pattern) {

	4023 static const bool use_slow_safe_regexp_compiler = false;

	4024

	4025 macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);

	4026 macro_assembler_ = macro_assembler;

	4027

	4028 ZoneGrowableArray<RegExpNode*> work_list(0);

	4029 work_list_ = &work_list;

	4030 BlockLabel fail;

	4031 macro_assembler_->PushBacktrack(&fail);

	4032 Trace new_trace;

	4033 start->Emit(this, &new_trace);

	4034 macro_assembler_->BindBlock(&fail);

	4035 macro_assembler_->Fail();

	4036 while (!work_list.is_empty()) {

	4037 work_list.RemoveLast()->Emit(this, &new_trace);

	4038 }

	4039 if (reg_exp_too_big_) return IrregexpRegExpTooBig();

	4040

	4041 macro_assembler->FinalizeIndirectGotos();

	4042

	4043 return RegExpEngine::CompilationResult(macro_assembler,

	4044 macro_assembler->graph_entry(),

	4045 macro_assembler->num_blocks(),

	4046 macro_assembler->num_stack_locals());

	4047 }

	4048

	4049

	4050 RegExpEngine::CompilationResult RegExpEngine::Compile(

	4051 RegExpCompileData* data,

	4052 const ParsedFunction* parsed_function,

	4053 const ZoneGrowableArray<const ICData*>& ic_data_array) {

	4054 Isolate* isolate = Isolate::Current();

	4055

	4056 const Function& function = parsed_function->function();

	4057 const intptr_t specialization_cid = function.regexp_cid();

	4058 const bool is_ascii = (specialization_cid == kOneByteStringCid \|\|

	4059 specialization_cid == kExternalOneByteStringCid);

	4060 JSRegExp& regexp = JSRegExp::Handle(isolate, function.regexp());

	4061 const String& pattern = String::Handle(isolate, regexp.pattern());

	4062

	4063 ASSERT(!regexp.IsNull());

	4064 ASSERT(!pattern.IsNull());

	4065

	4066 const bool ignore_case = regexp.is_ignore_case();

	4067 const bool is_global = regexp.is_global();

	4068

	4069 RegExpCompiler compiler(data->capture_count, ignore_case, specialization_cid);

	4070

	4071 // TODO(jgruber): Frequency sampling is currently disabled because of several

	4072 // issues. We do not want to store subject strings in the regexp object since

	4073 // they might be long and we should not prevent their garbage collection.

	4074 // Passing them to this function explicitly does not help, since we must

	4075 // generate exactly the same IR for both the unoptimizing and optimizing

	4076 // pipelines (otherwise it gets confused when i.e. deopt id's differ).

	4077 // An option would be to store sampling results in the regexp object, but

	4078 // I'm not sure the performance gains are relevant enough.

	4079

	4080 // Wrap the body of the regexp in capture #0.

	4081 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,

	4082 0,

	4083 &compiler,

	4084 compiler.accept());

	4085

	4086 RegExpNode* node = captured_body;

	4087 bool is_start_anchored = data->tree->IsAnchoredAtStart();

	4088 bool is_end_anchored = data->tree->IsAnchoredAtEnd();

	4089 intptr_t max_length = data->tree->max_match();

	4090 if (!is_start_anchored) {

	4091 // Add a .*? at the beginning, outside the body capture, unless

	4092 // this expression is anchored at the beginning.

	4093 RegExpNode* loop_node =

	4094 RegExpQuantifier::ToNode(0,

	4095 RegExpTree::kInfinity,

	4096 false,

	4097 new(isolate) RegExpCharacterClass('*'),

	4098 &compiler,

	4099 captured_body,

	4100 data->contains_anchor);

	4101

	4102 if (data->contains_anchor) {

	4103 // Unroll loop once, to take care of the case that might start

	4104 // at the start of input.

	4105 ChoiceNode* first_step_node = new(isolate) ChoiceNode(2, isolate);

	4106 first_step_node->AddAlternative(GuardedAlternative(captured_body));

	4107 first_step_node->AddAlternative(GuardedAlternative(

	4108 new(isolate) TextNode(

	4109 new(isolate) RegExpCharacterClass('*'), loop_node)));

	4110 node = first_step_node;

	4111 } else {

	4112 node = loop_node;

	4113 }

	4114 }

	4115 if (is_ascii) {

	4116 node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);

	4117 // Do it again to propagate the new nodes to places where they were not

	4118 // put because they had not been calculated yet.

	4119 if (node != NULL) {

	4120 node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);

	4121 }

	4122 }

	4123

	4124 if (node == NULL) node = new(isolate) EndNode(EndNode::BACKTRACK, isolate);

	4125 data->node = node;

	4126 Analysis analysis(ignore_case, is_ascii);

	4127 analysis.EnsureAnalyzed(node);

	4128 if (analysis.has_failed()) {

	4129 const char* error_message = analysis.error_message();

	4130 return CompilationResult(error_message);

	4131 }

	4132

	4133 // Native regexp implementation.

	4134

	4135 IRRegExpMacroAssembler* macro_assembler =

	4136 new(isolate) IRRegExpMacroAssembler(specialization_cid,

	4137 data->capture_count,

	4138 parsed_function,

	4139 ic_data_array,

	4140 isolate);

	4141

	4142 // Inserted here, instead of in Assembler, because it depends on information

	4143 // in the AST that isn't replicated in the Node structure.

	4144 static const intptr_t kMaxBacksearchLimit = 1024;

	4145 if (is_end_anchored &&

	4146 !is_start_anchored &&

	4147 max_length < kMaxBacksearchLimit) {

	4148 macro_assembler->SetCurrentPositionFromEnd(max_length);

	4149 }

	4150

	4151 if (is_global) {

	4152 macro_assembler->set_global_mode(

	4153 (data->tree->min_match() > 0)

	4154 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK

	4155 : RegExpMacroAssembler::GLOBAL);

	4156 }

	4157

	4158 RegExpEngine::CompilationResult result =

	4159 compiler.Assemble(macro_assembler,

	4160 node,

	4161 data->capture_count,

	4162 pattern);

	4163

	4164 if (FLAG_trace_irregexp) {

	4165 macro_assembler->PrintBlocks();

	4166 }

	4167

	4168 return result;

	4169 }

	4170

	4171

	4172 static void CreateSpecializedFunction(Isolate* isolate,

	4173 JSRegExp* regexp,
	Ivan Posva 2014/10/10 06:35:54 const JSRegExp& const JSRegExp& jgruber1 2014/10/10 09:29:44 Done. Show quoted text On 2014/10/10 06:35:54, Ivan Posva wrote: > const JSRegExp& Done.
	4174 intptr_t specialization_cid,

	4175 const Object& owner) {

	4176 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount;

	4177

	4178 Function& fn = Function::Handle(isolate,

	4179 Function::New(String::Handle(isolate, Symbols::New("RegExp")),

	4180 RawFunction::kIrregexpFunction,

	4181 true, // Static.

	4182 false, // Not const.

	4183 false, // Not abstract.

	4184 false, // Not external.

	4185 false, // Not native.

	4186 owner,

	4187 0)); // Requires a non-negative token position.

	4188

	4189 fn.set_num_fixed_parameters(kParamCount);

	4190 fn.set_parameter_types(
	Ivan Posva 2014/10/10 06:35:54 These parameter arrays should be allocated only on These parameter arrays should be allocated only once and shared between all regiexp functions. They are identical among all of them. jgruber1 2014/10/10 09:29:44 Where should they be cached? FYI, CreateMethodExtr Show quoted text On 2014/10/10 06:35:54, Ivan Posva wrote: > These parameter arrays should be allocated only once and shared between all > regiexp functions. They are identical among all of them. Where should they be cached? FYI, CreateMethodExtractor also creates new parameter types/names arrays each time it is called. I added a TODO in the meantime. Vyacheslav Egorov (Google) 2014/10/10 09:43:25 For starters they could be cached outside of this Show quoted text On 2014/10/10 09:29:44, jgruber wrote: > On 2014/10/10 06:35:54, Ivan Posva wrote: > > These parameter arrays should be allocated only once and shared between all > > regiexp functions. They are identical among all of them. > > Where should they be cached? FYI, CreateMethodExtractor also creates new > parameter types/names arrays each time it is called. I added a TODO in the > meantime. For starters they could be cached outside of this function in the caller. That's a small easy win, but I don't think any further engineering with caching them globally is worth it in terms of memory consumption, unless there is a case where 10000s of RegExps are floating around.
	4191 Array::Handle(isolate, Array::New(kParamCount, Heap::kOld)));

	4192 fn.set_parameter_names(

	4193 Array::Handle(isolate, Array::New(kParamCount, Heap::kOld)));

	4194 fn.SetParameterTypeAt(0, Type::Handle(isolate, Type::DynamicType()));

	4195 fn.SetParameterNameAt(0, Symbols::string_param_());

	4196 fn.SetParameterTypeAt(1, Type::Handle(isolate, Type::DynamicType()));

	4197 fn.SetParameterNameAt(1, Symbols::start_index_param_());

	4198 fn.set_result_type(Type::Handle(isolate, Type::ArrayType()));

	4199

	4200 // Cache the result.

	4201 regexp->set_function(specialization_cid, fn);

	4202

	4203 fn.set_regexp(*regexp);

	4204 fn.set_regexp_cid(specialization_cid);

	4205

	4206 // The function is compiled lazily during the first call.

	4207 }

	4208

	4209

	4210 RawJSRegExp* RegExpEngine::CreateJSRegExp(Isolate* isolate,

	4211 const String& pattern,

	4212 bool multi_line,

	4213 bool ignore_case) {

	4214 JSRegExp& regexp = JSRegExp::Handle(JSRegExp::New(0));

	4215

	4216 regexp.set_pattern(pattern);

	4217

	4218 if (multi_line) regexp.set_is_multi_line();
	Ivan Posva 2014/10/10 06:35:54 if (cond) { statements; } if (cond) { statements; } jgruber1 2014/10/10 09:29:44 Done. Show quoted text On 2014/10/10 06:35:54, Ivan Posva wrote: > if (cond) { > statements; > } Done.
	4219 if (ignore_case) regexp.set_is_ignore_case();

	4220

	4221 // TODO(jgruber): We might want to use normal string searching algorithms

	4222 // for simple patterns.

	4223 regexp.set_is_complex();

	4224 regexp.set_is_global(); // All dart regexps are global.

	4225

	4226 const Library& lib = Library::Handle(isolate, Library::CoreLibrary());

	4227 const Class& owner = Class::Handle(isolate,

	4228 lib.LookupClass(String::Handle(isolate, Symbols::New("RegExp"))));

	4229

	4230 CreateSpecializedFunction(isolate, &regexp, kOneByteStringCid, owner);

	4231 CreateSpecializedFunction(isolate, &regexp, kTwoByteStringCid, owner);

	4232 CreateSpecializedFunction(isolate, &regexp, kExternalOneByteStringCid, owner);

	4233 CreateSpecializedFunction(isolate, &regexp, kExternalTwoByteStringCid, owner);

	4234

	4235 return regexp.raw();

	4236 }

	4237

	4238

	4239 void BoyerMoorePositionInfo::Set(intptr_t character) {

	4240 SetInterval(Interval(character, character));

	4241 }

	4242

	4243

	4244 ContainedInLattice AddRange(ContainedInLattice containment,

	4245 const intptr_t* ranges,

	4246 intptr_t ranges_length,

	4247 Interval new_range) {

	4248 ASSERT((ranges_length & 1) == 1);

	4249 ASSERT(ranges[ranges_length - 1] == Utf16::kMaxCodeUnit + 1);

	4250 if (containment == kLatticeUnknown) return containment;

	4251 bool inside = false;

	4252 intptr_t last = 0;

	4253 for (intptr_t i = 0; i < ranges_length;

	4254 inside = !inside, last = ranges[i], i++) {

	4255 // Consider the range from last to ranges[i].

	4256 // We haven't got to the new range yet.

	4257 if (ranges[i] <= new_range.from()) continue;

	4258 // New range is wholly inside last-ranges[i]. Note that new_range.to() is

	4259 // inclusive, but the values in ranges are not.

	4260 if (last <= new_range.from() && new_range.to() < ranges[i]) {

	4261 return Combine(containment, inside ? kLatticeIn : kLatticeOut);

	4262 }

	4263 return kLatticeUnknown;

	4264 }

	4265 return containment;

	4266 }

	4267

	4268

	4269 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {

	4270 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);

	4271 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);

	4272 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);

	4273 surrogate_ =

	4274 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);

	4275 if (interval.to() - interval.from() >= kMapSize - 1) {

	4276 if (map_count_ != kMapSize) {

	4277 map_count_ = kMapSize;

	4278 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true;

	4279 }

	4280 return;

	4281 }

	4282 for (intptr_t i = interval.from(); i <= interval.to(); i++) {

	4283 intptr_t mod_character = (i & kMask);

	4284 if (!map_->At(mod_character)) {

	4285 map_count_++;

	4286 (*map_)[mod_character] = true;

	4287 }

	4288 if (map_count_ == kMapSize) return;

	4289 }

	4290 }

	4291

	4292

	4293 void BoyerMoorePositionInfo::SetAll() {

	4294 s_ = w_ = d_ = kLatticeUnknown;

	4295 if (map_count_ != kMapSize) {

	4296 map_count_ = kMapSize;

	4297 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true;

	4298 }

	4299 }

	4300

	4301

	4302 BoyerMooreLookahead::BoyerMooreLookahead(

	4303 intptr_t length, RegExpCompiler* compiler, Isolate* isolate)

	4304 : length_(length),

	4305 compiler_(compiler) {

	4306 if (compiler->ascii()) {

	4307 max_char_ = Symbols::kMaxOneCharCodeSymbol;

	4308 } else {

	4309 max_char_ = Utf16::kMaxCodeUnit;

	4310 }

	4311 bitmaps_ = new(isolate) ZoneGrowableArray<BoyerMoorePositionInfo*>(length);

	4312 for (intptr_t i = 0; i < length; i++) {

	4313 bitmaps_->Add(new(isolate) BoyerMoorePositionInfo(isolate));

	4314 }

	4315 }

	4316

	4317

	4318 // Take all the characters that will not prevent a successful match if they

	4319 // occur in the subject string in the range between min_lookahead and

	4320 // max_lookahead (inclusive) measured from the current position. If the

	4321 // character at max_lookahead offset is not one of these characters, then we

	4322 // can safely skip forwards by the number of characters in the range.

	4323 intptr_t BoyerMooreLookahead::GetSkipTable(

	4324 intptr_t min_lookahead,

	4325 intptr_t max_lookahead,

	4326 const TypedData& boolean_skip_table) {

	4327 const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	4328

	4329 const intptr_t kSkipArrayEntry = 0;

	4330 const intptr_t kDontSkipArrayEntry = 1;

	4331

	4332 for (intptr_t i = 0; i < kSize; i++) {

	4333 boolean_skip_table.SetUint8(i, kSkipArrayEntry);

	4334 }

	4335 intptr_t skip = max_lookahead + 1 - min_lookahead;

	4336

	4337 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) {

	4338 BoyerMoorePositionInfo* map = bitmaps_->At(i);

	4339 for (intptr_t j = 0; j < kSize; j++) {

	4340 if (map->at(j)) {

	4341 boolean_skip_table.SetUint8(j, kDontSkipArrayEntry);

	4342 }

	4343 }

	4344 }

	4345

	4346 return skip;

	4347 }

	4348

	4349

	4350 // Find the longest range of lookahead that has the fewest number of different

	4351 // characters that can occur at a given position. Since we are optimizing two

	4352 // different parameters at once this is a tradeoff.

	4353 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) {

	4354 intptr_t biggest_points = 0;

	4355 // If more than 32 characters out of 128 can occur it is unlikely that we can

	4356 // be lucky enough to step forwards much of the time.

	4357 const intptr_t kMaxMax = 32;

	4358 for (intptr_t max_number_of_chars = 4;

	4359 max_number_of_chars < kMaxMax;

	4360 max_number_of_chars *= 2) {

	4361 biggest_points =

	4362 FindBestInterval(max_number_of_chars, biggest_points, from, to);

	4363 }

	4364 if (biggest_points == 0) return false;

	4365 return true;

	4366 }

	4367

	4368

	4369 // Find the highest-points range between 0 and length_ where the character

	4370 // information is not too vague. 'Too vague' means that there are more than

	4371 // max_number_of_chars that can occur at this position. Calculates the number

	4372 // of points as the product of width-of-the-range and

	4373 // probability-of-finding-one-of-the-characters, where the probability is

	4374 // calculated using the frequency distribution of the sample subject string.

	4375 intptr_t BoyerMooreLookahead::FindBestInterval(

	4376 intptr_t max_number_of_chars,

	4377 intptr_t old_biggest_points,

	4378 intptr_t* from,

	4379 intptr_t* to) {

	4380 intptr_t biggest_points = old_biggest_points;

	4381 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	4382 for (intptr_t i = 0; i < length_; ) {

	4383 while (i < length_ && Count(i) > max_number_of_chars) i++;

	4384 if (i == length_) break;

	4385 intptr_t remembered_from = i;

	4386 bool union_map[kSize];

	4387 for (intptr_t j = 0; j < kSize; j++) union_map[j] = false;

	4388 while (i < length_ && Count(i) <= max_number_of_chars) {

	4389 BoyerMoorePositionInfo* map = bitmaps_->At(i);

	4390 for (intptr_t j = 0; j < kSize; j++) union_map[j] \|= map->at(j);

	4391 i++;

	4392 }

	4393 intptr_t frequency = 0;

	4394 for (intptr_t j = 0; j < kSize; j++) {

	4395 if (union_map[j]) {

	4396 // Add 1 to the frequency to give a small per-character boost for

	4397 // the cases where our sampling is not good enough and many

	4398 // characters have a frequency of zero. This means the frequency

	4399 // can theoretically be up to 2*kSize though we treat it mostly as

	4400 // a fraction of kSize.

	4401 frequency += compiler_->frequency_collator()->Frequency(j) + 1;

	4402 }

	4403 }

	4404 // We use the probability of skipping times the distance we are skipping to

	4405 // judge the effectiveness of this. Actually we have a cut-off: By

	4406 // dividing by 2 we switch off the skipping if the probability of skipping

	4407 // is less than 50%. This is because the multibyte mask-and-compare

	4408 // skipping in quickcheck is more likely to do well on this case.

	4409 bool in_quickcheck_range = ((i - remembered_from < 4) \|\|

	4410 (compiler_->ascii() ? remembered_from <= 4 : remembered_from <= 2));

	4411 // Called 'probability' but it is only a rough estimate and can actually

	4412 // be outside the 0-kSize range.

	4413 intptr_t probability =

	4414 (in_quickcheck_range ? kSize / 2 : kSize) - frequency;

	4415 intptr_t points = (i - remembered_from) * probability;

	4416 if (points > biggest_points) {

	4417 *from = remembered_from;

	4418 *to = i - 1;

	4419 biggest_points = points;

	4420 }

	4421 }

	4422 return biggest_points;

	4423 }

	4424

	4425

	4426 // See comment above on the implementation of GetSkipTable.

	4427 bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {

	4428 const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	4429

	4430 intptr_t min_lookahead = 0;

	4431 intptr_t max_lookahead = 0;

	4432

	4433 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;

	4434

	4435 bool found_single_character = false;

	4436 intptr_t single_character = 0;

	4437 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) {

	4438 BoyerMoorePositionInfo* map = bitmaps_->At(i);

	4439 if (map->map_count() > 1 \|\|

	4440 (found_single_character && map->map_count() != 0)) {

	4441 found_single_character = false;

	4442 break;

	4443 }

	4444 for (intptr_t j = 0; j < kSize; j++) {

	4445 if (map->at(j)) {

	4446 found_single_character = true;

	4447 single_character = j;

	4448 break;

	4449 }

	4450 }

	4451 }

	4452

	4453 intptr_t lookahead_width = max_lookahead + 1 - min_lookahead;

	4454

	4455 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {

	4456 // The mask-compare can probably handle this better.

	4457 return false;

	4458 }

	4459

	4460 if (found_single_character) {

	4461 BlockLabel cont, again;

	4462 masm->BindBlock(&again);

	4463 masm->LoadCurrentCharacter(max_lookahead, &cont, true);

	4464 if (max_char_ > kSize) {

	4465 masm->CheckCharacterAfterAnd(single_character,

	4466 RegExpMacroAssembler::kTableMask,

	4467 &cont);

	4468 } else {

	4469 masm->CheckCharacter(single_character, &cont);

	4470 }

	4471 masm->AdvanceCurrentPosition(lookahead_width);

	4472 masm->GoTo(&again);

	4473 masm->BindBlock(&cont);

	4474 return true;

	4475 }

	4476

	4477 const TypedData& boolean_skip_table = TypedData::ZoneHandle(

	4478 compiler_->isolate(),

	4479 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld));

	4480 intptr_t skip_distance = GetSkipTable(

	4481 min_lookahead, max_lookahead, boolean_skip_table);

	4482 ASSERT(skip_distance != 0);

	4483

	4484 BlockLabel cont, again;

	4485

	4486 masm->BindBlock(&again);

	4487 masm->LoadCurrentCharacter(max_lookahead, &cont, true);

	4488 masm->CheckBitInTable(boolean_skip_table, &cont);

	4489 masm->AdvanceCurrentPosition(skip_distance);

	4490 masm->GoTo(&again);

	4491 masm->BindBlock(&cont);

	4492

	4493 return true;

	4494 }

	4495

	4496

	4497 // -------------------------------------------------------------------

	4498 // Analysis

	4499

	4500

	4501 void Analysis::EnsureAnalyzed(RegExpNode* that) {

	4502 if (that->info()->been_analyzed \|\| that->info()->being_analyzed)

	4503 return;

	4504 that->info()->being_analyzed = true;

	4505 that->Accept(this);

	4506 that->info()->being_analyzed = false;

	4507 that->info()->been_analyzed = true;

	4508 }

	4509

	4510

	4511 void Analysis::VisitEnd(EndNode* that) {

	4512 // nothing to do

	4513 }

	4514

	4515

	4516 void TextNode::CalculateOffsets() {

	4517 intptr_t element_count = elements()->length();

	4518 // Set up the offsets of the elements relative to the start. This is a fixed

	4519 // quantity since a TextNode can only contain fixed-width things.

	4520 intptr_t cp_offset = 0;

	4521 for (intptr_t i = 0; i < element_count; i++) {

	4522 TextElement& elm = (*elements())[i];

	4523 elm.set_cp_offset(cp_offset);

	4524 cp_offset += elm.length();

	4525 }

	4526 }

	4527

	4528

	4529 void Analysis::VisitText(TextNode* that) {

	4530 if (ignore_case_) {

	4531 that->MakeCaseIndependent(is_ascii_);

	4532 }

	4533 EnsureAnalyzed(that->on_success());

	4534 if (!has_failed()) {

	4535 that->CalculateOffsets();

	4536 }

	4537 }

	4538

	4539

	4540 void Analysis::VisitAction(ActionNode* that) {

	4541 RegExpNode* target = that->on_success();

	4542 EnsureAnalyzed(target);

	4543 if (!has_failed()) {

	4544 // If the next node is interested in what it follows then this node

	4545 // has to be interested too so it can pass the information on.

	4546 that->info()->AddFromFollowing(target->info());

	4547 }

	4548 }

	4549

	4550

	4551 void Analysis::VisitChoice(ChoiceNode* that) {

	4552 NodeInfo* info = that->info();

	4553 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {

	4554 RegExpNode* node = (*that->alternatives())[i].node();

	4555 EnsureAnalyzed(node);

	4556 if (has_failed()) return;

	4557 // Anything the following nodes need to know has to be known by

	4558 // this node also, so it can pass it on.

	4559 info->AddFromFollowing(node->info());

	4560 }

	4561 }

	4562

	4563

	4564 void Analysis::VisitLoopChoice(LoopChoiceNode* that) {

	4565 NodeInfo* info = that->info();

	4566 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {

	4567 RegExpNode* node = (*that->alternatives())[i].node();

	4568 if (node != that->loop_node()) {

	4569 EnsureAnalyzed(node);

	4570 if (has_failed()) return;

	4571 info->AddFromFollowing(node->info());

	4572 }

	4573 }

	4574 // Check the loop last since it may need the value of this node

	4575 // to get a correct result.

	4576 EnsureAnalyzed(that->loop_node());

	4577 if (!has_failed()) {

	4578 info->AddFromFollowing(that->loop_node()->info());

	4579 }

	4580 }

	4581

	4582

	4583 void Analysis::VisitBackReference(BackReferenceNode* that) {

	4584 EnsureAnalyzed(that->on_success());

	4585 }

	4586

	4587

	4588 void Analysis::VisitAssertion(AssertionNode* that) {

	4589 EnsureAnalyzed(that->on_success());

	4590 }

	4591

	4592

	4593 // -------------------------------------------------------------------

	4594 // Dot/dotty output

	4595

	4596

	4597 #ifdef DEBUG

	4598

	4599

	4600 class DotPrinter: public NodeVisitor {

	4601 public:

	4602 explicit DotPrinter(bool ignore_case)

	4603 : ignore_case_(ignore_case) {}

	4604 void PrintNode(const char* label, RegExpNode* node);

	4605 void Visit(RegExpNode* node);

	4606 void PrintAttributes(RegExpNode* from);

	4607 void PrintOnFailure(RegExpNode* from, RegExpNode* to);

	4608 #define DECLARE_VISIT(Type) \

	4609 virtual void Visit##Type(Type##Node* that);

	4610 FOR_EACH_NODE_TYPE(DECLARE_VISIT)

	4611 #undef DECLARE_VISIT

	4612 private:

	4613 bool ignore_case_;

	4614 };

	4615

	4616

	4617 void DotPrinter::PrintNode(const char* label, RegExpNode* node) {

	4618 OS::Print("digraph G {\n graph [label=\"");

	4619 for (intptr_t i = 0; label[i]; i++) {

	4620 switch (label[i]) {

	4621 case '\\':

	4622 OS::Print("\\\\");

	4623 break;

	4624 case '"':

	4625 OS::Print("\"");

	4626 break;

	4627 default:

	4628 OS::Print("%c", label[i]);

	4629 break;

	4630 }

	4631 }

	4632 OS::Print("\"];\n");

	4633 Visit(node);

	4634 OS::Print("}\n");

	4635 }

	4636

	4637

	4638 void DotPrinter::Visit(RegExpNode* node) {

	4639 if (node->info()->visited) return;

	4640 node->info()->visited = true;

	4641 node->Accept(this);

	4642 }

	4643

	4644

	4645 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {

	4646 OS::Print(" n%p -> n%p [style=dotted];\n", from, on_failure);

	4647 Visit(on_failure);

	4648 }

	4649

	4650

	4651 class AttributePrinter : public ValueObject {

	4652 public:

	4653 AttributePrinter() : first_(true) {}

	4654 void PrintSeparator() {

	4655 if (first_) {

	4656 first_ = false;

	4657 } else {

	4658 OS::Print("\|");

	4659 }

	4660 }

	4661 void PrintBit(const char* name, bool value) {

	4662 if (!value) return;

	4663 PrintSeparator();

	4664 OS::Print("{%s}", name);

	4665 }

	4666 void PrintPositive(const char* name, intptr_t value) {

	4667 if (value < 0) return;

	4668 PrintSeparator();

	4669 OS::Print("{%s\|%" Pd "}", name, value);

	4670 }

	4671

	4672 private:

	4673 bool first_;

	4674 };

	4675

	4676

	4677 void DotPrinter::PrintAttributes(RegExpNode* that) {

	4678 OS::Print(" a%p [shape=Mrecord, color=grey, fontcolor=grey, "

	4679 "margin=0.1, fontsize=10, label=\"{", that);

	4680 AttributePrinter printer;

	4681 NodeInfo* info = that->info();

	4682 printer.PrintBit("NI", info->follows_newline_interest);

	4683 printer.PrintBit("WI", info->follows_word_interest);

	4684 printer.PrintBit("SI", info->follows_start_interest);

	4685 BlockLabel* label = that->label();

	4686 if (label->IsBound())

	4687 printer.PrintPositive("@", label->Position());

	4688 OS::Print("}\"];\n"

	4689 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n",

	4690 that, that);

	4691 }

	4692

	4693

	4694 static const bool kPrintDispatchTable = false;

	4695 void DotPrinter::VisitChoice(ChoiceNode* that) {

	4696 if (kPrintDispatchTable) {

	4697 OS::Print(" n%p [shape=Mrecord, label=\"", that);

	4698 UNIMPLEMENTED();

	4699 } else {

	4700 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that);

	4701 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {

	4702 GuardedAlternative alt = that->alternatives()->At(i);

	4703 OS::Print(" n%p -> n%p", that, alt.node());

	4704 }

	4705 }

	4706 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {

	4707 GuardedAlternative alt = that->alternatives()->At(i);

	4708 alt.node()->Accept(this);

	4709 }

	4710 }

	4711

	4712

	4713 void DotPrinter::VisitText(TextNode* that) {

	4714 OS::Print(" n%p [label=\"", that);

	4715 for (intptr_t i = 0; i < that->elements()->length(); i++) {

	4716 if (i > 0) OS::Print(" ");

	4717 TextElement elm = that->elements()->At(i);

	4718 switch (elm.text_type()) {

	4719 case TextElement::ATOM: {

	4720 ZoneGrowableArray<uint16_t>* data = elm.atom()->data();

	4721 for (intptr_t i = 0; i < data->length(); i++) {

	4722 OS::Print("%c", static_cast<char>(data->At(i)));

	4723 }

	4724 break;

	4725 }

	4726 case TextElement::CHAR_CLASS: {

	4727 RegExpCharacterClass* node = elm.char_class();

	4728 OS::Print("[");

	4729 if (node->is_negated()) OS::Print("^");

	4730 for (intptr_t j = 0; j < node->ranges()->length(); j++) {

	4731 CharacterRange range = node->ranges()->At(j);

	4732 PrintUtf16(range.from());

	4733 OS::Print("-");

	4734 PrintUtf16(range.to());

	4735 }

	4736 OS::Print("]");

	4737 break;

	4738 }

	4739 default:

	4740 UNREACHABLE();

	4741 }

	4742 }

	4743 OS::Print("\", shape=box, peripheries=2];\n");

	4744 PrintAttributes(that);

	4745 OS::Print(" n%p -> n%p;\n", that, that->on_success());

	4746 Visit(that->on_success());

	4747 }

	4748

	4749

	4750 void DotPrinter::VisitBackReference(BackReferenceNode* that) {

	4751 OS::Print(" n%p [label=\"$%" Pd "..$%" Pd "\", shape=doubleoctagon];\n",

	4752 that, that->start_register(), that->end_register());

	4753 PrintAttributes(that);

	4754 OS::Print(" n%p -> n%p;\n", that, that->on_success());

	4755 Visit(that->on_success());

	4756 }

	4757

	4758

	4759 void DotPrinter::VisitEnd(EndNode* that) {

	4760 OS::Print(" n%p [style=bold, shape=point];\n", that);

	4761 PrintAttributes(that);

	4762 }

	4763

	4764

	4765 void DotPrinter::VisitAssertion(AssertionNode* that) {

	4766 OS::Print(" n%p [", that);

	4767 switch (that->assertion_type()) {

	4768 case AssertionNode::AT_END:

	4769 OS::Print("label=\"$\", shape=septagon");

	4770 break;

	4771 case AssertionNode::AT_START:

	4772 OS::Print("label=\"^\", shape=septagon");

	4773 break;

	4774 case AssertionNode::AT_BOUNDARY:

	4775 OS::Print("label=\"\\b\", shape=septagon");

	4776 break;

	4777 case AssertionNode::AT_NON_BOUNDARY:

	4778 OS::Print("label=\"\\B\", shape=septagon");

	4779 break;

	4780 case AssertionNode::AFTER_NEWLINE:

	4781 OS::Print("label=\"(?<=\\n)\", shape=septagon");

	4782 break;

	4783 }

	4784 OS::Print("];\n");

	4785 PrintAttributes(that);

	4786 RegExpNode* successor = that->on_success();

	4787 OS::Print(" n%p -> n%p;\n", that, successor);

	4788 Visit(successor);

	4789 }

	4790

	4791

	4792 void DotPrinter::VisitAction(ActionNode* that) {

	4793 OS::Print(" n%p [", that);

	4794 switch (that->action_type_) {

	4795 case ActionNode::SET_REGISTER:

	4796 OS::Print("label=\"$%" Pd ":=%" Pd "\", shape=octagon",

	4797 that->data_.u_store_register.reg,

	4798 that->data_.u_store_register.value);

	4799 break;

	4800 case ActionNode::INCREMENT_REGISTER:

	4801 OS::Print("label=\"$%" Pd "++\", shape=octagon",

	4802 that->data_.u_increment_register.reg);

	4803 break;

	4804 case ActionNode::STORE_POSITION:

	4805 OS::Print("label=\"$%" Pd ":=$pos\", shape=octagon",

	4806 that->data_.u_position_register.reg);

	4807 break;

	4808 case ActionNode::BEGIN_SUBMATCH:

	4809 OS::Print("label=\"$%" Pd ":=$pos,begin\", shape=septagon",

	4810 that->data_.u_submatch.current_position_register);

	4811 break;

	4812 case ActionNode::POSITIVE_SUBMATCH_SUCCESS:

	4813 OS::Print("label=\"escape\", shape=septagon");

	4814 break;

	4815 case ActionNode::EMPTY_MATCH_CHECK:

	4816 OS::Print("label=\"$%" Pd "=$pos?,$%" Pd "<%" Pd "?\", shape=septagon",

	4817 that->data_.u_empty_match_check.start_register,

	4818 that->data_.u_empty_match_check.repetition_register,

	4819 that->data_.u_empty_match_check.repetition_limit);

	4820 break;

	4821 case ActionNode::CLEAR_CAPTURES: {

	4822 OS::Print("label=\"clear $%" Pd " to $%" Pd "\", shape=septagon",

	4823 that->data_.u_clear_captures.range_from,

	4824 that->data_.u_clear_captures.range_to);

	4825 break;

	4826 }

	4827 }

	4828 OS::Print("];\n");

	4829 PrintAttributes(that);

	4830 RegExpNode* successor = that->on_success();

	4831 OS::Print(" n%p -> n%p;\n", that, successor);

	4832 Visit(successor);

	4833 }

	4834

	4835

	4836 void RegExpEngine::DotPrint(const char* label,

	4837 RegExpNode* node,

	4838 bool ignore_case) {

	4839 DotPrinter printer(ignore_case);

	4840 printer.PrintNode(label, node);

	4841 }

	4842

	4843

	4844 #endif // DEBUG

	4845

	4846 } // namespace dart

OLD	NEW

« runtime/vm/raw_object.h ('K') | « runtime/vm/regexp.h ('k') | runtime/vm/regexp_assembler.h » ('j') | no next file with comments »