runtime/vm/regexp.cc - Issue 539153002: Port and integrate the irregexp engine from V8

Side by Side Diff: runtime/vm/regexp.cc

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 #include "vm/regexp.h"	5 #include "vm/regexp.h"

6	6

	7 #include "vm/dart_entry.h"

	8 #include "vm/regexp_assembler.h"

7 #include "vm/regexp_ast.h"	9 #include "vm/regexp_ast.h"

	10 #include "vm/unibrow-inl.h"

8 #include "vm/unicode.h"	11 #include "vm/unicode.h"

9 #include "vm/symbols.h"	12 #include "vm/symbols.h"

10	13

11 #define I isolate()	14 #define I isolate()

12 #define CI compiler->isolate()	15 #define CI compiler->isolate()

13	16

14 namespace dart {	17 namespace dart {

15	18

	19 DECLARE_FLAG(bool, trace_irregexp);

16	20

17 #define DEFINE_ACCEPT(Type) \	21 #define DEFINE_ACCEPT(Type) \

18 void Type##Node::Accept(NodeVisitor* visitor) { \	22 void Type##Node::Accept(NodeVisitor* visitor) { \

19 visitor->Visit##Type(this); \	23 visitor->Visit##Type(this); \

20 }	24 }

21 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)	25 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)

22 #undef DEFINE_ACCEPT	26 #undef DEFINE_ACCEPT

23	27

24	28

25 // Default to generating optimized regexp code.	29 // Default to generating optimized regexp code.

26 static const bool kRegexpOptimization = true;	30 static const bool kRegexpOptimization = true;

27	31

28

29 // More makes code generation slower, less makes V8 benchmark score lower.	32 // More makes code generation slower, less makes V8 benchmark score lower.

30 static const intptr_t kMaxLookaheadForBoyerMoore = 8;	33 static const intptr_t kMaxLookaheadForBoyerMoore = 8;

	34

31 // In a 3-character pattern you can maximally step forwards 3 characters	35 // In a 3-character pattern you can maximally step forwards 3 characters

32 // at a time, which is not always enough to pay for the extra logic.	36 // at a time, which is not always enough to pay for the extra logic.

33 static const intptr_t kPatternTooShortForBoyerMoore = 2;	37 static const intptr_t kPatternTooShortForBoyerMoore = 2;

34	38

35

36 // The '2' variant has inclusive from and exclusive to.	39 // The '2' variant has inclusive from and exclusive to.

37 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12,	40 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12,

38 // which include WhiteSpace (7.2) or LineTerminator (7.3) values.	41 // which include WhiteSpace (7.2) or LineTerminator (7.3) values.

39 static const intptr_t kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,	42 static const intptr_t kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,

40 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,	43 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,

41 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,	44 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,

42 0xFEFF, 0xFF00, 0x10000 };	45 0xFEFF, 0xFF00, 0x10000 };

43 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);	46 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);

44 static const intptr_t kWordRanges[] = {	47 static const intptr_t kWordRanges[] = {

45 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };	48 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };

46 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges);	49 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges);

47 static const intptr_t kDigitRanges[] = { '0', '9' + 1, 0x10000 };	50 static const intptr_t kDigitRanges[] = { '0', '9' + 1, 0x10000 };

48 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges);	51 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges);

	52 static const intptr_t kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };

	53 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);

49 static const intptr_t kLineTerminatorRanges[] = {	54 static const intptr_t kLineTerminatorRanges[] = {

50 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, 0x10000 };	55 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, 0x10000 };

51 static const intptr_t kLineTerminatorRangeCount =	56 static const intptr_t kLineTerminatorRangeCount =

52 ARRAY_SIZE(kLineTerminatorRanges);	57 ARRAY_SIZE(kLineTerminatorRanges);

53	58

54	59

55 // TODO(jgruber): Remove me.

56 class RegExpMacroAssembler {

57 public:

58 // The implementation must be able to handle at least:

59 static const intptr_t kMaxRegister = (1 << 16) - 1;

60 };

61

62

63 // TODO(jgruber): Copied from regexp_ast.cc

64 static inline void PrintUtf16(uint16_t c) {	60 static inline void PrintUtf16(uint16_t c) {

65 const char* format = (0x20 <= c && c <= 0x7F)	61 const char* format = (0x20 <= c && c <= 0x7F) ?

66 ? "%c"	62 "%c" : (c <= 0xff) ? "\\x%02x" : "\\u%04x";

67 : (c <= 0xff) ? "\\x%02x" : "\\u%04x";

68 OS::Print(format, c);	63 OS::Print(format, c);

69 }	64 }

70	65

71	66

72 // We need to check for the following characters: 0x39c 0x3bc 0x178.	67 // We need to check for the following characters: 0x39c 0x3bc 0x178.

73 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {	68 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {

74 // TODO(dcarney): this could be a lot more efficient.	69 // TODO(dcarney): this could be a lot more efficient.

75 return range.Contains(0x39c) \|\|	70 return range.Contains(0x39c) \|\|

76 range.Contains(0x3bc) \|\| range.Contains(0x178);	71 range.Contains(0x3bc) \|\| range.Contains(0x178);

77 }	72 }

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
126 return next_register_;	121 return next_register_;

127 }	122 }

128 return next_register_++;	123 return next_register_++;

129 }	124 }

130	125

131 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,	126 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,

132 RegExpNode* start,	127 RegExpNode* start,

133 intptr_t capture_count,	128 intptr_t capture_count,

134 const String& pattern);	129 const String& pattern);

135	130

	131 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }

	132

136 static const intptr_t kImplementationOffset = 0;	133 static const intptr_t kImplementationOffset = 0;

137 static const intptr_t kNumberOfRegistersOffset = 0;	134 static const intptr_t kNumberOfRegistersOffset = 0;

138 static const intptr_t kCodeOffset = 1;	135 static const intptr_t kCodeOffset = 1;

139	136

140 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }	137 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }

141 EndNode* accept() { return accept_; }	138 EndNode* accept() { return accept_; }

142	139

143 static const intptr_t kMaxRecursion = 100;	140 static const intptr_t kMaxRecursion = 100;

144 inline intptr_t recursion_depth() { return recursion_depth_; }	141 inline intptr_t recursion_depth() { return recursion_depth_; }

145 inline void IncrementRecursionDepth() { recursion_depth_++; }	142 inline void IncrementRecursionDepth() { recursion_depth_++; }

146 inline void DecrementRecursionDepth() { recursion_depth_--; }	143 inline void DecrementRecursionDepth() { recursion_depth_--; }

147	144

148 void SetRegExpTooBig() { reg_exp_too_big_ = true; }	145 void SetRegExpTooBig() { reg_exp_too_big_ = true; }

149	146

150 inline bool ignore_case() { return ignore_case_; }	147 inline bool ignore_case() { return ignore_case_; }

151 inline bool ascii() { return ascii_; }	148 inline bool ascii() { return ascii_; }

152	149

153 intptr_t current_expansion_factor() { return current_expansion_factor_; }	150 intptr_t current_expansion_factor() { return current_expansion_factor_; }

154 void set_current_expansion_factor(intptr_t value) {	151 void set_current_expansion_factor(intptr_t value) {

155 current_expansion_factor_ = value;	152 current_expansion_factor_ = value;

156 }	153 }

157	154

158 Isolate* isolate() const { return isolate_; }	155 Isolate* isolate() const { return isolate_; }

159	156

160 static const intptr_t kNoRegister = -1;	157 static const intptr_t kNoRegister = -1;

161	158

162 private:	159 private:

163 EndNode* accept_;	160 EndNode* accept_;

164 intptr_t next_register_;	161 intptr_t next_register_;

	162 ZoneGrowableArray<RegExpNode> work_list_;

165 intptr_t recursion_depth_;	163 intptr_t recursion_depth_;

166 RegExpMacroAssembler* macro_assembler_;	164 RegExpMacroAssembler* macro_assembler_;

167 bool ignore_case_;	165 bool ignore_case_;

168 bool ascii_;	166 bool ascii_;

169 bool reg_exp_too_big_;	167 bool reg_exp_too_big_;

170 intptr_t current_expansion_factor_;	168 intptr_t current_expansion_factor_;

171 Isolate* isolate_;	169 Isolate* isolate_;

172 };	170 };

173	171

174	172

	173 class RecursionCheck {

	174 public:

	175 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {

	176 compiler->IncrementRecursionDepth();

	177 }

	178 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }

	179 private:

	180 RegExpCompiler* compiler_;

	181 };

	182

	183

175 // Scoped object to keep track of how much we unroll quantifier loops in the	184 // Scoped object to keep track of how much we unroll quantifier loops in the

176 // regexp graph generator.	185 // regexp graph generator.

177 class RegExpExpansionLimiter {	186 class RegExpExpansionLimiter {

178 public:	187 public:

179 static const intptr_t kMaxExpansionFactor = 6;	188 static const intptr_t kMaxExpansionFactor = 6;

180 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor)	189 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor)

181 : compiler_(compiler),	190 : compiler_(compiler),

182 saved_expansion_factor_(compiler->current_expansion_factor()),	191 saved_expansion_factor_(compiler->current_expansion_factor()),

183 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {	192 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {

184 ASSERT(factor > 0);	193 ASSERT(factor > 0);

(...skipping 39 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
224 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,	233 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,

225 RegExpNode* on_success) {	234 RegExpNode* on_success) {

226 ZoneGrowableArray<TextElement>* elms =	235 ZoneGrowableArray<TextElement>* elms =

227 new(CI) ZoneGrowableArray<TextElement>(1);	236 new(CI) ZoneGrowableArray<TextElement>(1);

228 for (intptr_t i = 0; i < elements()->length(); i++) {	237 for (intptr_t i = 0; i < elements()->length(); i++) {

229 elms->Add(elements()->At(i));	238 elms->Add(elements()->At(i));

230 }	239 }

231 return new(CI) TextNode(elms, on_success);	240 return new(CI) TextNode(elms, on_success);

232 }	241 }

233	242

	243

234 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,	244 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,

235 RegExpNode* on_success) {	245 RegExpNode* on_success) {

236 return new(CI) TextNode(this, on_success);	246 return new(CI) TextNode(this, on_success);

237 }	247 }

238	248

239	249

240 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,	250 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,

241 RegExpNode* on_success) {	251 RegExpNode* on_success) {

242 ZoneGrowableArray<RegExpTree> alternatives = this->alternatives();	252 ZoneGrowableArray<RegExpTree> alternatives = this->alternatives();

243 intptr_t length = alternatives->length();	253 intptr_t length = alternatives->length();

(...skipping 474 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
718 // we don't need to check it.	728 // we don't need to check it.

719 if (neg_replacement == NULL) return set_replacement(replacement);	729 if (neg_replacement == NULL) return set_replacement(replacement);

720 alternatives_->At(0).set_node(neg_replacement);	730 alternatives_->At(0).set_node(neg_replacement);

721 return set_replacement(this);	731 return set_replacement(this);

722 }	732 }

723	733

724	734

725 // Code emission ---------------------------------------------------------------	735 // Code emission ---------------------------------------------------------------

726	736

727	737

728 #define DEFINE_EMIT(Type) \	738 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,

729 void Type##Node::Emit(RegExpCompiler* compiler, Trace* trace) { \	739 Guard* guard,

730 UNIMPLEMENTED(); \	740 Trace* trace) {

731 }	741 switch (guard->op()) {

732 FOR_EACH_NODE_TYPE(DEFINE_EMIT)	742 case Guard::LT:

733 DEFINE_EMIT(LoopChoice)	743 ASSERT(!trace->mentions_reg(guard->reg()));

734 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler,	744 macro_assembler->IfRegisterGE(guard->reg(),

735 Trace* trace) {	745 guard->value(),

	746 trace->backtrack());

	747 break;

	748 case Guard::GEQ:

	749 ASSERT(!trace->mentions_reg(guard->reg()));

	750 macro_assembler->IfRegisterLT(guard->reg(),

	751 guard->value(),

	752 trace->backtrack());

	753 break;

	754 }

	755 }

	756

	757

	758 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {

	759 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	760

	761 // Omit flushing the trace. We discard the entire stack frame anyway.

	762

	763 if (!label()->IsBound()) {

	764 // We are completely independent of the trace, since we ignore it,

	765 // so this code can be used as the generic version.

	766 assembler->BindBlock(label());

	767 }

	768

	769 // Throw away everything on the backtrack stack since the start

	770 // of the negative submatch and restore the character position.

	771 assembler->ReadCurrentPositionFromRegister(current_position_register_);

	772 assembler->ReadStackPointerFromRegister(stack_pointer_register_);

	773 if (clear_capture_count_ > 0) {

	774 // Clear any captures that might have been performed during the success

	775 // of the body of the negative look-ahead.

	776 intptr_t clear_capture_end =

	777 clear_capture_start_ + clear_capture_count_ - 1;

	778 assembler->ClearRegisters(clear_capture_start_, clear_capture_end);

	779 }

	780 // Now that we have unwound the stack we find at the top of the stack the

	781 // backtrack that the BeginSubmatch node got.

	782 assembler->Backtrack();

	783 }

	784

	785

	786 bool Trace::GetStoredPosition(intptr_t reg, int* cp_offset) {

	787 ASSERT(*cp_offset == 0);

	788 for (DeferredAction* action = actions_;

	789 action != NULL;

	790 action = action->next()) {

	791 if (action->Mentions(reg)) {

	792 if (action->action_type() == ActionNode::STORE_POSITION) {

	793 cp_offset = static_cast<DeferredCapture>(action)->cp_offset();

	794 return true;

	795 } else {

	796 return false;

	797 }

	798 }

	799 }

	800 return false;

	801 }

	802

	803

	804 // This is called as we come into a loop choice node and some other tricky

	805 // nodes. It normalizes the state of the code generator to ensure we can

	806 // generate generic code.

	807 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers,

	808 Isolate* isolate) {

	809 intptr_t max_register = RegExpCompiler::kNoRegister;

	810 for (DeferredAction* action = actions_;

	811 action != NULL;

	812 action = action->next()) {

	813 if (action->action_type() == ActionNode::CLEAR_CAPTURES) {

	814 Interval range = static_cast<DeferredClearCaptures*>(action)->range();

	815 for (intptr_t i = range.from(); i <= range.to(); i++)

	816 affected_registers->Set(i, isolate);

	817 if (range.to() > max_register) max_register = range.to();

	818 } else {

	819 affected_registers->Set(action->reg(), isolate);

	820 if (action->reg() > max_register) max_register = action->reg();

	821 }

	822 }

	823 return max_register;

	824 }

	825

	826

	827 bool Trace::DeferredAction::Mentions(intptr_t that) {

	828 if (action_type() == ActionNode::CLEAR_CAPTURES) {

	829 Interval range = static_cast<DeferredClearCaptures*>(this)->range();

	830 return range.Contains(that);

	831 } else {

	832 return reg() == that;

	833 }

	834 }

	835

	836

	837 bool Trace::mentions_reg(intptr_t reg) {

	838 for (DeferredAction* action = actions_;

	839 action != NULL;

	840 action = action->next()) {

	841 if (action->Mentions(reg))

	842 return true;

	843 }

	844 return false;

	845 }

	846

	847

	848 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,

	849 intptr_t max_register,

	850 const OutSet& registers_to_pop,

	851 const OutSet& registers_to_clear) {

	852 for (intptr_t reg = max_register; reg >= 0; reg--) {

	853 if (registers_to_pop.Get(reg)) {

	854 assembler->PopRegister(reg);

	855 } else if (registers_to_clear.Get(reg)) {

	856 intptr_t clear_to = reg;

	857 while (reg > 0 && registers_to_clear.Get(reg - 1)) {

	858 reg--;

	859 }

	860 assembler->ClearRegisters(reg, clear_to);

	861 }

	862 }

	863 }

	864

	865

	866 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,

	867 intptr_t max_register,

	868 const OutSet& affected_registers,

	869 OutSet* registers_to_pop,

	870 OutSet* registers_to_clear,

	871 Isolate* isolate) {

	872 // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.

	873 const intptr_t push_limit = (assembler->stack_limit_slack() + 1) / 2;

	874

	875 // Count pushes performed to force a stack limit check occasionally.

	876 intptr_t pushes = 0;

	877

	878 for (intptr_t reg = 0; reg <= max_register; reg++) {

	879 if (!affected_registers.Get(reg)) {

	880 continue;

	881 }

	882

	883 // The chronologically first deferred action in the trace

	884 // is used to infer the action needed to restore a register

	885 // to its previous state (or not, if it's safe to ignore it).

	886 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };

	887 DeferredActionUndoType undo_action = IGNORE;

	888

	889 intptr_t value = 0;

	890 bool absolute = false;

	891 bool clear = false;

	892 intptr_t store_position = -1;

	893 // This is a little tricky because we are scanning the actions in reverse

	894 // historical order (newest first).

	895 for (DeferredAction* action = actions_;

	896 action != NULL;

	897 action = action->next()) {

	898 if (action->Mentions(reg)) {

	899 switch (action->action_type()) {

	900 case ActionNode::SET_REGISTER: {

	901 Trace::DeferredSetRegister* psr =

	902 static_cast<Trace::DeferredSetRegister*>(action);

	903 if (!absolute) {

	904 value += psr->value();

	905 absolute = true;

	906 }

	907 // SET_REGISTER is currently only used for newly introduced loop

	908 // counters. They can have a significant previous value if they

	909 // occour in a loop. TODO(lrn): Propagate this information, so

	910 // we can set undo_action to IGNORE if we know there is no value to

	911 // restore.

	912 undo_action = RESTORE;

	913 ASSERT(store_position == -1);

	914 ASSERT(!clear);

	915 break;

	916 }

	917 case ActionNode::INCREMENT_REGISTER:

	918 if (!absolute) {

	919 value++;

	920 }

	921 ASSERT(store_position == -1);

	922 ASSERT(!clear);

	923 undo_action = RESTORE;

	924 break;

	925 case ActionNode::STORE_POSITION: {

	926 Trace::DeferredCapture* pc =

	927 static_cast<Trace::DeferredCapture*>(action);

	928 if (!clear && store_position == -1) {

	929 store_position = pc->cp_offset();

	930 }

	931

	932 // For captures we know that stores and clears alternate.

	933 // Other register, are never cleared, and if the occur

	934 // inside a loop, they might be assigned more than once.

	935 if (reg <= 1) {

	936 // Registers zero and one, aka "capture zero", is

	937 // always set correctly if we succeed. There is no

	938 // need to undo a setting on backtrack, because we

	939 // will set it again or fail.

	940 undo_action = IGNORE;

	941 } else {

	942 undo_action = pc->is_capture() ? CLEAR : RESTORE;

	943 }

	944 ASSERT(!absolute);

	945 ASSERT(value == 0);

	946 break;

	947 }

	948 case ActionNode::CLEAR_CAPTURES: {

	949 // Since we're scanning in reverse order, if we've already

	950 // set the position we have to ignore historically earlier

	951 // clearing operations.

	952 if (store_position == -1) {

	953 clear = true;

	954 }

	955 undo_action = RESTORE;

	956 ASSERT(!absolute);

	957 ASSERT(value == 0);

	958 break;

	959 }

	960 default:

	961 UNREACHABLE();

	962 break;

	963 }

	964 }

	965 }

	966 // Prepare for the undo-action (e.g., push if it's going to be popped).

	967 if (undo_action == RESTORE) {

	968 pushes++;

	969 RegExpMacroAssembler::StackCheckFlag stack_check =

	970 RegExpMacroAssembler::kNoStackLimitCheck;

	971 if (pushes == push_limit) {

	972 stack_check = RegExpMacroAssembler::kCheckStackLimit;

	973 pushes = 0;

	974 }

	975

	976 assembler->PushRegister(reg, stack_check);

	977 registers_to_pop->Set(reg, isolate);

	978 } else if (undo_action == CLEAR) {

	979 registers_to_clear->Set(reg, isolate);

	980 }

	981 // Perform the chronologically last action (or accumulated increment)

	982 // for the register.

	983 if (store_position != -1) {

	984 assembler->WriteCurrentPositionToRegister(reg, store_position);

	985 } else if (clear) {

	986 assembler->ClearRegisters(reg, reg);

	987 } else if (absolute) {

	988 assembler->SetRegister(reg, value);

	989 } else if (value != 0) {

	990 assembler->AdvanceRegister(reg, value);

	991 }

	992 }

	993 }

	994

	995

	996 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {

	997 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	998

	999 ASSERT(!is_trivial());

	1000

	1001 if (actions_ == NULL && backtrack() == NULL) {

	1002 // Here we just have some deferred cp advances to fix and we are back to

	1003 // a normal situation. We may also have to forget some information gained

	1004 // through a quick check that was already performed.

	1005 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);

	1006 // Create a new trivial state and generate the node with that.

	1007 Trace new_state;

	1008 successor->Emit(compiler, &new_state);

	1009 return;

	1010 }

	1011

	1012 // Generate deferred actions here along with code to undo them again.

	1013 OutSet affected_registers;

	1014

	1015 if (backtrack() != NULL) {

	1016 // Here we have a concrete backtrack location. These are set up by choice

	1017 // nodes and so they indicate that we have a deferred save of the current

	1018 // position which we may need to emit here.

	1019 assembler->PushCurrentPosition();

	1020 }

	1021

	1022 intptr_t max_register = FindAffectedRegisters(&affected_registers, CI);

	1023 OutSet registers_to_pop;

	1024 OutSet registers_to_clear;

	1025 PerformDeferredActions(assembler,

	1026 max_register,

	1027 affected_registers,

	1028 &registers_to_pop,

	1029 &registers_to_clear,

	1030 CI);

	1031 if (cp_offset_ != 0) {

	1032 assembler->AdvanceCurrentPosition(cp_offset_);

	1033 }

	1034

	1035 // Create a new trivial state and generate the node with that.

	1036 BlockLabel undo;

	1037 assembler->PushBacktrack(&undo);

	1038 Trace new_state;

	1039 successor->Emit(compiler, &new_state);

	1040

	1041 // On backtrack we need to restore state.

	1042 assembler->BindBlock(&undo);

	1043 RestoreAffectedRegisters(assembler,

	1044 max_register,

	1045 registers_to_pop,

	1046 registers_to_clear);

	1047 if (backtrack() == NULL) {

	1048 assembler->Backtrack();

	1049 } else {

	1050 assembler->PopCurrentPosition();

	1051 assembler->Jump(backtrack());

	1052 }

	1053 }

	1054

	1055

	1056 void Trace::InvalidateCurrentCharacter() {

	1057 characters_preloaded_ = 0;

	1058 }

	1059

	1060

	1061 void Trace::AdvanceCurrentPositionInTrace(intptr_t by,

	1062 RegExpCompiler* compiler) {

	1063 ASSERT(by > 0);

	1064 // We don't have an instruction for shifting the current character register

	1065 // down or for using a shifted value for anything so lets just forget that

	1066 // we preloaded any characters into it.

	1067 characters_preloaded_ = 0;

	1068 // Adjust the offsets of the quick check performed information. This

	1069 // information is used to find out what we already determined about the

	1070 // characters by means of mask and compare.

	1071 quick_check_performed_.Advance(by, compiler->ascii());

	1072 cp_offset_ += by;

	1073 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {

	1074 compiler->SetRegExpTooBig();

	1075 cp_offset_ = 0;

	1076 }

	1077 bound_checked_up_to_ = Utils::Maximum(0, bound_checked_up_to_ - by);

	1078 }

	1079

	1080

	1081 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1082 if (!trace->is_trivial()) {

	1083 trace->Flush(compiler, this);

	1084 return;

	1085 }

	1086 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1087 if (!label()->IsBound()) {

	1088 assembler->BindBlock(label());

	1089 }

	1090 switch (action_) {

	1091 case ACCEPT:

	1092 assembler->Succeed();

	1093 return;

	1094 case BACKTRACK:

	1095 assembler->Jump(trace->backtrack());

	1096 return;

	1097 case NEGATIVE_SUBMATCH_SUCCESS:

	1098 // This case is handled in a different virtual method.

	1099 UNREACHABLE();

	1100 }

736 UNIMPLEMENTED();	1101 UNIMPLEMENTED();

737 }	1102 }

738 #undef DEFINE_EMIT	1103

739	1104

740	1105 bool QuickCheckDetails::Rationalize(bool asc) {

741 #define DEFINE_BMINFO(Type) \	1106 bool found_useful_op = false;

742 void Type##Node::FillInBMInfo(intptr_t initial_offset, \	1107 uint32_t char_mask;

743 intptr_t budget, \	1108 if (asc) {

744 BoyerMooreLookahead* bm, \	1109 char_mask = Symbols::kMaxOneCharCodeSymbol;

745 bool not_at_start) { \	1110 } else {

746 UNIMPLEMENTED(); \	1111 char_mask = Utf16::kMaxCodeUnit;

747 }	1112 }

748 FOR_EACH_NODE_TYPE(DEFINE_BMINFO)	1113 mask_ = 0;

749 DEFINE_BMINFO(LoopChoice)	1114 value_ = 0;

750 #undef DEFINE_BMINFO	1115 intptr_t char_shift = 0;

	1116 for (intptr_t i = 0; i < characters_; i++) {

	1117 Position* pos = &positions_[i];

	1118 if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != 0) {

	1119 found_useful_op = true;

	1120 }

	1121 mask_ \|= (pos->mask & char_mask) << char_shift;

	1122 value_ \|= (pos->value & char_mask) << char_shift;

	1123 char_shift += asc ? 8 : 16;

	1124 }

	1125 return found_useful_op;

	1126 }

	1127

	1128

	1129 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,

	1130 Trace* trace,

	1131 bool preload_has_checked_bounds,

	1132 BlockLabel* on_possible_success,

	1133 QuickCheckDetails* details,

	1134 bool fall_through_on_failure) {

	1135 if (details->characters() == 0) return false;

	1136 GetQuickCheckDetails(

	1137 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);

	1138 if (details->cannot_match()) return false;

	1139 if (!details->Rationalize(compiler->ascii())) return false;

	1140 ASSERT(details->characters() == 1 \|\|

	1141 compiler->macro_assembler()->CanReadUnaligned());

	1142 uint32_t mask = details->mask();

	1143 uint32_t value = details->value();

	1144

	1145 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1146

	1147 if (trace->characters_preloaded() != details->characters()) {

	1148 assembler->LoadCurrentCharacter(trace->cp_offset(),

	1149 trace->backtrack(),

	1150 !preload_has_checked_bounds,

	1151 details->characters());

	1152 }

	1153

	1154

	1155 bool need_mask = true;

	1156

	1157 if (details->characters() == 1) {

	1158 // If number of characters preloaded is 1 then we used a byte or 16 bit

	1159 // load so the value is already masked down.

	1160 uint32_t char_mask;

	1161 if (compiler->ascii()) {

	1162 char_mask = Symbols::kMaxOneCharCodeSymbol;

	1163 } else {

	1164 char_mask = Utf16::kMaxCodeUnit;

	1165 }

	1166 if ((mask & char_mask) == char_mask) need_mask = false;

	1167 mask &= char_mask;

	1168 } else {

	1169 // For 2-character preloads in ASCII mode or 1-character preloads in

	1170 // TWO_BYTE mode we also use a 16 bit load with zero extend.

	1171 if (details->characters() == 2 && compiler->ascii()) {

	1172 if ((mask & 0xffff) == 0xffff) need_mask = false;

	1173 } else if (details->characters() == 1 && !compiler->ascii()) {

	1174 if ((mask & 0xffff) == 0xffff) need_mask = false;

	1175 } else {

	1176 if (mask == 0xffffffff) need_mask = false;

	1177 }

	1178 }

	1179

	1180 if (fall_through_on_failure) {

	1181 if (need_mask) {

	1182 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);

	1183 } else {

	1184 assembler->CheckCharacter(value, on_possible_success);

	1185 }

	1186 } else {

	1187 if (need_mask) {

	1188 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());

	1189 } else {

	1190 assembler->CheckNotCharacter(value, trace->backtrack());

	1191 }

	1192 }

	1193 return true;

	1194 }

	1195

	1196

	1197 // Emit the code to check for a ^ in multiline mode (1-character lookbehind

	1198 // that matches newline or the start of input).

	1199 static void EmitHat(RegExpCompiler* compiler,

	1200 RegExpNode* on_success,

	1201 Trace* trace) {

	1202 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1203 // We will be loading the previous character into the current character

	1204 // register.

	1205 Trace new_trace(*trace);

	1206 new_trace.InvalidateCurrentCharacter();

	1207

	1208 BlockLabel ok;

	1209 if (new_trace.cp_offset() == 0) {

	1210 // The start of input counts as a newline in this context, so skip to

	1211 // ok if we are at the start.

	1212 assembler->CheckAtStart(&ok);

	1213 }

	1214 // We already checked that we are not at the start of input so it must be

	1215 // OK to load the previous character.

	1216 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,

	1217 new_trace.backtrack(),

	1218 false);

	1219 if (!assembler->CheckSpecialCharacterClass('n',

	1220 new_trace.backtrack())) {

	1221 // Newline means \n, \r, 0x2028 or 0x2029.

	1222 if (!compiler->ascii()) {

	1223 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);

	1224 }

	1225 assembler->CheckCharacter('\n', &ok);

	1226 assembler->CheckNotCharacter('\r', new_trace.backtrack());

	1227 }

	1228 assembler->BindBlock(&ok);

	1229 on_success->Emit(compiler, &new_trace);

	1230 }

	1231

	1232

	1233 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1234 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1235 switch (assertion_type_) {

	1236 case AT_END: {

	1237 BlockLabel ok;

	1238 assembler->CheckPosition(trace->cp_offset(), &ok);

	1239 assembler->Jump(trace->backtrack());

	1240 assembler->BindBlock(&ok);

	1241 break;

	1242 }

	1243 case AT_START: {

	1244 if (trace->at_start() == Trace::FALSE_VALUE) {

	1245 assembler->Jump(trace->backtrack());

	1246 return;

	1247 }

	1248 if (trace->at_start() == Trace::UNKNOWN) {

	1249 assembler->CheckNotAtStart(trace->backtrack());

	1250 Trace at_start_trace = *trace;

	1251 at_start_trace.set_at_start(true);

	1252 on_success()->Emit(compiler, &at_start_trace);

	1253 return;

	1254 }

	1255 }

	1256 break;

	1257 case AFTER_NEWLINE:

	1258 EmitHat(compiler, on_success(), trace);

	1259 return;

	1260 case AT_BOUNDARY:

	1261 case AT_NON_BOUNDARY: {

	1262 EmitBoundaryCheck(compiler, trace);

	1263 return;

	1264 }

	1265 }

	1266 on_success()->Emit(compiler, trace);

	1267 }

	1268

	1269

	1270 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,

	1271 Trace* trace) {

	1272 // If we are generating a greedy loop then don't stop and don't reuse code.

	1273 if (trace->stop_node() != NULL) {

	1274 return CONTINUE;

	1275 }

	1276

	1277 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1278 if (trace->is_trivial()) {

	1279 if (label_.IsBound()) {

	1280 // We are being asked to generate a generic version, but that's already

	1281 // been done so just go to it.

	1282 macro_assembler->Jump(&label_);

	1283 return DONE;

	1284 }

	1285 if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {

	1286 // To avoid too deep recursion we push the node to the work queue and just

	1287 // generate a goto here.

	1288 compiler->AddWork(this);

	1289 macro_assembler->Jump(&label_);

	1290 return DONE;

	1291 }

	1292 // Generate generic version of the node and bind the label for later use.

	1293 macro_assembler->BindBlock(&label_);

	1294 return CONTINUE;

	1295 }

	1296

	1297 // We are being asked to make a non-generic version. Keep track of how many

	1298 // non-generic versions we generate so as not to overdo it.

	1299 trace_count_++;

	1300 if (kRegexpOptimization &&

	1301 trace_count_ < kMaxCopiesCodeGenerated &&

	1302 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {

	1303 return CONTINUE;

	1304 }

	1305

	1306 // If we get here code has been generated for this node too many times or

	1307 // recursion is too deep. Time to switch to a generic version. The code for

	1308 // generic versions above can handle deep recursion properly.

	1309 trace->Flush(compiler, this);

	1310 return DONE;

	1311 }

	1312

	1313

	1314 // This generates the code to match a text node. A text node can contain

	1315 // straight character sequences (possibly to be matched in a case-independent

	1316 // way) and character classes. For efficiency we do not do this in a single

	1317 // pass from left to right. Instead we pass over the text node several times,

	1318 // emitting code for some character positions every time. See the comment on

	1319 // TextEmitPass for details.

	1320 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1321 LimitResult limit_result = LimitVersions(compiler, trace);

	1322 if (limit_result == DONE) return;

	1323 ASSERT(limit_result == CONTINUE);

	1324

	1325 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {

	1326 compiler->SetRegExpTooBig();

	1327 return;

	1328 }

	1329

	1330 if (compiler->ascii()) {

	1331 intptr_t dummy = 0;

	1332 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);

	1333 }

	1334

	1335 bool first_elt_done = false;

	1336 intptr_t bound_checked_to = trace->cp_offset() - 1;

	1337 bound_checked_to += trace->bound_checked_up_to();

	1338

	1339 // If a character is preloaded into the current character register then

	1340 // check that now.

	1341 if (trace->characters_preloaded() == 1) {

	1342 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {

	1343 if (!SkipPass(pass, compiler->ignore_case())) {

	1344 TextEmitPass(compiler,

	1345 static_cast<TextEmitPassType>(pass),

	1346 true,

	1347 trace,

	1348 false,

	1349 &bound_checked_to);

	1350 }

	1351 }

	1352 first_elt_done = true;

	1353 }

	1354

	1355 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {

	1356 if (!SkipPass(pass, compiler->ignore_case())) {

	1357 TextEmitPass(compiler,

	1358 static_cast<TextEmitPassType>(pass),

	1359 false,

	1360 trace,

	1361 first_elt_done,

	1362 &bound_checked_to);

	1363 }

	1364 }

	1365

	1366 Trace successor_trace(*trace);

	1367 successor_trace.set_at_start(false);

	1368 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);

	1369 RecursionCheck rc(compiler);

	1370 on_success()->Emit(compiler, &successor_trace);

	1371 }

	1372

	1373

	1374 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1375 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1376 if (trace->stop_node() == this) {

	1377 intptr_t text_length =

	1378 GreedyLoopTextLengthForAlternative(&(alternatives_->At(0)));

	1379 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);

	1380 // Update the counter-based backtracking info on the stack. This is an

	1381 // optimization for greedy loops (see below).

	1382 ASSERT(trace->cp_offset() == text_length);

	1383 macro_assembler->AdvanceCurrentPosition(text_length);

	1384 macro_assembler->Jump(trace->loop_label());

	1385 return;

	1386 }

	1387 ASSERT(trace->stop_node() == NULL);

	1388 if (!trace->is_trivial()) {

	1389 trace->Flush(compiler, this);

	1390 return;

	1391 }

	1392 ChoiceNode::Emit(compiler, trace);

	1393 }

	1394

	1395

	1396 // This class is used when generating the alternatives in a choice node. It

	1397 // records the way the alternative is being code generated.

	1398 class AlternativeGeneration {

	1399 public:

	1400 AlternativeGeneration()

	1401 : possible_success(),

	1402 expects_preload(false),

	1403 after(),

	1404 quick_check_details() { }

	1405 BlockLabel possible_success;

	1406 bool expects_preload;

	1407 BlockLabel after;

	1408 QuickCheckDetails quick_check_details;

	1409 };

	1410

	1411

	1412 // Creates a list of AlternativeGenerations. If the list has a reasonable

	1413 // size then it is on the stack, otherwise the excess is on the heap.

	1414 class AlternativeGenerationList {

	1415 public:

	1416 explicit AlternativeGenerationList(intptr_t count)

	1417 : alt_gens_(count) {

	1418 for (intptr_t i = 0; i < count && i < kAFew; i++) {

	1419 alt_gens_.Add(a_few_alt_gens_ + i);

	1420 }

	1421 for (intptr_t i = kAFew; i < count; i++) {

	1422 alt_gens_.Add(new AlternativeGeneration());

	1423 }

	1424 }

	1425 ~AlternativeGenerationList() {

	1426 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) {

	1427 delete alt_gens_[i];

	1428 alt_gens_[i] = NULL;

	1429 }

	1430 }

	1431

	1432 AlternativeGeneration* at(intptr_t i) {

	1433 return alt_gens_[i];

	1434 }

	1435

	1436 private:

	1437 static const intptr_t kAFew = 10;

	1438 GrowableArray<AlternativeGeneration*> alt_gens_;

	1439 AlternativeGeneration a_few_alt_gens_[kAFew];

	1440 };

	1441

	1442

	1443 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1444 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1445 intptr_t choice_count = alternatives_->length();

	1446

	1447 #ifdef DEBUG

	1448 for (intptr_t i = 0; i < choice_count - 1; i++) {

	1449 GuardedAlternative alternative = alternatives_->At(i);

	1450 ZoneGrowableArray<Guard> guards = alternative.guards();

	1451 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();

	1452 for (intptr_t j = 0; j < guard_count; j++) {

	1453 ASSERT(!trace->mentions_reg(guards->At(j)->reg()));

	1454 }

	1455 }

	1456 #endif

	1457

	1458 LimitResult limit_result = LimitVersions(compiler, trace);

	1459 if (limit_result == DONE) return;

	1460 ASSERT(limit_result == CONTINUE);

	1461

	1462 intptr_t new_flush_budget = trace->flush_budget() / choice_count;

	1463 if (trace->flush_budget() == 0 && trace->actions() != NULL) {

	1464 trace->Flush(compiler, this);

	1465 return;

	1466 }

	1467

	1468 RecursionCheck rc(compiler);

	1469

	1470 Trace* current_trace = trace;

	1471

	1472 intptr_t text_length =

	1473 GreedyLoopTextLengthForAlternative(&(alternatives_->At(0)));

	1474 bool greedy_loop = false;

	1475 BlockLabel greedy_loop_label;

	1476 Trace counter_backtrack_trace;

	1477 counter_backtrack_trace.set_backtrack(&greedy_loop_label);

	1478 if (not_at_start()) counter_backtrack_trace.set_at_start(false);

	1479

	1480 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {

	1481 // Here we have special handling for greedy loops containing only text nodes

	1482 // and other simple nodes. These are handled by pushing the current

	1483 // position on the stack and then incrementing the current position each

	1484 // time around the switch. On backtrack we decrement the current position

	1485 // and check it against the pushed value. This avoids pushing backtrack

	1486 // information for each iteration of the loop, which could take up a lot of

	1487 // space.

	1488 greedy_loop = true;

	1489 ASSERT(trace->stop_node() == NULL);

	1490 macro_assembler->PushCurrentPosition();

	1491 current_trace = &counter_backtrack_trace;

	1492 BlockLabel greedy_match_failed;

	1493 Trace greedy_match_trace;

	1494 if (not_at_start()) greedy_match_trace.set_at_start(false);

	1495 greedy_match_trace.set_backtrack(&greedy_match_failed);

	1496 BlockLabel loop_label;

	1497 macro_assembler->BindBlock(&loop_label);

	1498 greedy_match_trace.set_stop_node(this);

	1499 greedy_match_trace.set_loop_label(&loop_label);

	1500 alternatives_->At(0).node()->Emit(compiler, &greedy_match_trace);

	1501 macro_assembler->BindBlock(&greedy_match_failed);

	1502 }

	1503

	1504 BlockLabel second_choice; // For use in greedy matches.

	1505 macro_assembler->BindBlock(&second_choice);

	1506

	1507 intptr_t first_normal_choice = greedy_loop ? 1 : 0;

	1508

	1509 bool not_at_start = current_trace->at_start() == Trace::FALSE_VALUE;

	1510 const intptr_t kEatsAtLeastNotYetInitialized = -1;

	1511 intptr_t eats_at_least = kEatsAtLeastNotYetInitialized;

	1512

	1513 bool skip_was_emitted = false;

	1514

	1515 if (!greedy_loop && choice_count == 2) {

	1516 GuardedAlternative alt1 = alternatives_->At(1);

	1517 if (alt1.guards() == NULL \|\| alt1.guards()->length() == 0) {

	1518 RegExpNode* eats_anything_node = alt1.node();

	1519 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==

	1520 this) {

	1521 // At this point we know that we are at a non-greedy loop that will eat

	1522 // any character one at a time. Any non-anchored regexp has such a

	1523 // loop prepended to it in order to find where it starts. We look for

	1524 // a pattern of the form ...abc... where we can look 6 characters ahead

	1525 // and step forwards 3 if the character is not one of abc. Abc need

	1526 // not be atoms, they can be any reasonably limited character class or

	1527 // small alternation.

	1528 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.

	1529 BoyerMooreLookahead* lookahead = bm_info(not_at_start);

	1530 if (lookahead == NULL) {

	1531 eats_at_least = Utils::Minimum(kMaxLookaheadForBoyerMoore,

	1532 EatsAtLeast(kMaxLookaheadForBoyerMoore,

	1533 kRecursionBudget,

	1534 not_at_start));

	1535 if (eats_at_least >= 1) {

	1536 BoyerMooreLookahead* bm =

	1537 new(I) BoyerMooreLookahead(eats_at_least, compiler, I);

	1538 GuardedAlternative alt0 = alternatives_->At(0);

	1539 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, not_at_start);

	1540 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);

	1541 }

	1542 } else {

	1543 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);

	1544 }

	1545 }

	1546 }

	1547 }

	1548

	1549 if (eats_at_least == kEatsAtLeastNotYetInitialized) {

	1550 // Save some time by looking at most one machine word ahead.

	1551 eats_at_least =

	1552 EatsAtLeast(compiler->ascii() ? 4 : 2, kRecursionBudget, not_at_start);

	1553 }

	1554 intptr_t preload_characters =

	1555 CalculatePreloadCharacters(compiler, eats_at_least);

	1556

	1557 bool preload_is_current = !skip_was_emitted &&

	1558 (current_trace->characters_preloaded() == preload_characters);

	1559 bool preload_has_checked_bounds = preload_is_current;

	1560

	1561 AlternativeGenerationList alt_gens(choice_count);

	1562

	1563 // For now we just call all choices one after the other. The idea ultimately

	1564 // is to use the Dispatch table to try only the relevant ones.

	1565 for (intptr_t i = first_normal_choice; i < choice_count; i++) {

	1566 GuardedAlternative alternative = alternatives_->At(i);

	1567 AlternativeGeneration* alt_gen = alt_gens.at(i);

	1568 alt_gen->quick_check_details.set_characters(preload_characters);

	1569 ZoneGrowableArray<Guard> guards = alternative.guards();

	1570 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();

	1571 Trace new_trace(*current_trace);

	1572 new_trace.set_characters_preloaded(preload_is_current ?

	1573 preload_characters :

	1574 0);

	1575 if (preload_has_checked_bounds) {

	1576 new_trace.set_bound_checked_up_to(preload_characters);

	1577 }

	1578 new_trace.quick_check_performed()->Clear();

	1579 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);

	1580 alt_gen->expects_preload = preload_is_current;

	1581 bool generate_full_check_inline = false;

	1582 if (kRegexpOptimization &&

	1583 try_to_emit_quick_check_for_alternative(i) &&

	1584 alternative.node()->EmitQuickCheck(compiler,

	1585 &new_trace,

	1586 preload_has_checked_bounds,

	1587 &alt_gen->possible_success,

	1588 &alt_gen->quick_check_details,

	1589 i < choice_count - 1)) {

	1590 // Quick check was generated for this choice.

	1591 preload_is_current = true;

	1592 preload_has_checked_bounds = true;

	1593 // On the last choice in the ChoiceNode we generated the quick

	1594 // check to fall through on possible success. So now we need to

	1595 // generate the full check inline.

	1596 if (i == choice_count - 1) {

	1597 macro_assembler->BindBlock(&alt_gen->possible_success);

	1598 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);

	1599 new_trace.set_characters_preloaded(preload_characters);

	1600 new_trace.set_bound_checked_up_to(preload_characters);

	1601 generate_full_check_inline = true;

	1602 }

	1603 } else if (alt_gen->quick_check_details.cannot_match()) {

	1604 if (i == choice_count - 1 && !greedy_loop) {

	1605 macro_assembler->GoTo(trace->backtrack());

	1606 }

	1607 continue;

	1608 } else {

	1609 // No quick check was generated. Put the full code here.

	1610 // If this is not the first choice then there could be slow checks from

	1611 // previous cases that go here when they fail. There's no reason to

	1612 // insist that they preload characters since the slow check we are about

	1613 // to generate probably can't use it.

	1614 if (i != first_normal_choice) {

	1615 alt_gen->expects_preload = false;

	1616 new_trace.InvalidateCurrentCharacter();

	1617 }

	1618 if (i < choice_count - 1) {

	1619 new_trace.set_backtrack(&alt_gen->after);

	1620 }

	1621 generate_full_check_inline = true;

	1622 }

	1623 if (generate_full_check_inline) {

	1624 if (new_trace.actions() != NULL) {

	1625 new_trace.set_flush_budget(new_flush_budget);

	1626 }

	1627 for (intptr_t j = 0; j < guard_count; j++) {

	1628 GenerateGuard(macro_assembler, guards->At(j), &new_trace);

	1629 }

	1630 alternative.node()->Emit(compiler, &new_trace);

	1631 preload_is_current = false;

	1632 }

	1633 macro_assembler->BindBlock(&alt_gen->after);

	1634 }

	1635 if (greedy_loop) {

	1636 macro_assembler->BindBlock(&greedy_loop_label);

	1637 // If we have unwound to the bottom then backtrack.

	1638 macro_assembler->CheckGreedyLoop(trace->backtrack());

	1639 // Otherwise try the second priority at an earlier position.

	1640 macro_assembler->AdvanceCurrentPosition(-text_length);

	1641 macro_assembler->Jump(&second_choice);

	1642 }

	1643

	1644 // At this point we need to generate slow checks for the alternatives where

	1645 // the quick check was inlined. We can recognize these because the associated

	1646 // label was bound.

	1647 for (intptr_t i = first_normal_choice; i < choice_count - 1; i++) {

	1648 AlternativeGeneration* alt_gen = alt_gens.at(i);

	1649 Trace new_trace(*current_trace);

	1650 // If there are actions to be flushed we have to limit how many times

	1651 // they are flushed. Take the budget of the parent trace and distribute

	1652 // it fairly amongst the children.

	1653 if (new_trace.actions() != NULL) {

	1654 new_trace.set_flush_budget(new_flush_budget);

	1655 }

	1656 EmitOutOfLineContinuation(compiler,

	1657 &new_trace,

	1658 alternatives_->At(i),

	1659 alt_gen,

	1660 preload_characters,

	1661 alt_gens.at(i + 1)->expects_preload);

	1662 }

	1663 }

	1664

	1665

	1666 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,

	1667 Trace* trace,

	1668 GuardedAlternative alternative,

	1669 AlternativeGeneration* alt_gen,

	1670 intptr_t preload_characters,

	1671 bool next_expects_preload) {

	1672 if (!alt_gen->possible_success.IsLinked()) return;

	1673

	1674 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1675 macro_assembler->BindBlock(&alt_gen->possible_success);

	1676 Trace out_of_line_trace(*trace);

	1677 out_of_line_trace.set_characters_preloaded(preload_characters);

	1678 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);

	1679 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);

	1680 ZoneGrowableArray<Guard> guards = alternative.guards();

	1681 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();

	1682 if (next_expects_preload) {

	1683 BlockLabel reload_current_char;

	1684 out_of_line_trace.set_backtrack(&reload_current_char);

	1685 for (intptr_t j = 0; j < guard_count; j++) {

	1686 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace);

	1687 }

	1688 alternative.node()->Emit(compiler, &out_of_line_trace);

	1689 macro_assembler->BindBlock(&reload_current_char);

	1690 // Reload the current character, since the next quick check expects that.

	1691 // We don't need to check bounds here because we only get into this

	1692 // code through a quick check which already did the checked load.

	1693 macro_assembler->LoadCurrentCharacter(trace->cp_offset(),

	1694 NULL,

	1695 false,

	1696 preload_characters);

	1697 macro_assembler->Jump(&(alt_gen->after));

	1698 } else {

	1699 out_of_line_trace.set_backtrack(&(alt_gen->after));

	1700 for (intptr_t j = 0; j < guard_count; j++) {

	1701 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace);

	1702 }

	1703 alternative.node()->Emit(compiler, &out_of_line_trace);

	1704 }

	1705 }

	1706

	1707

	1708 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1709 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1710 LimitResult limit_result = LimitVersions(compiler, trace);

	1711 if (limit_result == DONE) return;

	1712 ASSERT(limit_result == CONTINUE);

	1713

	1714 RecursionCheck rc(compiler);

	1715

	1716 switch (action_type_) {

	1717 case STORE_POSITION: {

	1718 Trace::DeferredCapture

	1719 new_capture(data_.u_position_register.reg,

	1720 data_.u_position_register.is_capture,

	1721 trace);

	1722 Trace new_trace = *trace;

	1723 new_trace.add_action(&new_capture);

	1724 on_success()->Emit(compiler, &new_trace);

	1725 break;

	1726 }

	1727 case INCREMENT_REGISTER: {

	1728 Trace::DeferredIncrementRegister

	1729 new_increment(data_.u_increment_register.reg);

	1730 Trace new_trace = *trace;

	1731 new_trace.add_action(&new_increment);

	1732 on_success()->Emit(compiler, &new_trace);

	1733 break;

	1734 }

	1735 case SET_REGISTER: {

	1736 Trace::DeferredSetRegister

	1737 new_set(data_.u_store_register.reg, data_.u_store_register.value);

	1738 Trace new_trace = *trace;

	1739 new_trace.add_action(&new_set);

	1740 on_success()->Emit(compiler, &new_trace);

	1741 break;

	1742 }

	1743 case CLEAR_CAPTURES: {

	1744 Trace::DeferredClearCaptures

	1745 new_capture(Interval(data_.u_clear_captures.range_from,

	1746 data_.u_clear_captures.range_to));

	1747 Trace new_trace = *trace;

	1748 new_trace.add_action(&new_capture);

	1749 on_success()->Emit(compiler, &new_trace);

	1750 break;

	1751 }

	1752 case BEGIN_SUBMATCH:

	1753 if (!trace->is_trivial()) {

	1754 trace->Flush(compiler, this);

	1755 } else {

	1756 assembler->WriteCurrentPositionToRegister(

	1757 data_.u_submatch.current_position_register, 0);

	1758 assembler->WriteStackPointerToRegister(

	1759 data_.u_submatch.stack_pointer_register);

	1760 on_success()->Emit(compiler, trace);

	1761 }

	1762 break;

	1763 case EMPTY_MATCH_CHECK: {

	1764 intptr_t start_pos_reg = data_.u_empty_match_check.start_register;

	1765 intptr_t stored_pos = 0;

	1766 intptr_t rep_reg = data_.u_empty_match_check.repetition_register;

	1767 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);

	1768 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);

	1769 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {

	1770 // If we know we haven't advanced and there is no minimum we

	1771 // can just backtrack immediately.

	1772 assembler->GoTo(trace->backtrack());

	1773 } else if (know_dist && stored_pos < trace->cp_offset()) {

	1774 // If we know we've advanced we can generate the continuation

	1775 // immediately.

	1776 on_success()->Emit(compiler, trace);

	1777 } else if (!trace->is_trivial()) {

	1778 trace->Flush(compiler, this);

	1779 } else {

	1780 BlockLabel skip_empty_check;

	1781 // If we have a minimum number of repetitions we check the current

	1782 // number first and skip the empty check if it's not enough.

	1783 if (has_minimum) {

	1784 intptr_t limit = data_.u_empty_match_check.repetition_limit;

	1785 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);

	1786 }

	1787 // If the match is empty we bail out, otherwise we fall through

	1788 // to the on-success continuation.

	1789 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,

	1790 trace->backtrack());

	1791 assembler->BindBlock(&skip_empty_check);

	1792 on_success()->Emit(compiler, trace);

	1793 }

	1794 break;

	1795 }

	1796 case POSITIVE_SUBMATCH_SUCCESS: {

	1797 if (!trace->is_trivial()) {

	1798 trace->Flush(compiler, this);

	1799 return;

	1800 }

	1801 assembler->ReadCurrentPositionFromRegister(

	1802 data_.u_submatch.current_position_register);

	1803 assembler->ReadStackPointerFromRegister(

	1804 data_.u_submatch.stack_pointer_register);

	1805 intptr_t clear_register_count = data_.u_submatch.clear_register_count;

	1806 if (clear_register_count == 0) {

	1807 on_success()->Emit(compiler, trace);

	1808 return;

	1809 }

	1810 intptr_t clear_registers_from = data_.u_submatch.clear_register_from;

	1811 BlockLabel clear_registers_backtrack;

	1812 Trace new_trace = *trace;

	1813 new_trace.set_backtrack(&clear_registers_backtrack);

	1814 on_success()->Emit(compiler, &new_trace);

	1815

	1816 assembler->BindBlock(&clear_registers_backtrack);

	1817 intptr_t clear_registers_to =

	1818 clear_registers_from + clear_register_count - 1;

	1819 assembler->ClearRegisters(clear_registers_from, clear_registers_to);

	1820

	1821 ASSERT(trace->backtrack() == NULL);

	1822 assembler->Backtrack();

	1823 return;

	1824 }

	1825 default:

	1826 UNREACHABLE();

	1827 }

	1828 }

	1829

	1830

	1831 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

	1832 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1833 if (!trace->is_trivial()) {

	1834 trace->Flush(compiler, this);

	1835 return;

	1836 }

	1837

	1838 LimitResult limit_result = LimitVersions(compiler, trace);

	1839 if (limit_result == DONE) return;

	1840 ASSERT(limit_result == CONTINUE);

	1841

	1842 RecursionCheck rc(compiler);

	1843

	1844 ASSERT(start_reg_ + 1 == end_reg_);

	1845 if (compiler->ignore_case()) {

	1846 assembler->CheckNotBackReferenceIgnoreCase(start_reg_,

	1847 trace->backtrack());

	1848 } else {

	1849 assembler->CheckNotBackReference(start_reg_, trace->backtrack());

	1850 }

	1851 on_success()->Emit(compiler, trace);

	1852 }

	1853

	1854

	1855 void ActionNode::FillInBMInfo(intptr_t offset,

	1856 intptr_t budget,

	1857 BoyerMooreLookahead* bm,

	1858 bool not_at_start) {

	1859 if (action_type_ == BEGIN_SUBMATCH) {

	1860 bm->SetRest(offset);

	1861 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {

	1862 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);

	1863 }

	1864 SaveBMInfo(bm, not_at_start, offset);

	1865 }

	1866

	1867

	1868 void AssertionNode::FillInBMInfo(intptr_t offset,

	1869 intptr_t budget,

	1870 BoyerMooreLookahead* bm,

	1871 bool not_at_start) {

	1872 // Match the behaviour of EatsAtLeast on this node.

	1873 if (assertion_type() == AT_START && not_at_start) return;

	1874 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);

	1875 SaveBMInfo(bm, not_at_start, offset);

	1876 }

	1877

	1878

	1879 void BackReferenceNode::FillInBMInfo(intptr_t offset,

	1880 intptr_t budget,

	1881 BoyerMooreLookahead* bm,

	1882 bool not_at_start) {

	1883 // Working out the set of characters that a backreference can match is too

	1884 // hard, so we just say that any character can match.

	1885 bm->SetRest(offset);

	1886 SaveBMInfo(bm, not_at_start, offset);

	1887 }

	1888

	1889

	1890 // Returns the number of characters in the equivalence class, omitting those

	1891 // that cannot occur in the source string because it is ASCII.

	1892 static intptr_t GetCaseIndependentLetters(uint16_t character,

	1893 bool ascii_subject,

	1894 unibrow::uchar* letters) {

	1895 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;

	1896 intptr_t length = jsregexp_uncanonicalize.get(character, '\0', letters);

	1897 // Unibrow returns 0 or 1 for characters where case independence is

	1898 // trivial.

	1899 if (length == 0) {

	1900 letters[0] = character;

	1901 length = 1;

	1902 }

	1903 if (!ascii_subject \|\| character <= Symbols::kMaxOneCharCodeSymbol) {

	1904 return length;

	1905 }

	1906 // The standard requires that non-ASCII characters cannot have ASCII

	1907 // character codes in their equivalence class.

	1908 return 0;

	1909 }

	1910

	1911

	1912 void ChoiceNode::FillInBMInfo(intptr_t offset,

	1913 intptr_t budget,

	1914 BoyerMooreLookahead* bm,

	1915 bool not_at_start) {

	1916 ZoneGrowableArray<GuardedAlternative>* alts = alternatives();

	1917 budget = (budget - 1) / alts->length();

	1918 for (intptr_t i = 0; i < alts->length(); i++) {

	1919 GuardedAlternative& alt = alts->At(i);

	1920 if (alt.guards() != NULL && alt.guards()->length() != 0) {

	1921 bm->SetRest(offset); // Give up trying to fill in info.

	1922 SaveBMInfo(bm, not_at_start, offset);

	1923 return;

	1924 }

	1925 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start);

	1926 }

	1927 SaveBMInfo(bm, not_at_start, offset);

	1928 }

	1929

	1930

	1931 void EndNode::FillInBMInfo(intptr_t offset,

	1932 intptr_t budget,

	1933 BoyerMooreLookahead* bm,

	1934 bool not_at_start) {

	1935 // Returning 0 from EatsAtLeast should ensure we never get here.

	1936 UNREACHABLE();

	1937 }

	1938

	1939

	1940 void LoopChoiceNode::FillInBMInfo(intptr_t offset,

	1941 intptr_t budget,

	1942 BoyerMooreLookahead* bm,

	1943 bool not_at_start) {

	1944 if (body_can_be_zero_length_ \|\| budget <= 0) {

	1945 bm->SetRest(offset);

	1946 SaveBMInfo(bm, not_at_start, offset);

	1947 return;

	1948 }

	1949 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start);

	1950 SaveBMInfo(bm, not_at_start, offset);

	1951 }

	1952

	1953

	1954 void TextNode::FillInBMInfo(intptr_t initial_offset,

	1955 intptr_t budget,

	1956 BoyerMooreLookahead* bm,

	1957 bool not_at_start) {

	1958 if (initial_offset >= bm->length()) return;

	1959 intptr_t offset = initial_offset;

	1960 intptr_t max_char = bm->max_char();

	1961 for (intptr_t i = 0; i < elements()->length(); i++) {

	1962 if (offset >= bm->length()) {

	1963 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	1964 return;

	1965 }

	1966 TextElement text = elements()->At(i);

	1967 if (text.text_type() == TextElement::ATOM) {

	1968 RegExpAtom* atom = text.atom();

	1969 for (intptr_t j = 0; j < atom->length(); j++, offset++) {

	1970 if (offset >= bm->length()) {

	1971 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	1972 return;

	1973 }

	1974 uint16_t character = atom->data()->At(j);

	1975 if (bm->compiler()->ignore_case()) {

	1976 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	1977 intptr_t length = GetCaseIndependentLetters(

	1978 character,

	1979 bm->max_char() == Symbols::kMaxOneCharCodeSymbol,

	1980 chars);

	1981 for (intptr_t j = 0; j < length; j++) {

	1982 bm->Set(offset, chars[j]);

	1983 }

	1984 } else {

	1985 if (character <= max_char) bm->Set(offset, character);

	1986 }

	1987 }

	1988 } else {

	1989 ASSERT(text.text_type() == TextElement::CHAR_CLASS);

	1990 RegExpCharacterClass* char_class = text.char_class();

	1991 ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges();

	1992 if (char_class->is_negated()) {

	1993 bm->SetAll(offset);

	1994 } else {

	1995 for (intptr_t k = 0; k < ranges->length(); k++) {

	1996 CharacterRange& range = ranges->At(k);

	1997 if (range.from() > max_char) continue;

	1998 intptr_t to = Utils::Minimum(max_char, static_cast<int>(range.to()));

	1999 bm->SetInterval(offset, Interval(range.from(), to));

	2000 }

	2001 }

	2002 offset++;

	2003 }

	2004 }

	2005 if (offset >= bm->length()) {

	2006 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	2007 return;

	2008 }

	2009 on_success()->FillInBMInfo(offset,

	2010 budget - 1,

	2011 bm,

	2012 true); // Not at start after a text node.

	2013 if (initial_offset == 0) set_bm_info(not_at_start, bm);

	2014 }

	2015

	2016

	2017 // Check for [0-9A-Z_a-z].

	2018 static void EmitWordCheck(RegExpMacroAssembler* assembler,

	2019 BlockLabel* word,

	2020 BlockLabel* non_word,

	2021 bool fall_through_on_word) {

	2022 if (assembler->CheckSpecialCharacterClass(

	2023 fall_through_on_word ? 'w' : 'W',

	2024 fall_through_on_word ? non_word : word)) {

	2025 // Optimized implementation available.

	2026 return;

	2027 }

	2028 assembler->CheckCharacterGT('z', non_word);

	2029 assembler->CheckCharacterLT('0', non_word);

	2030 assembler->CheckCharacterGT('a' - 1, word);

	2031 assembler->CheckCharacterLT('9' + 1, word);

	2032 assembler->CheckCharacterLT('A', non_word);

	2033 assembler->CheckCharacterLT('Z' + 1, word);

	2034 if (fall_through_on_word) {

	2035 assembler->CheckNotCharacter('_', non_word);

	2036 } else {

	2037 assembler->CheckCharacter('_', word);

	2038 }

	2039 }

751	2040

752	2041

753 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).	2042 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).

754 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {	2043 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {

755 UNIMPLEMENTED();	2044 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	2045 Trace::TriBool next_is_word_character = Trace::UNKNOWN;

	2046 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);

	2047 BoyerMooreLookahead* lookahead = bm_info(not_at_start);

	2048 if (lookahead == NULL) {

	2049 intptr_t eats_at_least =

	2050 Utils::Minimum(kMaxLookaheadForBoyerMoore,

	2051 EatsAtLeast(kMaxLookaheadForBoyerMoore,

	2052 kRecursionBudget,

	2053 not_at_start));

	2054 if (eats_at_least >= 1) {

	2055 BoyerMooreLookahead* bm =

	2056 new(I) BoyerMooreLookahead(eats_at_least, compiler, I);

	2057 FillInBMInfo(0, kRecursionBudget, bm, not_at_start);

	2058 if (bm->at(0)->is_non_word())

	2059 next_is_word_character = Trace::FALSE_VALUE;

	2060 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;

	2061 }

	2062 } else {

	2063 if (lookahead->at(0)->is_non_word())

	2064 next_is_word_character = Trace::FALSE_VALUE;

	2065 if (lookahead->at(0)->is_word())

	2066 next_is_word_character = Trace::TRUE_VALUE;

	2067 }

	2068 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);

	2069 if (next_is_word_character == Trace::UNKNOWN) {

	2070 BlockLabel before_non_word;

	2071 BlockLabel before_word;

	2072 if (trace->characters_preloaded() != 1) {

	2073 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);

	2074 }

	2075 // Fall through on non-word.

	2076 EmitWordCheck(assembler, &before_word, &before_non_word, false);

	2077 // Next character is not a word character.

	2078 assembler->BindBlock(&before_non_word);

	2079 BlockLabel ok;

	2080 // Backtrack on \B (non-boundary check) if previous is a word,

	2081 // since we know next is not a word and this would be a boundary.

	2082 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);

	2083

	2084 if (!assembler->IsClosed()) {

	2085 assembler->Jump(&ok);

	2086 }

	2087

	2088 assembler->BindBlock(&before_word);

	2089 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);

	2090 assembler->BindBlock(&ok);

	2091 } else if (next_is_word_character == Trace::TRUE_VALUE) {

	2092 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);

	2093 } else {

	2094 ASSERT(next_is_word_character == Trace::FALSE_VALUE);

	2095 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);

	2096 }

756 }	2097 }

757	2098

758	2099

759 void AssertionNode::BacktrackIfPrevious(	2100 void AssertionNode::BacktrackIfPrevious(

760 RegExpCompiler* compiler,	2101 RegExpCompiler* compiler,

761 Trace* trace,	2102 Trace* trace,

762 AssertionNode::IfPrevious backtrack_if_previous) {	2103 AssertionNode::IfPrevious backtrack_if_previous) {

763 UNIMPLEMENTED();	2104 RegExpMacroAssembler* assembler = compiler->macro_assembler();

764 }	2105 Trace new_trace(*trace);

765	2106 new_trace.InvalidateCurrentCharacter();

766	2107

	2108 BlockLabel fall_through, dummy;

	2109

	2110 BlockLabel* non_word = backtrack_if_previous == kIsNonWord ?

	2111 new_trace.backtrack() :

	2112 &fall_through;

	2113 BlockLabel* word = backtrack_if_previous == kIsNonWord ?

	2114 &fall_through :

	2115 new_trace.backtrack();

	2116

	2117 if (new_trace.cp_offset() == 0) {

	2118 // The start of input counts as a non-word character, so the question is

	2119 // decided if we are at the start.

	2120 assembler->CheckAtStart(non_word);

	2121 }

	2122 // We already checked that we are not at the start of input so it must be

	2123 // OK to load the previous character.

	2124 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);

	2125 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);

	2126

	2127 assembler->BindBlock(&fall_through);

	2128 on_success()->Emit(compiler, &new_trace);

	2129 }

	2130

	2131

	2132 static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) {

	2133 if (quick_check == NULL) return false;

	2134 if (offset >= quick_check->characters()) return false;

	2135 return quick_check->positions(offset)->determines_perfectly;

	2136 }

	2137

	2138

	2139 static void UpdateBoundsCheck(intptr_t index, int* checked_up_to) {

	2140 if (index > *checked_up_to) {

	2141 *checked_up_to = index;

	2142 }

	2143 }

	2144

	2145

	2146 typedef bool EmitCharacterFunction(Isolate* isolate,

	2147 RegExpCompiler* compiler,

	2148 uint16_t c,

	2149 BlockLabel* on_failure,

	2150 intptr_t cp_offset,

	2151 bool check,

	2152 bool preloaded);

	2153

	2154

	2155 static inline bool EmitSimpleCharacter(Isolate* isolate,

	2156 RegExpCompiler* compiler,

	2157 uint16_t c,

	2158 BlockLabel* on_failure,

	2159 intptr_t cp_offset,

	2160 bool check,

	2161 bool preloaded) {

	2162 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	2163 bool bound_checked = false;

	2164 if (!preloaded) {

	2165 assembler->LoadCurrentCharacter(

	2166 cp_offset,

	2167 on_failure,

	2168 check);

	2169 bound_checked = true;

	2170 }

	2171 assembler->CheckNotCharacter(c, on_failure);

	2172 return bound_checked;

	2173 }

	2174

	2175

	2176 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,

	2177 bool ascii,

	2178 uint16_t c1,

	2179 uint16_t c2,

	2180 BlockLabel* on_failure) {

	2181 uint16_t char_mask;

	2182 if (ascii) {

	2183 char_mask = Symbols::kMaxOneCharCodeSymbol;

	2184 } else {

	2185 char_mask = Utf16::kMaxCodeUnit;

	2186 }

	2187 uint16_t exor = c1 ^ c2;

	2188 // Check whether exor has only one bit set.

	2189 if (((exor - 1) & exor) == 0) {

	2190 // If c1 and c2 differ only by one bit.

	2191 // Ecma262UnCanonicalize always gives the highest number last.

	2192 ASSERT(c2 > c1);

	2193 uint16_t mask = char_mask ^ exor;

	2194 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);

	2195 return true;

	2196 }

	2197 ASSERT(c2 > c1);

	2198 uint16_t diff = c2 - c1;

	2199 if (((diff - 1) & diff) == 0 && c1 >= diff) {

	2200 // If the characters differ by 2^n but don't differ by one bit then

	2201 // subtract the difference from the found character, then do the or

	2202 // trick. We avoid the theoretical case where negative numbers are

	2203 // involved in order to simplify code generation.

	2204 uint16_t mask = char_mask ^ diff;

	2205 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,

	2206 diff,

	2207 mask,

	2208 on_failure);

	2209 return true;

	2210 }

	2211 return false;

	2212 }

	2213

	2214 // Only emits letters (things that have case). Only used for case independent

	2215 // matches.

	2216 static inline bool EmitAtomLetter(Isolate* isolate,

	2217 RegExpCompiler* compiler,

	2218 uint16_t c,

	2219 BlockLabel* on_failure,

	2220 intptr_t cp_offset,

	2221 bool check,

	2222 bool preloaded) {

	2223 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	2224 bool ascii = compiler->ascii();

	2225 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	2226 intptr_t length = GetCaseIndependentLetters(c, ascii, chars);

	2227 if (length <= 1) return false;

	2228 // We may not need to check against the end of the input string

	2229 // if this character lies before a character that matched.

	2230 if (!preloaded) {

	2231 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

	2232 }

	2233 BlockLabel ok;

	2234 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);

	2235 switch (length) {

	2236 case 2: {

	2237 if (ShortCutEmitCharacterPair(macro_assembler,

	2238 ascii,

	2239 chars[0],

	2240 chars[1],

	2241 on_failure)) {

	2242 } else {

	2243 macro_assembler->CheckCharacter(chars[0], &ok);

	2244 macro_assembler->CheckNotCharacter(chars[1], on_failure);

	2245 macro_assembler->BindBlock(&ok);

	2246 }

	2247 break;

	2248 }

	2249 case 4:

	2250 macro_assembler->CheckCharacter(chars[3], &ok);

	2251 // Fall through!

	2252 case 3:

	2253 macro_assembler->CheckCharacter(chars[0], &ok);

	2254 macro_assembler->CheckCharacter(chars[1], &ok);

	2255 macro_assembler->CheckNotCharacter(chars[2], on_failure);

	2256 macro_assembler->BindBlock(&ok);

	2257 break;

	2258 default:

	2259 UNREACHABLE();

	2260 break;

	2261 }

	2262 return true;

	2263 }

	2264

	2265

	2266 // Only emits non-letters (things that don't have case). Only used for case

	2267 // independent matches.

	2268 static inline bool EmitAtomNonLetter(Isolate* isolate,

	2269 RegExpCompiler* compiler,

	2270 uint16_t c,

	2271 BlockLabel* on_failure,

	2272 intptr_t cp_offset,

	2273 bool check,

	2274 bool preloaded) {

	2275 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	2276 bool ascii = compiler->ascii();

	2277 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	2278 intptr_t length = GetCaseIndependentLetters(c, ascii, chars);

	2279 if (length < 1) {

	2280 // This can't match. Must be an ASCII subject and a non-ASCII character.

	2281 // We do not need to do anything since the ASCII pass already handled this.

	2282 return false; // Bounds not checked.

	2283 }

	2284 bool checked = false;

	2285 // We handle the length > 1 case in a later pass.

	2286 if (length == 1) {

	2287 if (ascii && c > Symbols::kMaxOneCharCodeSymbol) {

	2288 // Can't match - see above.

	2289 return false; // Bounds not checked.

	2290 }

	2291 if (!preloaded) {

	2292 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

	2293 checked = check;

	2294 }

	2295 macro_assembler->CheckNotCharacter(c, on_failure);

	2296 }

	2297 return checked;

	2298 }

	2299

	2300

	2301 static void EmitBoundaryTest(RegExpMacroAssembler* masm,

	2302 intptr_t border,

	2303 BlockLabel* fall_through,

	2304 BlockLabel* above_or_equal,

	2305 BlockLabel* below) {

	2306 if (below != fall_through) {

	2307 masm->CheckCharacterLT(border, below);

	2308 if (above_or_equal != fall_through) masm->Jump(above_or_equal);

	2309 } else {

	2310 masm->CheckCharacterGT(border - 1, above_or_equal);

	2311 }

	2312 }

	2313

	2314

	2315 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,

	2316 intptr_t first,

	2317 intptr_t last,

	2318 BlockLabel* fall_through,

	2319 BlockLabel* in_range,

	2320 BlockLabel* out_of_range) {

	2321 if (in_range == fall_through) {

	2322 if (first == last) {

	2323 masm->CheckNotCharacter(first, out_of_range);

	2324 } else {

	2325 masm->CheckCharacterNotInRange(first, last, out_of_range);

	2326 }

	2327 } else {

	2328 if (first == last) {

	2329 masm->CheckCharacter(first, in_range);

	2330 } else {

	2331 masm->CheckCharacterInRange(first, last, in_range);

	2332 }

	2333 if (out_of_range != fall_through) masm->Jump(out_of_range);

	2334 }

	2335 }

	2336

	2337

	2338 static void CutOutRange(RegExpMacroAssembler* masm,

	2339 ZoneGrowableArray<int>* ranges,

	2340 intptr_t start_index,

	2341 intptr_t end_index,

	2342 intptr_t cut_index,

	2343 BlockLabel* even_label,

	2344 BlockLabel* odd_label) {

	2345 bool odd = (((cut_index - start_index) & 1) == 1);

	2346 BlockLabel* in_range_label = odd ? odd_label : even_label;

	2347 BlockLabel dummy;

	2348 EmitDoubleBoundaryTest(masm,

	2349 ranges->At(cut_index),

	2350 ranges->At(cut_index + 1) - 1,

	2351 &dummy,

	2352 in_range_label,

	2353 &dummy);

	2354 ASSERT(!dummy.IsLinked());

	2355 // Cut out the single range by rewriting the array. This creates a new

	2356 // range that is a merger of the two ranges on either side of the one we

	2357 // are cutting out. The oddity of the labels is preserved.

	2358 for (intptr_t j = cut_index; j > start_index; j--) {

	2359 ranges->At(j) = ranges->At(j - 1);

	2360 }

	2361 for (intptr_t j = cut_index + 1; j < end_index; j++) {

	2362 ranges->At(j) = ranges->At(j + 1);

	2363 }

	2364 }

	2365

	2366

	2367 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.

	2368 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.

	2369 static void EmitUseLookupTable(

	2370 RegExpMacroAssembler* masm,

	2371 ZoneGrowableArray<int>* ranges,

	2372 intptr_t start_index,

	2373 intptr_t end_index,

	2374 intptr_t min_char,

	2375 BlockLabel* fall_through,

	2376 BlockLabel* even_label,

	2377 BlockLabel* odd_label) {

	2378 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	2379 static const intptr_t kMask = RegExpMacroAssembler::kTableMask;

	2380

	2381 intptr_t base = (min_char & ~kMask);

	2382

	2383 // Assert that everything is on one kTableSize page.

	2384 for (intptr_t i = start_index; i <= end_index; i++) {

	2385 ASSERT((ranges->At(i) & ~kMask) == base);

	2386 }

	2387 ASSERT(start_index == 0 \|\| (ranges->At(start_index - 1) & ~kMask) <= base);

	2388

	2389 char templ[kSize];

	2390 BlockLabel* on_bit_set;

	2391 BlockLabel* on_bit_clear;

	2392 intptr_t bit;

	2393 if (even_label == fall_through) {

	2394 on_bit_set = odd_label;

	2395 on_bit_clear = even_label;

	2396 bit = 1;

	2397 } else {

	2398 on_bit_set = even_label;

	2399 on_bit_clear = odd_label;

	2400 bit = 0;

	2401 }

	2402 for (intptr_t i = 0; i < (ranges->At(start_index) & kMask) && i < kSize;

	2403 i++) {

	2404 templ[i] = bit;

	2405 }

	2406 intptr_t j = 0;

	2407 bit ^= 1;

	2408 for (intptr_t i = start_index; i < end_index; i++) {

	2409 for (j = (ranges->At(i) & kMask); j < (ranges->At(i + 1) & kMask); j++) {

	2410 templ[j] = bit;

	2411 }

	2412 bit ^= 1;

	2413 }

	2414 for (intptr_t i = j; i < kSize; i++) {

	2415 templ[i] = bit;

	2416 }

	2417 // TODO(erikcorry): Cache these.

	2418 TypedData& ba = TypedData::ZoneHandle(

	2419 masm->isolate(),

	2420 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld));

	2421 for (intptr_t i = 0; i < kSize; i++) {

	2422 ba.SetUint8(i, templ[i]);

	2423 }

	2424 masm->CheckBitInTable(ba, on_bit_set);

	2425 if (on_bit_clear != fall_through) masm->Jump(on_bit_clear);

	2426 }

	2427

	2428

	2429 // Unicode case. Split the search space into kSize spaces that are handled

	2430 // with recursion.

	2431 static void SplitSearchSpace(ZoneGrowableArray<int>* ranges,

	2432 intptr_t start_index,

	2433 intptr_t end_index,

	2434 int* new_start_index,

	2435 int* new_end_index,

	2436 int* border) {

	2437 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	2438 static const intptr_t kMask = RegExpMacroAssembler::kTableMask;

	2439

	2440 intptr_t first = ranges->At(start_index);

	2441 intptr_t last = ranges->At(end_index) - 1;

	2442

	2443 *new_start_index = start_index;

	2444 *border = (ranges->At(start_index) & ~kMask) + kSize;

	2445 while (*new_start_index < end_index) {

	2446 if (ranges->At(new_start_index) > border) break;

	2447 (*new_start_index)++;

	2448 }

	2449 // new_start_index is the index of the first edge that is beyond the

	2450 // current kSize space.

	2451

	2452 // For very large search spaces we do a binary chop search of the non-ASCII

	2453 // space instead of just going to the end of the current kSize space. The

	2454 // heuristics are complicated a little by the fact that any 128-character

	2455 // encoding space can be quickly tested with a table lookup, so we don't

	2456 // wish to do binary chop search at a smaller granularity than that. A

	2457 // 128-character space can take up a lot of space in the ranges array if,

	2458 // for example, we only want to match every second character (eg. the lower

	2459 // case characters on some Unicode pages).

	2460 intptr_t binary_chop_index = (end_index + start_index) / 2;

	2461 // The first test ensures that we get to the code that handles the ASCII

	2462 // range with a single not-taken branch, speeding up this important

	2463 // character range (even non-ASCII charset-based text has spaces and

	2464 // punctuation).

	2465 if (*border - 1 > Symbols::kMaxOneCharCodeSymbol && // ASCII case.

	2466 end_index - start_index > (new_start_index - start_index) 2 &&

	2467 last - first > kSize * 2 &&

	2468 binary_chop_index > *new_start_index &&

	2469 ranges->At(binary_chop_index) >= first + 2 * kSize) {

	2470 intptr_t scan_forward_for_section_border = binary_chop_index;;

	2471 intptr_t new_border = (ranges->At(binary_chop_index) \| kMask) + 1;

	2472

	2473 while (scan_forward_for_section_border < end_index) {

	2474 if (ranges->At(scan_forward_for_section_border) > new_border) {

	2475 *new_start_index = scan_forward_for_section_border;

	2476 *border = new_border;

	2477 break;

	2478 }

	2479 scan_forward_for_section_border++;

	2480 }

	2481 }

	2482

	2483 ASSERT(*new_start_index > start_index);

	2484 new_end_index = new_start_index - 1;

	2485 if (ranges->At(new_end_index) == border) {

	2486 (*new_end_index)--;

	2487 }

	2488 if (*border >= ranges->At(end_index)) {

	2489 *border = ranges->At(end_index);

	2490 *new_start_index = end_index; // Won't be used.

	2491 *new_end_index = end_index - 1;

	2492 }

	2493 }

	2494

	2495

	2496 // Gets a series of segment boundaries representing a character class. If the

	2497 // character is in the range between an even and an odd boundary (counting from

	2498 // start_index) then go to even_label, otherwise go to odd_label. We already

	2499 // know that the character is in the range of min_char to max_char inclusive.

	2500 // Either label can be NULL indicating backtracking. Either label can also be

	2501 // equal to the fall_through label.

	2502 static void GenerateBranches(RegExpMacroAssembler* masm,

	2503 ZoneGrowableArray<int>* ranges,

	2504 intptr_t start_index,

	2505 intptr_t end_index,

	2506 uint16_t min_char,

	2507 uint16_t max_char,

	2508 BlockLabel* fall_through,

	2509 BlockLabel* even_label,

	2510 BlockLabel* odd_label) {

	2511 intptr_t first = ranges->At(start_index);

	2512 intptr_t last = ranges->At(end_index) - 1;

	2513

	2514 ASSERT(min_char < first);

	2515

	2516 // Just need to test if the character is before or on-or-after

	2517 // a particular character.

	2518 if (start_index == end_index) {

	2519 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);

	2520 return;

	2521 }

	2522

	2523 // Another almost trivial case: There is one interval in the middle that is

	2524 // different from the end intervals.

	2525 if (start_index + 1 == end_index) {

	2526 EmitDoubleBoundaryTest(

	2527 masm, first, last, fall_through, even_label, odd_label);

	2528 return;

	2529 }

	2530

	2531 // It's not worth using table lookup if there are very few intervals in the

	2532 // character class.

	2533 if (end_index - start_index <= 6) {

	2534 // It is faster to test for individual characters, so we look for those

	2535 // first, then try arbitrary ranges in the second round.

	2536 static intptr_t kNoCutIndex = -1;

	2537 intptr_t cut = kNoCutIndex;

	2538 for (intptr_t i = start_index; i < end_index; i++) {

	2539 if (ranges->At(i) == ranges->At(i + 1) - 1) {

	2540 cut = i;

	2541 break;

	2542 }

	2543 }

	2544 if (cut == kNoCutIndex) cut = start_index;

	2545 CutOutRange(

	2546 masm, ranges, start_index, end_index, cut, even_label, odd_label);

	2547 ASSERT(end_index - start_index >= 2);

	2548 GenerateBranches(masm,

	2549 ranges,

	2550 start_index + 1,

	2551 end_index - 1,

	2552 min_char,

	2553 max_char,

	2554 fall_through,

	2555 even_label,

	2556 odd_label);

	2557 return;

	2558 }

	2559

	2560 // If there are a lot of intervals in the regexp, then we will use tables to

	2561 // determine whether the character is inside or outside the character class.

	2562 static const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits;

	2563

	2564 if ((max_char >> kBits) == (min_char >> kBits)) {

	2565 EmitUseLookupTable(masm,

	2566 ranges,

	2567 start_index,

	2568 end_index,

	2569 min_char,

	2570 fall_through,

	2571 even_label,

	2572 odd_label);

	2573 return;

	2574 }

	2575

	2576 if ((min_char >> kBits) != (first >> kBits)) {

	2577 masm->CheckCharacterLT(first, odd_label);

	2578 GenerateBranches(masm,

	2579 ranges,

	2580 start_index + 1,

	2581 end_index,

	2582 first,

	2583 max_char,

	2584 fall_through,

	2585 odd_label,

	2586 even_label);

	2587 return;

	2588 }

	2589

	2590 intptr_t new_start_index = 0;

	2591 intptr_t new_end_index = 0;

	2592 intptr_t border = 0;

	2593

	2594 SplitSearchSpace(ranges,

	2595 start_index,

	2596 end_index,

	2597 &new_start_index,

	2598 &new_end_index,

	2599 &border);

	2600

	2601 BlockLabel handle_rest;

	2602 BlockLabel* above = &handle_rest;

	2603 if (border == last + 1) {

	2604 // We didn't find any section that started after the limit, so everything

	2605 // above the border is one of the terminal labels.

	2606 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;

	2607 ASSERT(new_end_index == end_index - 1);

	2608 }

	2609

	2610 ASSERT(start_index <= new_end_index);

	2611 ASSERT(new_start_index <= end_index);

	2612 ASSERT(start_index < new_start_index);

	2613 ASSERT(new_end_index < end_index);

	2614 ASSERT(new_end_index + 1 == new_start_index \|\|

	2615 (new_end_index + 2 == new_start_index &&

	2616 border == ranges->At(new_end_index + 1)));

	2617 ASSERT(min_char < border - 1);

	2618 ASSERT(border < max_char);

	2619 ASSERT(ranges->At(new_end_index) < border);

	2620 ASSERT(border < ranges->At(new_start_index) \|\|

	2621 (border == ranges->At(new_start_index) &&

	2622 new_start_index == end_index &&

	2623 new_end_index == end_index - 1 &&

	2624 border == last + 1));

	2625 ASSERT(new_start_index == 0 \|\| border >= ranges->At(new_start_index - 1));

	2626

	2627 masm->CheckCharacterGT(border - 1, above);

	2628 BlockLabel dummy;

	2629 GenerateBranches(masm,

	2630 ranges,

	2631 start_index,

	2632 new_end_index,

	2633 min_char,

	2634 border - 1,

	2635 &dummy,

	2636 even_label,

	2637 odd_label);

	2638

	2639 if (handle_rest.IsLinked()) {

	2640 masm->BindBlock(&handle_rest);

	2641 bool flip = (new_start_index & 1) != (start_index & 1);

	2642 GenerateBranches(masm,

	2643 ranges,

	2644 new_start_index,

	2645 end_index,

	2646 border,

	2647 max_char,

	2648 &dummy,

	2649 flip ? odd_label : even_label,

	2650 flip ? even_label : odd_label);

	2651 }

	2652 }

	2653

	2654

	2655 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,

	2656 RegExpCharacterClass* cc,

	2657 bool ascii,

	2658 BlockLabel* on_failure,

	2659 intptr_t cp_offset,

	2660 bool check_offset,

	2661 bool preloaded,

	2662 Isolate* isolate) {

	2663 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();

	2664 if (!CharacterRange::IsCanonical(ranges)) {

	2665 CharacterRange::Canonicalize(ranges);

	2666 }

	2667

	2668 intptr_t max_char;

	2669 if (ascii) {

	2670 max_char = Symbols::kMaxOneCharCodeSymbol;

	2671 } else {

	2672 max_char = Utf16::kMaxCodeUnit;

	2673 }

	2674

	2675 intptr_t range_count = ranges->length();

	2676

	2677 intptr_t last_valid_range = range_count - 1;

	2678 while (last_valid_range >= 0) {

	2679 CharacterRange& range = ranges->At(last_valid_range);

	2680 if (range.from() <= max_char) {

	2681 break;

	2682 }

	2683 last_valid_range--;

	2684 }

	2685

	2686 if (last_valid_range < 0) {

	2687 if (!cc->is_negated()) {

	2688 macro_assembler->GoTo(on_failure);

	2689 }

	2690 if (check_offset) {

	2691 macro_assembler->CheckPosition(cp_offset, on_failure);

	2692 }

	2693 return;

	2694 }

	2695

	2696 if (last_valid_range == 0 &&

	2697 ranges->At(0).IsEverything(max_char)) {

	2698 if (cc->is_negated()) {

	2699 macro_assembler->GoTo(on_failure);

	2700 } else {

	2701 // This is a common case hit by non-anchored expressions.

	2702 if (check_offset) {

	2703 macro_assembler->CheckPosition(cp_offset, on_failure);

	2704 }

	2705 }

	2706 return;

	2707 }

	2708 if (last_valid_range == 0 &&

	2709 !cc->is_negated() &&

	2710 ranges->At(0).IsEverything(max_char)) {

	2711 // This is a common case hit by non-anchored expressions.

	2712 if (check_offset) {

	2713 macro_assembler->CheckPosition(cp_offset, on_failure);

	2714 }

	2715 return;

	2716 }

	2717

	2718 if (!preloaded) {

	2719 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);

	2720 }

	2721

	2722 if (cc->is_standard() &&

	2723 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),

	2724 on_failure)) {

	2725 return;

	2726 }

	2727

	2728

	2729 // A new list with ascending entries. Each entry is a code unit

	2730 // where there is a boundary between code units that are part of

	2731 // the class and code units that are not. Normally we insert an

	2732 // entry at zero which goes to the failure label, but if there

	2733 // was already one there we fall through for success on that entry.

	2734 // Subsequent entries have alternating meaning (success/failure).

	2735 ZoneGrowableArray<int>* range_boundaries =

	2736 new(isolate) ZoneGrowableArray<int>(last_valid_range);

	2737

	2738 bool zeroth_entry_is_failure = !cc->is_negated();

	2739

	2740 for (intptr_t i = 0; i <= last_valid_range; i++) {

	2741 CharacterRange& range = ranges->At(i);

	2742 if (range.from() == 0) {

	2743 ASSERT(i == 0);

	2744 zeroth_entry_is_failure = !zeroth_entry_is_failure;

	2745 } else {

	2746 range_boundaries->Add(range.from());

	2747 }

	2748 range_boundaries->Add(range.to() + 1);

	2749 }

	2750 intptr_t end_index = range_boundaries->length() - 1;

	2751 if (range_boundaries->At(end_index) > max_char) {

	2752 end_index--;

	2753 }

	2754

	2755 BlockLabel fall_through;

	2756 GenerateBranches(macro_assembler,

	2757 range_boundaries,

	2758 0, // start_index.

	2759 end_index,

	2760 0, // min_char.

	2761 max_char,

	2762 &fall_through,

	2763 zeroth_entry_is_failure ? &fall_through : on_failure,

	2764 zeroth_entry_is_failure ? on_failure : &fall_through);

	2765 macro_assembler->BindBlock(&fall_through);

	2766 }

	2767

	2768

767 // We call this repeatedly to generate code for each pass over the text node.	2769 // We call this repeatedly to generate code for each pass over the text node.

768 // The passes are in increasing order of difficulty because we hope one	2770 // The passes are in increasing order of difficulty because we hope one

769 // of the first passes will fail in which case we are saved the work of the	2771 // of the first passes will fail in which case we are saved the work of the

770 // later passes. for example for the case independent regexp /%[asdfghjkl]a/	2772 // later passes. for example for the case independent regexp /%[asdfghjkl]a/

771 // we will check the '%' in the first pass, the case independent 'a' in the	2773 // we will check the '%' in the first pass, the case independent 'a' in the

772 // second pass and the character class in the last pass.	2774 // second pass and the character class in the last pass.

773 //	2775 //

774 // The passes are done from right to left, so for example to test for /bar/	2776 // The passes are done from right to left, so for example to test for /bar/

775 // we will first test for an 'r' with offset 2, then an 'a' with offset 1	2777 // we will first test for an 'r' with offset 2, then an 'a' with offset 1

776 // and then a 'b' with offset 0. This means we can avoid the end-of-input	2778 // and then a 'b' with offset 0. This means we can avoid the end-of-input

(...skipping 15 matching lines...) Expand all Loading...
792 // loading characters, which means we do not need to recheck the bounds	2794 // loading characters, which means we do not need to recheck the bounds

793 // up to the limit the quick check already checked. In addition the quick	2795 // up to the limit the quick check already checked. In addition the quick

794 // check can have involved a mask and compare operation which may simplify	2796 // check can have involved a mask and compare operation which may simplify

795 // or obviate the need for further checks at some character positions.	2797 // or obviate the need for further checks at some character positions.

796 void TextNode::TextEmitPass(RegExpCompiler* compiler,	2798 void TextNode::TextEmitPass(RegExpCompiler* compiler,

797 TextEmitPassType pass,	2799 TextEmitPassType pass,

798 bool preloaded,	2800 bool preloaded,

799 Trace* trace,	2801 Trace* trace,

800 bool first_element_checked,	2802 bool first_element_checked,

801 intptr_t* checked_up_to) {	2803 intptr_t* checked_up_to) {

802 UNIMPLEMENTED();	2804 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	2805 bool ascii = compiler->ascii();

	2806 BlockLabel* backtrack = trace->backtrack();

	2807 QuickCheckDetails* quick_check = trace->quick_check_performed();

	2808 intptr_t element_count = elms_->length();

	2809 for (intptr_t i = preloaded ? 0 : element_count - 1; i >= 0; i--) {

	2810 TextElement elm = elms_->At(i);

	2811 intptr_t cp_offset = trace->cp_offset() + elm.cp_offset();

	2812 if (elm.text_type() == TextElement::ATOM) {

	2813 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();

	2814 for (intptr_t j = preloaded ? 0 : quarks->length() - 1; j >= 0; j--) {

	2815 if (first_element_checked && i == 0 && j == 0) continue;

	2816 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;

	2817 EmitCharacterFunction* emit_function = NULL;

	2818 switch (pass) {

	2819 case NON_ASCII_MATCH:

	2820 ASSERT(ascii);

	2821 if (quarks->At(j) > Symbols::kMaxOneCharCodeSymbol) {

	2822 assembler->GoTo(backtrack);

	2823 return;

	2824 }

	2825 break;

	2826 case NON_LETTER_CHARACTER_MATCH:

	2827 emit_function = &EmitAtomNonLetter;

	2828 break;

	2829 case SIMPLE_CHARACTER_MATCH:

	2830 emit_function = &EmitSimpleCharacter;

	2831 break;

	2832 case CASE_CHARACTER_MATCH:

	2833 emit_function = &EmitAtomLetter;

	2834 break;

	2835 default:

	2836 break;

	2837 }

	2838 if (emit_function != NULL) {

	2839 bool bound_checked = emit_function(I,

	2840 compiler,

	2841 quarks->At(j),

	2842 backtrack,

	2843 cp_offset + j,

	2844 *checked_up_to < cp_offset + j,

	2845 preloaded);

	2846 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);

	2847 }

	2848 }

	2849 } else {

	2850 ASSERT(elm.text_type() == TextElement::CHAR_CLASS);

	2851 if (pass == CHARACTER_CLASS_MATCH) {

	2852 if (first_element_checked && i == 0) continue;

	2853 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;

	2854 RegExpCharacterClass* cc = elm.char_class();

	2855 EmitCharClass(assembler,

	2856 cc,

	2857 ascii,

	2858 backtrack,

	2859 cp_offset,

	2860 *checked_up_to < cp_offset,

	2861 preloaded,

	2862 I);

	2863 UpdateBoundsCheck(cp_offset, checked_up_to);

	2864 }

	2865 }

	2866 }

803 }	2867 }

804	2868

805	2869

806 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,	2870 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,

807 intptr_t eats_at_least) {	2871 intptr_t eats_at_least) {

808 UNIMPLEMENTED();	2872 intptr_t preload_characters = Utils::Minimum(4, eats_at_least);

809 return NULL;	2873 if (compiler->macro_assembler()->CanReadUnaligned()) {

	2874 bool ascii = compiler->ascii();

	2875 if (ascii) {

	2876 if (preload_characters > 4) preload_characters = 4;

	2877 // We can't preload 3 characters because there is no machine instruction

	2878 // to do that. We can't just load 4 because we could be reading

	2879 // beyond the end of the string, which could cause a memory fault.

	2880 if (preload_characters == 3) preload_characters = 2;

	2881 } else {

	2882 if (preload_characters > 2) preload_characters = 2;

	2883 }

	2884 } else {

	2885 if (preload_characters > 1) preload_characters = 1;

	2886 }

	2887 return preload_characters;

810 }	2888 }

811	2889

812	2890

813 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,

814 Trace* trace,

815 GuardedAlternative alternative,

816 AlternativeGeneration* alt_gen,

817 intptr_t preload_characters,

818 bool next_expects_preload) {

819 UNIMPLEMENTED();

820 }

821

822

823 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find,	2891 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find,

824 intptr_t budget,	2892 intptr_t budget,

825 bool not_at_start) {	2893 bool not_at_start) {

826 intptr_t answer = Length();	2894 intptr_t answer = Length();

827 if (answer >= still_to_find) return answer;	2895 if (answer >= still_to_find) return answer;

828 if (budget <= 0) return answer;	2896 if (budget <= 0) return answer;

829 // We are not at start after this node so we set the last argument to 'true'.	2897 // We are not at start after this node so we set the last argument to 'true'.

830 return answer + on_success()->EatsAtLeast(still_to_find - answer,	2898 return answer + on_success()->EatsAtLeast(still_to_find - answer,

831 budget - 1,	2899 budget - 1,

832 true);	2900 true);

(...skipping 71 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
904 intptr_t budget,	2972 intptr_t budget,

905 bool not_at_start) {	2973 bool not_at_start) {

906 if (budget <= 0) return 0;	2974 if (budget <= 0) return 0;

907 // Alternative 0 is the negative lookahead, alternative 1 is what comes	2975 // Alternative 0 is the negative lookahead, alternative 1 is what comes

908 // afterwards.	2976 // afterwards.

909 RegExpNode* node = alternatives_->At(1).node();	2977 RegExpNode* node = alternatives_->At(1).node();

910 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);	2978 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);

911 }	2979 }

912	2980

913	2981

	2982 // Takes the left-most 1-bit and smears it out, setting all bits to its right.

	2983 static inline uint32_t SmearBitsRight(uint32_t v) {

	2984 v \|= v >> 1;

	2985 v \|= v >> 2;

	2986 v \|= v >> 4;

	2987 v \|= v >> 8;

	2988 v \|= v >> 16;

	2989 return v;

	2990 }

	2991

	2992

914 // Here is the meat of GetQuickCheckDetails (see also the comment on the	2993 // Here is the meat of GetQuickCheckDetails (see also the comment on the

915 // super-class in the .h file).	2994 // super-class in the .h file).

916 //	2995 //

917 // We iterate along the text object, building up for each character a	2996 // We iterate along the text object, building up for each character a

918 // mask and value that can be used to test for a quick failure to match.	2997 // mask and value that can be used to test for a quick failure to match.

919 // The masks and values for the positions will be combined into a single	2998 // The masks and values for the positions will be combined into a single

920 // machine word for the current character width in order to be used in	2999 // machine word for the current character width in order to be used in

921 // generating a quick check.	3000 // generating a quick check.

922 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,	3001 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,

923 RegExpCompiler* compiler,	3002 RegExpCompiler* compiler,

924 intptr_t characters_filled_in,	3003 intptr_t characters_filled_in,

925 bool not_at_start) {	3004 bool not_at_start) {

926 UNIMPLEMENTED();	3005 ASSERT(characters_filled_in < details->characters());

	3006 intptr_t characters = details->characters();

	3007 intptr_t char_mask;

	3008 if (compiler->ascii()) {

	3009 char_mask = Symbols::kMaxOneCharCodeSymbol;

	3010 } else {

	3011 char_mask = Utf16::kMaxCodeUnit;

	3012 }

	3013 for (intptr_t k = 0; k < elms_->length(); k++) {

	3014 TextElement elm = elms_->At(k);

	3015 if (elm.text_type() == TextElement::ATOM) {

	3016 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();

	3017 for (intptr_t i = 0; i < characters && i < quarks->length(); i++) {

	3018 QuickCheckDetails::Position* pos =

	3019 details->positions(characters_filled_in);

	3020 uint16_t c = quarks->At(i);

	3021 if (c > char_mask) {

	3022 // If we expect a non-ASCII character from an ASCII string,

	3023 // there is no way we can match. Not even case independent

	3024 // matching can turn an ASCII character into non-ASCII or

	3025 // vice versa.

	3026 details->set_cannot_match();

	3027 pos->determines_perfectly = false;

	3028 return;

	3029 }

	3030 if (compiler->ignore_case()) {

	3031 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	3032 intptr_t length =

	3033 GetCaseIndependentLetters(c, compiler->ascii(), chars);

	3034 ASSERT(length != 0); // Can only happen if c > char_mask (see above).

	3035 if (length == 1) {

	3036 // This letter has no case equivalents, so it's nice and simple

	3037 // and the mask-compare will determine definitely whether we have

	3038 // a match at this character position.

	3039 pos->mask = char_mask;

	3040 pos->value = c;

	3041 pos->determines_perfectly = true;

	3042 } else {

	3043 uint32_t common_bits = char_mask;

	3044 uint32_t bits = chars[0];

	3045 for (intptr_t j = 1; j < length; j++) {

	3046 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);

	3047 common_bits ^= differing_bits;

	3048 bits &= common_bits;

	3049 }

	3050 // If length is 2 and common bits has only one zero in it then

	3051 // our mask and compare instruction will determine definitely

	3052 // whether we have a match at this character position. Otherwise

	3053 // it can only be an approximate check.

	3054 uint32_t one_zero = (common_bits \| ~char_mask);

	3055 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {

	3056 pos->determines_perfectly = true;

	3057 }

	3058 pos->mask = common_bits;

	3059 pos->value = bits;

	3060 }

	3061 } else {

	3062 // Don't ignore case. Nice simple case where the mask-compare will

	3063 // determine definitely whether we have a match at this character

	3064 // position.

	3065 pos->mask = char_mask;

	3066 pos->value = c;

	3067 pos->determines_perfectly = true;

	3068 }

	3069 characters_filled_in++;

	3070 ASSERT(characters_filled_in <= details->characters());

	3071 if (characters_filled_in == details->characters()) {

	3072 return;

	3073 }

	3074 }

	3075 } else {

	3076 QuickCheckDetails::Position* pos =

	3077 details->positions(characters_filled_in);

	3078 RegExpCharacterClass* tree = elm.char_class();

	3079 ZoneGrowableArray<CharacterRange>* ranges = tree->ranges();

	3080 if (tree->is_negated()) {

	3081 // A quick check uses multi-character mask and compare. There is no

	3082 // useful way to incorporate a negative char class into this scheme

	3083 // so we just conservatively create a mask and value that will always

	3084 // succeed.

	3085 pos->mask = 0;

	3086 pos->value = 0;

	3087 } else {

	3088 intptr_t first_range = 0;

	3089 while (ranges->At(first_range).from() > char_mask) {

	3090 first_range++;

	3091 if (first_range == ranges->length()) {

	3092 details->set_cannot_match();

	3093 pos->determines_perfectly = false;

	3094 return;

	3095 }

	3096 }

	3097 CharacterRange range = ranges->At(first_range);

	3098 uint16_t from = range.from();

	3099 uint16_t to = range.to();

	3100 if (to > char_mask) {

	3101 to = char_mask;

	3102 }

	3103 uint32_t differing_bits = (from ^ to);

	3104 // A mask and compare is only perfect if the differing bits form a

	3105 // number like 00011111 with one single block of trailing 1s.

	3106 if ((differing_bits & (differing_bits + 1)) == 0 &&

	3107 from + differing_bits == to) {

	3108 pos->determines_perfectly = true;

	3109 }

	3110 uint32_t common_bits = ~SmearBitsRight(differing_bits);

	3111 uint32_t bits = (from & common_bits);

	3112 for (intptr_t i = first_range + 1; i < ranges->length(); i++) {

	3113 CharacterRange range = ranges->At(i);

	3114 uint16_t from = range.from();

	3115 uint16_t to = range.to();

	3116 if (from > char_mask) continue;

	3117 if (to > char_mask) to = char_mask;

	3118 // Here we are combining more ranges into the mask and compare

	3119 // value. With each new range the mask becomes more sparse and

	3120 // so the chances of a false positive rise. A character class

	3121 // with multiple ranges is assumed never to be equivalent to a

	3122 // mask and compare operation.

	3123 pos->determines_perfectly = false;

	3124 uint32_t new_common_bits = (from ^ to);

	3125 new_common_bits = ~SmearBitsRight(new_common_bits);

	3126 common_bits &= new_common_bits;

	3127 bits &= new_common_bits;

	3128 uint32_t differing_bits = (from & common_bits) ^ bits;

	3129 common_bits ^= differing_bits;

	3130 bits &= common_bits;

	3131 }

	3132 pos->mask = common_bits;

	3133 pos->value = bits;

	3134 }

	3135 characters_filled_in++;

	3136 ASSERT(characters_filled_in <= details->characters());

	3137 if (characters_filled_in == details->characters()) {

	3138 return;

	3139 }

	3140 }

	3141 }

	3142 ASSERT(characters_filled_in != details->characters());

	3143 if (!details->cannot_match()) {

	3144 on_success()-> GetQuickCheckDetails(details,

	3145 compiler,

	3146 characters_filled_in,

	3147 true);

	3148 }

927 }	3149 }

928	3150

929	3151

930 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,	3152 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,

931 RegExpCompiler* compiler,	3153 RegExpCompiler* compiler,

932 intptr_t characters_filled_in,	3154 intptr_t characters_filled_in,

933 bool not_at_start) {	3155 bool not_at_start) {

934 UNIMPLEMENTED();	3156 if (body_can_be_zero_length_ \|\| info()->visited) return;

	3157 VisitMarker marker(info());

	3158 return ChoiceNode::GetQuickCheckDetails(details,

	3159 compiler,

	3160 characters_filled_in,

	3161 not_at_start);

935 }	3162 }

936	3163

937	3164

938 intptr_t TextNode::Length() {	3165 intptr_t TextNode::Length() {

939 TextElement elm = elms_->Last();	3166 TextElement elm = elms_->Last();

940 ASSERT(elm.cp_offset() >= 0);	3167 ASSERT(elm.cp_offset() >= 0);

941 return elm.cp_offset() + elm.length();	3168 return elm.cp_offset() + elm.length();

942 }	3169 }

943	3170

944	3171

(...skipping 518 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1463 Isolate* isolate) {	3690 Isolate* isolate) {

1464 uint16_t bottom = from();	3691 uint16_t bottom = from();

1465 uint16_t top = to();	3692 uint16_t top = to();

1466 if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {	3693 if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {

1467 if (bottom > Symbols::kMaxOneCharCodeSymbol) return;	3694 if (bottom > Symbols::kMaxOneCharCodeSymbol) return;

1468 if (top > Symbols::kMaxOneCharCodeSymbol) {	3695 if (top > Symbols::kMaxOneCharCodeSymbol) {

1469 top = Symbols::kMaxOneCharCodeSymbol;	3696 top = Symbols::kMaxOneCharCodeSymbol;

1470 }	3697 }

1471 }	3698 }

1472	3699

1473 UNIMPLEMENTED();	3700 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;

	3701 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange;

	3702 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	3703 if (top == bottom) {

	3704 // If this is a singleton we just expand the one character.

	3705 intptr_t length = jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLIN T

	3706 for (intptr_t i = 0; i < length; i++) {

	3707 uint32_t chr = chars[i];

	3708 if (chr != bottom) {

	3709 ranges->Add(CharacterRange::Singleton(chars[i]));

	3710 }

	3711 }

	3712 } else {

	3713 // If this is a range we expand the characters block by block,

	3714 // expanding contiguous subranges (blocks) one at a time.

	3715 // The approach is as follows. For a given start character we

	3716 // look up the remainder of the block that contains it (represented

	3717 // by the end point), for instance we find 'z' if the character

	3718 // is 'c'. A block is characterized by the property

	3719 // that all characters uncanonicalize in the same way, except that

	3720 // each entry in the result is incremented by the distance from the first

	3721 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and // NOLINT

	3722 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].

	3723 // Once we've found the end point we look up its uncanonicalization

	3724 // and produce a range for each element. For instance for [c-f]

	3725 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only

	3726 // add a range if it is not already contained in the input, so [c-f]

	3727 // will be skipped but [C-F] will be added. If this range is not

	3728 // completely contained in a block we do this for all the blocks

	3729 // covered by the range (handling characters that is not in a block

	3730 // as a "singleton block").

	3731 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];

	3732 intptr_t pos = bottom;

	3733 while (pos <= top) {

	3734 intptr_t length = jsregexp_canonrange.get(pos, '\0', range);

	3735 uint16_t block_end;

	3736 if (length == 0) {

	3737 block_end = pos;

	3738 } else {

	3739 ASSERT(length == 1);

	3740 block_end = range[0];

	3741 }

	3742 intptr_t end = (block_end > top) ? top : block_end;

	3743 length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT

	3744 for (intptr_t i = 0; i < length; i++) {

	3745 uint32_t c = range[i];

	3746 uint16_t range_from = c - (block_end - pos);

	3747 uint16_t range_to = c - (block_end - end);

	3748 if (!(bottom <= range_from && range_to <= top)) {

	3749 ranges->Add(CharacterRange(range_from, range_to));

	3750 }

	3751 }

	3752 pos = end + 1;

	3753 }

	3754 }

	3755 }

1474	3756

1475 // unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	3757

1476 // if (top == bottom) {	3758 // -------------------------------------------------------------------

1477 // // If this is a singleton we just expand the one character.	3759 // Splay tree

1478 // intptr_t length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', ch ars); // NOLINT	3760

1479 // for (intptr_t i = 0; i < length; i++) {	3761

1480 // uint32_t chr = chars[i];	3762 // Workaround for the fact that ZoneGrowableArray does not have contains().

1481 // if (chr != bottom) {	3763 static bool ArrayContains(ZoneGrowableArray<unsigned>* array,

1482 // ranges->Add(CharacterRange::Singleton(chars[i]), zone);	3764 unsigned value) {

1483 // }	3765 for (intptr_t i = 0; i < array->length(); i++) {

1484 // }	3766 if (array->At(i) == value) {

1485 // } else {	3767 return true;

1486 // // If this is a range we expand the characters block by block,	3768 }

1487 // // expanding contiguous subranges (blocks) one at a time.	3769 }

1488 // // The approach is as follows. For a given start character we	3770 return false;

1489 // // look up the remainder of the block that contains it (represented	3771 }

1490 // // by the end point), for instance we find 'z' if the character	3772

1491 // // is 'c'. A block is characterized by the property	3773

1492 // // that all characters uncanonicalize in the same way, except that	3774 void OutSet::Set(unsigned value, Isolate* isolate) {

1493 // // each entry in the result is incremented by the distance from the first	3775 if (value < kFirstLimit) {

1494 // // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] a nd // NOLINT	3776 first_ \|= (1 << value);

1495 // // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].	3777 } else {

1496 // // Once we've found the end point we look up its uncanonicalization	3778 if (remaining_ == NULL)

1497 // // and produce a range for each element. For instance for [c-f]	3779 remaining_ = new(isolate) ZoneGrowableArray<unsigned>(1);

1498 // // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only	3780

1499 // // add a range if it is not already contained in the input, so [c-f]	3781 bool remaining_contains_value = ArrayContains(remaining_, value);

1500 // // will be skipped but [C-F] will be added. If this range is not	3782 if (remaining_->is_empty() \|\| !remaining_contains_value) {

1501 // // completely contained in a block we do this for all the blocks	3783 remaining_->Add(value);

1502 // // covered by the range (handling characters that is not in a block	3784 }

1503 // // as a "singleton block").	3785 }

1504 // unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];	3786 }

1505 // intptr_t pos = bottom;	3787

1506 // while (pos <= top) {	3788

1507 // intptr_t length = isolate->jsregexp_canonrange()->get(pos, '\0', range);	3789 bool OutSet::Get(unsigned value) const {

1508 // uint16_t block_end;	3790 if (value < kFirstLimit) {

1509 // if (length == 0) {	3791 return (first_ & (1 << value)) != 0;

1510 // block_end = pos;	3792 } else if (remaining_ == NULL) {

1511 // } else {	3793 return false;

1512 // ASSERT(length == 1);	3794 } else {

1513 // block_end = range[0];	3795 return ArrayContains(remaining_, value);

1514 // }	3796 }

1515 // intptr_t end = (block_end > top) ? top : block_end;

1516 // length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range) ; // NOLINT

1517 // for (intptr_t i = 0; i < length; i++) {

1518 // uint32_t c = range[i];

1519 // uint16_t range_from = c - (block_end - pos);

1520 // uint16_t range_to = c - (block_end - end);

1521 // if (!(bottom <= range_from && range_to <= top)) {

1522 // ranges->Add(CharacterRange(range_from, range_to));

1523 // }

1524 // }

1525 // pos = end + 1;

1526 // }

1527 // }

1528 }	3797 }

1529	3798

1530 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,	3799 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,

1531 RegExpCompiler* compiler,	3800 RegExpCompiler* compiler,

1532 intptr_t filled_in,	3801 intptr_t filled_in,

1533 bool not_at_start) {	3802 bool not_at_start) {

1534 if (assertion_type_ == AT_START && not_at_start) {	3803 if (assertion_type_ == AT_START && not_at_start) {

1535 details->set_cannot_match();	3804 details->set_cannot_match();

1536 return;	3805 return;

1537 }	3806 }

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1571 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find,	3840 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find,

1572 intptr_t budget,	3841 intptr_t budget,

1573 bool not_at_start) {	3842 bool not_at_start) {

1574 return EatsAtLeastHelper(still_to_find,	3843 return EatsAtLeastHelper(still_to_find,

1575 budget - 1,	3844 budget - 1,

1576 loop_node_,	3845 loop_node_,

1577 not_at_start);	3846 not_at_start);

1578 }	3847 }

1579	3848

1580	3849

1581 class RecursionCheck {

1582 public:

1583 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {

1584 compiler->IncrementRecursionDepth();

1585 }

1586 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }

1587 private:

1588 RegExpCompiler* compiler_;

1589 };

1590

1591

1592 DispatchTable* ChoiceNode::GetTable(bool ignore_case) {	3850 DispatchTable* ChoiceNode::GetTable(bool ignore_case) {

1593 UNIMPLEMENTED();	3851 UNIMPLEMENTED();

1594 return NULL;	3852 return NULL;

1595 }	3853 }

1596	3854

1597	3855

1598 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,	3856 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,

1599 RegExpCompiler* compiler,	3857 RegExpCompiler* compiler,

1600 intptr_t characters_filled_in,	3858 intptr_t characters_filled_in,

1601 bool not_at_start) {	3859 bool not_at_start) {

(...skipping 118 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1720 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {	3978 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {

1721 return RegExpEngine::CompilationResult("RegExp too big");	3979 return RegExpEngine::CompilationResult("RegExp too big");

1722 }	3980 }

1723	3981

1724	3982

1725 // Attempts to compile the regexp using an Irregexp code generator. Returns	3983 // Attempts to compile the regexp using an Irregexp code generator. Returns

1726 // a fixed array or a null handle depending on whether it succeeded.	3984 // a fixed array or a null handle depending on whether it succeeded.

1727 RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool ignore_case,	3985 RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool ignore_case,

1728 bool ascii)	3986 bool ascii)

1729 : next_register_(2 * (capture_count + 1)),	3987 : next_register_(2 * (capture_count + 1)),

	3988 work_list_(NULL),

1730 recursion_depth_(0),	3989 recursion_depth_(0),

1731 ignore_case_(ignore_case),	3990 ignore_case_(ignore_case),

1732 ascii_(ascii),	3991 ascii_(ascii),

1733 reg_exp_too_big_(false),	3992 reg_exp_too_big_(false),

1734 current_expansion_factor_(1),	3993 current_expansion_factor_(1),

1735 isolate_(Isolate::Current()) {	3994 isolate_(Isolate::Current()) {

1736 accept_ = new(I) EndNode(EndNode::ACCEPT, I);	3995 accept_ = new(I) EndNode(EndNode::ACCEPT, I);

1737 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);	3996 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);

1738 }	3997 }

1739	3998

1740	3999

1741 RegExpEngine::CompilationResult RegExpCompiler::Assemble(	4000 RegExpEngine::CompilationResult RegExpCompiler::Assemble(

1742 RegExpMacroAssembler* macro_assembler,	4001 RegExpMacroAssembler* macro_assembler,

1743 RegExpNode* start,	4002 RegExpNode* start,

1744 intptr_t capture_count,	4003 intptr_t capture_count,

1745 const String& pattern) {	4004 const String& pattern) {

1746 UNIMPLEMENTED();	4005 static const bool use_slow_safe_regexp_compiler = false;

1747 return RegExpEngine::CompilationResult("");	4006

	4007 macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);

	4008 macro_assembler_ = macro_assembler;

	4009

	4010 ZoneGrowableArray<RegExpNode*> work_list(0);

	4011 work_list_ = &work_list;

	4012 BlockLabel fail;

	4013 macro_assembler_->PushBacktrack(&fail);

	4014 Trace new_trace;

	4015 start->Emit(this, &new_trace);

	4016 macro_assembler_->BindBlock(&fail);

	4017 macro_assembler_->Fail();

	4018 while (!work_list.is_empty()) {

	4019 work_list.RemoveLast()->Emit(this, &new_trace);

	4020 }

	4021 if (reg_exp_too_big_) return IrregexpRegExpTooBig();

	4022

	4023 Function& function = macro_assembler->GetCode(pattern);

	4024

	4025 if (FLAG_trace_irregexp) {

	4026 macro_assembler->PrintBlocks();

	4027 }

	4028

	4029 return RegExpEngine::CompilationResult(function);

1748 }	4030 }

1749	4031

1750	4032

1751 RegExpEngine::CompilationResult RegExpEngine::Compile(	4033 RegExpEngine::CompilationResult RegExpEngine::Compile(

1752 RegExpCompileData* data,	4034 RegExpCompileData* data,

1753 bool ignore_case,	4035 bool ignore_case,

1754 bool is_global,	4036 bool is_global,

1755 bool is_multiline,	4037 bool is_multiline,

1756 const String& pattern,	4038 const String& pattern,

1757 const String& sample_subject,	4039 const String& sample_subject,

1758 bool is_ascii) {	4040 bool is_ascii) {

1759 Isolate* isolate = Isolate::Current();	4041 Isolate* isolate = Isolate::Current();

1760	4042

1761 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {	4043 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {

1762 return IrregexpRegExpTooBig();	4044 return IrregexpRegExpTooBig();

1763 }	4045 }

1764 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);	4046 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);

1765	4047

1766 // TODO(jgruber): Character frequency sampling.	4048 // TODO(jgruber): Character frequency sampling.

1767	4049

1768 // Wrap the body of the regexp in capture #0.	4050 // Wrap the body of the regexp in capture #0.

1769 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,	4051 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,

1770 0,	4052 0,

1771 &compiler,	4053 &compiler,

1772 compiler.accept());	4054 compiler.accept());

1773	4055

1774 RegExpNode* node = captured_body;	4056 RegExpNode* node = captured_body;

1775 bool is_start_anchored = data->tree->IsAnchoredAtStart();	4057 bool is_start_anchored = data->tree->IsAnchoredAtStart();

	4058 bool is_end_anchored = data->tree->IsAnchoredAtEnd();

	4059 intptr_t max_length = data->tree->max_match();

1776 if (!is_start_anchored) {	4060 if (!is_start_anchored) {

1777 // Add a .*? at the beginning, outside the body capture, unless	4061 // Add a .*? at the beginning, outside the body capture, unless

1778 // this expression is anchored at the beginning.	4062 // this expression is anchored at the beginning.

1779 RegExpNode* loop_node =	4063 RegExpNode* loop_node =

1780 RegExpQuantifier::ToNode(0,	4064 RegExpQuantifier::ToNode(0,

1781 RegExpTree::kInfinity,	4065 RegExpTree::kInfinity,

1782 false,	4066 false,

1783 new(isolate) RegExpCharacterClass('*'),	4067 new(isolate) RegExpCharacterClass('*'),

1784 &compiler,	4068 &compiler,

1785 captured_body,	4069 captured_body,

(...skipping 23 matching lines...) Expand all Loading...
1809	4093

1810 if (node == NULL) node = new(isolate) EndNode(EndNode::BACKTRACK, isolate);	4094 if (node == NULL) node = new(isolate) EndNode(EndNode::BACKTRACK, isolate);

1811 data->node = node;	4095 data->node = node;

1812 Analysis analysis(ignore_case, is_ascii);	4096 Analysis analysis(ignore_case, is_ascii);

1813 analysis.EnsureAnalyzed(node);	4097 analysis.EnsureAnalyzed(node);

1814 if (analysis.has_failed()) {	4098 if (analysis.has_failed()) {

1815 const char* error_message = analysis.error_message();	4099 const char* error_message = analysis.error_message();

1816 return CompilationResult(error_message);	4100 return CompilationResult(error_message);

1817 }	4101 }

1818	4102

1819 // TODO(jgruber): Assemble native code.	4103 // Native regexp implementation.

1820	4104

1821 return RegExpEngine::CompilationResult("");	4105 IRRegExpMacroAssembler::Mode mode =

1822 }	4106 is_ascii ? IRRegExpMacroAssembler::ASCII

1823	4107 : IRRegExpMacroAssembler::UC16;

1824	4108

	4109 IRRegExpMacroAssembler macro_assembler(mode, data->capture_count, isolate);

	4110

	4111 // Inserted here, instead of in Assembler, because it depends on information

	4112 // in the AST that isn't replicated in the Node structure.

	4113 static const intptr_t kMaxBacksearchLimit = 1024;

	4114 if (is_end_anchored &&

	4115 !is_start_anchored &&

	4116 max_length < kMaxBacksearchLimit) {

	4117 macro_assembler.SetCurrentPositionFromEnd(max_length);

	4118 }

	4119

	4120 if (is_global) {

	4121 macro_assembler.set_global_mode(

	4122 (data->tree->min_match() > 0)

	4123 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK

	4124 : RegExpMacroAssembler::GLOBAL);

	4125 }

	4126

	4127 return compiler.Assemble(&macro_assembler,

	4128 node,

	4129 data->capture_count,

	4130 pattern);

	4131 }

	4132

	4133 RawJSRegExp* RegExpEngine::New(const String& pattern,

	4134 bool multi_line,

	4135 bool ignore_case) {

	4136 JSRegExp& regexp = JSRegExp::Handle(JSRegExp::New(0));

	4137

	4138 regexp.set_pattern(pattern);

	4139

	4140 if (multi_line) regexp.set_is_multi_line();

	4141 if (ignore_case) regexp.set_is_ignore_case();

	4142

	4143 // TODO(jgruber): We might want to use normal string searching algorithms

	4144 // for simple patterns.

	4145 regexp.set_is_complex();

	4146 regexp.set_is_global(); // All dart regexps are global.

	4147

	4148 regexp.set_one_byte_function(Function::Handle());

	4149 regexp.set_two_byte_function(Function::Handle());

	4150

	4151 return regexp.raw();

	4152 }

	4153

	4154

	4155 void BoyerMoorePositionInfo::Set(intptr_t character) {

	4156 SetInterval(Interval(character, character));

	4157 }

	4158

	4159

	4160 ContainedInLattice AddRange(ContainedInLattice containment,

	4161 const int* ranges,

	4162 intptr_t ranges_length,

	4163 Interval new_range) {

	4164 ASSERT((ranges_length & 1) == 1);

	4165 ASSERT(ranges[ranges_length - 1] == Utf16::kMaxCodeUnit + 1);

	4166 if (containment == kLatticeUnknown) return containment;

	4167 bool inside = false;

	4168 intptr_t last = 0;

	4169 for (intptr_t i = 0; i < ranges_length;

	4170 inside = !inside, last = ranges[i], i++) {

	4171 // Consider the range from last to ranges[i].

	4172 // We haven't got to the new range yet.

	4173 if (ranges[i] <= new_range.from()) continue;

	4174 // New range is wholly inside last-ranges[i]. Note that new_range.to() is

	4175 // inclusive, but the values in ranges are not.

	4176 if (last <= new_range.from() && new_range.to() < ranges[i]) {

	4177 return Combine(containment, inside ? kLatticeIn : kLatticeOut);

	4178 }

	4179 return kLatticeUnknown;

	4180 }

	4181 return containment;

	4182 }

	4183

	4184

	4185 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {

	4186 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);

	4187 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);

	4188 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);

	4189 surrogate_ =

	4190 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);

	4191 if (interval.to() - interval.from() >= kMapSize - 1) {

	4192 if (map_count_ != kMapSize) {

	4193 map_count_ = kMapSize;

	4194 for (intptr_t i = 0; i < kMapSize; i++) map_->At(i) = true;

	4195 }

	4196 return;

	4197 }

	4198 for (intptr_t i = interval.from(); i <= interval.to(); i++) {

	4199 intptr_t mod_character = (i & kMask);

	4200 if (!map_->At(mod_character)) {

	4201 map_count_++;

	4202 map_->At(mod_character) = true;

	4203 }

	4204 if (map_count_ == kMapSize) return;

	4205 }

	4206 }

	4207

	4208

	4209 void BoyerMoorePositionInfo::SetAll() {

	4210 s_ = w_ = d_ = kLatticeUnknown;

	4211 if (map_count_ != kMapSize) {

	4212 map_count_ = kMapSize;

	4213 for (intptr_t i = 0; i < kMapSize; i++) map_->At(i) = true;

	4214 }

	4215 }

	4216

	4217

	4218 BoyerMooreLookahead::BoyerMooreLookahead(

	4219 intptr_t length, RegExpCompiler* compiler, Isolate* isolate)

	4220 : length_(length),

	4221 compiler_(compiler) {

	4222 if (compiler->ascii()) {

	4223 max_char_ = Symbols::kMaxOneCharCodeSymbol;

	4224 } else {

	4225 max_char_ = Utf16::kMaxCodeUnit;

	4226 }

	4227 bitmaps_ = new(isolate) ZoneGrowableArray<BoyerMoorePositionInfo*>(length);

	4228 for (intptr_t i = 0; i < length; i++) {

	4229 bitmaps_->Add(new(isolate) BoyerMoorePositionInfo(isolate));

	4230 }

	4231 }

	4232

	4233

	4234 // Take all the characters that will not prevent a successful match if they

	4235 // occur in the subject string in the range between min_lookahead and

	4236 // max_lookahead (inclusive) measured from the current position. If the

	4237 // character at max_lookahead offset is not one of these characters, then we

	4238 // can safely skip forwards by the number of characters in the range.

	4239 intptr_t BoyerMooreLookahead::GetSkipTable(intptr_t min_lookahead,

	4240 intptr_t max_lookahead,

	4241 TypedData* boolean_skip_table) {

	4242 const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	4243

	4244 const intptr_t kSkipArrayEntry = 0;

	4245 const intptr_t kDontSkipArrayEntry = 1;

	4246

	4247 for (intptr_t i = 0; i < kSize; i++) {

	4248 boolean_skip_table->SetUint8(i, kSkipArrayEntry);

	4249 }

	4250 intptr_t skip = max_lookahead + 1 - min_lookahead;

	4251

	4252 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) {

	4253 BoyerMoorePositionInfo* map = bitmaps_->At(i);

	4254 for (intptr_t j = 0; j < kSize; j++) {

	4255 if (map->at(j)) {

	4256 boolean_skip_table->SetUint8(j, kDontSkipArrayEntry);

	4257 }

	4258 }

	4259 }

	4260

	4261 return skip;

	4262 }

	4263

	4264

	4265 // Find the longest range of lookahead that has the fewest number of different

	4266 // characters that can occur at a given position. Since we are optimizing two

	4267 // different parameters at once this is a tradeoff.

	4268 bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {

	4269 intptr_t biggest_points = 0;

	4270 // If more than 32 characters out of 128 can occur it is unlikely that we can

	4271 // be lucky enough to step forwards much of the time.

	4272 const intptr_t kMaxMax = 32;

	4273 for (intptr_t max_number_of_chars = 4;

	4274 max_number_of_chars < kMaxMax;

	4275 max_number_of_chars *= 2) {

	4276 biggest_points =

	4277 FindBestInterval(max_number_of_chars, biggest_points, from, to);

	4278 }

	4279 if (biggest_points == 0) return false;

	4280 return true;

	4281 }

	4282

	4283

	4284 // Find the highest-points range between 0 and length_ where the character

	4285 // information is not too vague. 'Too vague' means that there are more than

	4286 // max_number_of_chars that can occur at this position. Calculates the number

	4287 // of points as the product of width-of-the-range and

	4288 // probability-of-finding-one-of-the-characters, where the probability is

	4289 // calculated using the frequency distribution of the sample subject string.

	4290 intptr_t BoyerMooreLookahead::FindBestInterval(

	4291 intptr_t max_number_of_chars,

	4292 intptr_t old_biggest_points,

	4293 int* from,

	4294 int* to) {

	4295 intptr_t biggest_points = old_biggest_points;

	4296 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	4297 for (intptr_t i = 0; i < length_; ) {

	4298 while (i < length_ && Count(i) > max_number_of_chars) i++;

	4299 if (i == length_) break;

	4300 intptr_t remembered_from = i;

	4301 bool union_map[kSize];

	4302 for (intptr_t j = 0; j < kSize; j++) union_map[j] = false;

	4303 while (i < length_ && Count(i) <= max_number_of_chars) {

	4304 BoyerMoorePositionInfo* map = bitmaps_->At(i);

	4305 for (intptr_t j = 0; j < kSize; j++) union_map[j] \|= map->at(j);

	4306 i++;

	4307 }

	4308 intptr_t frequency = 0;

	4309 for (intptr_t j = 0; j < kSize; j++) {

	4310 if (union_map[j]) {

	4311 // Add 1 to the frequency to give a small per-character boost for

	4312 // the cases where our sampling is not good enough and many

	4313 // characters have a frequency of zero. This means the frequency

	4314 // can theoretically be up to 2*kSize though we treat it mostly as

	4315 // a fraction of kSize.

	4316

	4317 // TODO(jgruber): Use frequency sampling once implemented.

	4318 }

	4319 }

	4320 // We use the probability of skipping times the distance we are skipping to

	4321 // judge the effectiveness of this. Actually we have a cut-off: By

	4322 // dividing by 2 we switch off the skipping if the probability of skipping

	4323 // is less than 50%. This is because the multibyte mask-and-compare

	4324 // skipping in quickcheck is more likely to do well on this case.

	4325 bool in_quickcheck_range = ((i - remembered_from < 4) \|\|

	4326 (compiler_->ascii() ? remembered_from <= 4 : remembered_from <= 2));

	4327 // Called 'probability' but it is only a rough estimate and can actually

	4328 // be outside the 0-kSize range.

	4329 intptr_t probability =

	4330 (in_quickcheck_range ? kSize / 2 : kSize) - frequency;

	4331 intptr_t points = (i - remembered_from) * probability;

	4332 if (points > biggest_points) {

	4333 *from = remembered_from;

	4334 *to = i - 1;

	4335 biggest_points = points;

	4336 }

	4337 }

	4338 return biggest_points;

	4339 }

	4340

	4341

	4342 // See comment above on the implementation of GetSkipTable.

	4343 bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {

	4344 const intptr_t kSize = RegExpMacroAssembler::kTableSize;

	4345

	4346 intptr_t min_lookahead = 0;

	4347 intptr_t max_lookahead = 0;

	4348

	4349 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;

	4350

	4351 bool found_single_character = false;

	4352 intptr_t single_character = 0;

	4353 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) {

	4354 BoyerMoorePositionInfo* map = bitmaps_->At(i);

	4355 if (map->map_count() > 1 \|\|

	4356 (found_single_character && map->map_count() != 0)) {

	4357 found_single_character = false;

	4358 break;

	4359 }

	4360 for (intptr_t j = 0; j < kSize; j++) {

	4361 if (map->at(j)) {

	4362 found_single_character = true;

	4363 single_character = j;

	4364 break;

	4365 }

	4366 }

	4367 }

	4368

	4369 intptr_t lookahead_width = max_lookahead + 1 - min_lookahead;

	4370

	4371 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {

	4372 // The mask-compare can probably handle this better.

	4373 return false;

	4374 }

	4375

	4376 if (found_single_character) {

	4377 BlockLabel cont, again;

	4378 masm->BindBlock(&again);

	4379 masm->LoadCurrentCharacter(max_lookahead, &cont, true);

	4380 if (max_char_ > kSize) {

	4381 masm->CheckCharacterAfterAnd(single_character,

	4382 RegExpMacroAssembler::kTableMask,

	4383 &cont);

	4384 } else {

	4385 masm->CheckCharacter(single_character, &cont);

	4386 }

	4387 masm->AdvanceCurrentPosition(lookahead_width);

	4388 masm->GoTo(&again);

	4389 masm->BindBlock(&cont);

	4390 return true;

	4391 }

	4392

	4393 TypedData& boolean_skip_table = TypedData::ZoneHandle(

	4394 compiler_->isolate(),

	4395 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld));

	4396 intptr_t skip_distance = GetSkipTable(

	4397 min_lookahead, max_lookahead, &boolean_skip_table);

	4398 ASSERT(skip_distance != 0);

	4399

	4400 BlockLabel cont, again;

	4401

	4402 masm->BindBlock(&again);

	4403 masm->LoadCurrentCharacter(max_lookahead, &cont, true);

	4404 masm->CheckBitInTable(boolean_skip_table, &cont);

	4405 masm->AdvanceCurrentPosition(skip_distance);

	4406 masm->Jump(&again);

	4407 masm->BindBlock(&cont);

	4408

	4409 return true;

	4410 }

	4411

	4412

1825 // -------------------------------------------------------------------	4413 // -------------------------------------------------------------------

1826 // Analysis	4414 // Analysis

1827	4415

1828	4416

1829 void Analysis::EnsureAnalyzed(RegExpNode* that) {	4417 void Analysis::EnsureAnalyzed(RegExpNode* that) {

1830 // TODO(jgruber): Stack overflow check.	4418 // TODO(jgruber): Stack overflow check.

1831 if (that->info()->been_analyzed \|\| that->info()->being_analyzed)	4419 if (that->info()->been_analyzed \|\| that->info()->being_analyzed)

1832 return;	4420 return;

1833 that->info()->being_analyzed = true;	4421 that->info()->being_analyzed = true;

1834 that->Accept(this);	4422 that->Accept(this);

(...skipping 169 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2004	4592

2005	4593

2006 void DotPrinter::PrintAttributes(RegExpNode* that) {	4594 void DotPrinter::PrintAttributes(RegExpNode* that) {

2007 OS::Print(" a%p [shape=Mrecord, color=grey, fontcolor=grey, "	4595 OS::Print(" a%p [shape=Mrecord, color=grey, fontcolor=grey, "

2008 "margin=0.1, fontsize=10, label=\"{", that);	4596 "margin=0.1, fontsize=10, label=\"{", that);

2009 AttributePrinter printer;	4597 AttributePrinter printer;

2010 NodeInfo* info = that->info();	4598 NodeInfo* info = that->info();

2011 printer.PrintBit("NI", info->follows_newline_interest);	4599 printer.PrintBit("NI", info->follows_newline_interest);

2012 printer.PrintBit("WI", info->follows_word_interest);	4600 printer.PrintBit("WI", info->follows_word_interest);

2013 printer.PrintBit("SI", info->follows_start_interest);	4601 printer.PrintBit("SI", info->follows_start_interest);

2014 Label* label = that->label();	4602 BlockLabel* label = that->label();

2015 if (label->IsBound())	4603 if (label->IsBound())

2016 printer.PrintPositive("@", label->Position());	4604 printer.PrintPositive("@", label->Position());

2017 OS::Print("}\"];\n"	4605 OS::Print("}\"];\n"

2018 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n",	4606 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n",

2019 that, that);	4607 that, that);

2020 }	4608 }

2021	4609

2022	4610

2023 static const bool kPrintDispatchTable = false;	4611 static const bool kPrintDispatchTable = false;

2024 void DotPrinter::VisitChoice(ChoiceNode* that) {	4612 void DotPrinter::VisitChoice(ChoiceNode* that) {

(...skipping 141 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2166 RegExpNode* node,	4754 RegExpNode* node,

2167 bool ignore_case) {	4755 bool ignore_case) {

2168 DotPrinter printer(ignore_case);	4756 DotPrinter printer(ignore_case);

2169 printer.PrintNode(label, node);	4757 printer.PrintNode(label, node);

2170 }	4758 }

2171	4759

2172	4760

2173 #endif // DEBUG	4761 #endif // DEBUG

2174	4762

2175 } // namespace dart	4763 } // namespace dart

OLD	NEW

« runtime/vm/regexp.h ('K') | « runtime/vm/regexp.h ('k') | runtime/vm/regexp_assembler.h » ('j') | runtime/vm/regexp_assembler.cc » ('J')