Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(21)

Side by Side Diff: runtime/vm/regexp.cc

Issue 754383002: Revert "Integrate the Irregexp Regular Expression Engine." (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/regexp.h ('k') | runtime/vm/regexp_assembler.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/regexp.h" 5 #include "vm/regexp.h"
6 6
7 #include "vm/dart_entry.h" 7 // SNIP
8 #include "vm/regexp_assembler.h"
9 #include "vm/regexp_ast.h"
10 #include "vm/unibrow-inl.h"
11 #include "vm/unicode.h"
12 #include "vm/symbols.h"
13
14 #define I (isolate())
15 #define CI (compiler->isolate())
16 8
17 namespace dart { 9 namespace dart {
18 10
19 DECLARE_FLAG(bool, trace_irregexp); 11 // SNIP
20
21 // Default to generating optimized regexp code.
22 static const bool kRegexpOptimization = true;
23
24 // More makes code generation slower, less makes V8 benchmark score lower.
25 static const intptr_t kMaxLookaheadForBoyerMoore = 8;
26
27 ContainedInLattice AddRange(ContainedInLattice containment,
28 const intptr_t* ranges,
29 intptr_t ranges_length,
30 Interval new_range) {
31 ASSERT((ranges_length & 1) == 1);
32 ASSERT(ranges[ranges_length - 1] == Utf16::kMaxCodeUnit + 1);
33 if (containment == kLatticeUnknown) return containment;
34 bool inside = false;
35 intptr_t last = 0;
36 for (intptr_t i = 0; i < ranges_length;
37 inside = !inside, last = ranges[i], i++) {
38 // Consider the range from last to ranges[i].
39 // We haven't got to the new range yet.
40 if (ranges[i] <= new_range.from()) continue;
41 // New range is wholly inside last-ranges[i]. Note that new_range.to() is
42 // inclusive, but the values in ranges are not.
43 if (last <= new_range.from() && new_range.to() < ranges[i]) {
44 return Combine(containment, inside ? kLatticeIn : kLatticeOut);
45 }
46 return kLatticeUnknown;
47 }
48 return containment;
49 }
50 12
51 // ------------------------------------------------------------------- 13 // -------------------------------------------------------------------
52 // Implementation of the Irregexp regular expression engine. 14 // Implementation of the Irregexp regular expression engine.
53 // 15 //
54 // The Irregexp regular expression engine is intended to be a complete 16 // The Irregexp regular expression engine is intended to be a complete
55 // implementation of ECMAScript regular expressions. It generates 17 // implementation of ECMAScript regular expressions. It generates either
56 // IR code that is subsequently compiled to native code. 18 // bytecodes or native code.
57 19
58 // The Irregexp regexp engine is structured in three steps. 20 // The Irregexp regexp engine is structured in three steps.
59 // 1) The parser generates an abstract syntax tree. See regexp_ast.cc. 21 // 1) The parser generates an abstract syntax tree. See ast.cc.
60 // 2) From the AST a node network is created. The nodes are all 22 // 2) From the AST a node network is created. The nodes are all
61 // subclasses of RegExpNode. The nodes represent states when 23 // subclasses of RegExpNode. The nodes represent states when
62 // executing a regular expression. Several optimizations are 24 // executing a regular expression. Several optimizations are
63 // performed on the node network. 25 // performed on the node network.
64 // 3) From the nodes we generate IR instructions that can actually 26 // 3) From the nodes we generate either byte codes or native code
65 // execute the regular expression (perform the search). The 27 // that can actually execute the regular expression (perform
66 // code generation step is described in more detail below. 28 // the search). The code generation step is described in more
29 // detail below.
67 30
68 // Code generation. 31 // Code generation.
69 // 32 //
70 // The nodes are divided into four main categories. 33 // The nodes are divided into four main categories.
71 // * Choice nodes 34 // * Choice nodes
72 // These represent places where the regular expression can 35 // These represent places where the regular expression can
73 // match in more than one way. For example on entry to an 36 // match in more than one way. For example on entry to an
74 // alternation (foo|bar) or a repetition (*, +, ? or {}). 37 // alternation (foo|bar) or a repetition (*, +, ? or {}).
75 // * Action nodes 38 // * Action nodes
76 // These represent places where some action should be 39 // These represent places where some action should be
77 // performed. Examples include recording the current position 40 // performed. Examples include recording the current position
78 // in the input string to a register (in order to implement 41 // in the input string to a register (in order to implement
79 // captures) or other actions on register for example in order 42 // captures) or other actions on register for example in order
80 // to implement the counters needed for {} repetitions. 43 // to implement the counters needed for {} repetitions.
81 // * Matching nodes 44 // * Matching nodes
82 // These attempt to match some element part of the input string. 45 // These attempt to match some element part of the input string.
83 // Examples of elements include character classes, plain strings 46 // Examples of elements include character classes, plain strings
84 // or back references. 47 // or back references.
85 // * End nodes 48 // * End nodes
86 // These are used to implement the actions required on finding 49 // These are used to implement the actions required on finding
87 // a successful match or failing to find a match. 50 // a successful match or failing to find a match.
88 // 51 //
89 // The code generated maintains some state as it runs. This consists of the 52 // The code generated (whether as byte codes or native code) maintains
90 // following elements: 53 // some state as it runs. This consists of the following elements:
91 // 54 //
92 // * The capture registers. Used for string captures. 55 // * The capture registers. Used for string captures.
93 // * Other registers. Used for counters etc. 56 // * Other registers. Used for counters etc.
94 // * The current position. 57 // * The current position.
95 // * The stack of backtracking information. Used when a matching node 58 // * The stack of backtracking information. Used when a matching node
96 // fails to find a match and needs to try an alternative. 59 // fails to find a match and needs to try an alternative.
97 // 60 //
98 // Conceptual regular expression execution model: 61 // Conceptual regular expression execution model:
99 // 62 //
100 // There is a simple conceptual model of regular expression execution 63 // There is a simple conceptual model of regular expression execution
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
190 // to that trace. The code generator therefore has the ability to generate 153 // to that trace. The code generator therefore has the ability to generate
191 // code for each node several times. In order to limit the size of the 154 // code for each node several times. In order to limit the size of the
192 // generated code there is an arbitrary limit on how many specialized sets of 155 // generated code there is an arbitrary limit on how many specialized sets of
193 // code may be generated for a given node. If the limit is reached, the 156 // code may be generated for a given node. If the limit is reached, the
194 // trace is flushed and a generic version of the code for a node is emitted. 157 // trace is flushed and a generic version of the code for a node is emitted.
195 // This is subsequently used for that node. The code emitted for non-generic 158 // This is subsequently used for that node. The code emitted for non-generic
196 // trace is not recorded in the node and so it cannot currently be reused in 159 // trace is not recorded in the node and so it cannot currently be reused in
197 // the event that code generation is requested for an identical trace. 160 // the event that code generation is requested for an identical trace.
198 161
199 162
200 void RegExpTree::AppendToText(RegExpText* text) { 163 void RegExpTree::AppendToText(RegExpText* text, Zone* zone) {
201 UNREACHABLE(); 164 UNREACHABLE();
202 } 165 }
203 166
204 167
205 void RegExpAtom::AppendToText(RegExpText* text) { 168 void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {
206 text->AddElement(TextElement::Atom(this)); 169 text->AddElement(TextElement::Atom(this), zone);
207 } 170 }
208 171
209 172
210 void RegExpCharacterClass::AppendToText(RegExpText* text) { 173 void RegExpCharacterClass::AppendToText(RegExpText* text, Zone* zone) {
211 text->AddElement(TextElement::CharClass(this)); 174 text->AddElement(TextElement::CharClass(this), zone);
212 } 175 }
213 176
214 177
215 void RegExpText::AppendToText(RegExpText* text) { 178 void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
216 for (intptr_t i = 0; i < elements()->length(); i++) 179 for (int i = 0; i < elements()->length(); i++)
217 text->AddElement((*elements())[i]); 180 text->AddElement(elements()->at(i), zone);
218 } 181 }
219 182
220 183
221 TextElement TextElement::Atom(RegExpAtom* atom) { 184 TextElement TextElement::Atom(RegExpAtom* atom) {
222 return TextElement(ATOM, atom); 185 return TextElement(ATOM, atom);
223 } 186 }
224 187
225 188
226 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) { 189 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {
227 return TextElement(CHAR_CLASS, char_class); 190 return TextElement(CHAR_CLASS, char_class);
228 } 191 }
229 192
230 193
231 intptr_t TextElement::length() const { 194 int TextElement::length() const {
232 switch (text_type()) { 195 switch (text_type()) {
233 case ATOM: 196 case ATOM:
234 return atom()->length(); 197 return atom()->length();
235 198
236 case CHAR_CLASS: 199 case CHAR_CLASS:
237 return 1; 200 return 1;
238 } 201 }
239 UNREACHABLE(); 202 UNREACHABLE();
240 return 0; 203 return 0;
241 } 204 }
242 205
243 206
244 class FrequencyCollator : public ValueObject { 207 DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
208 if (table_ == NULL) {
209 table_ = new(zone()) DispatchTable(zone());
210 DispatchTableConstructor cons(table_, ignore_case, zone());
211 cons.BuildTable(this);
212 }
213 return table_;
214 }
215
216
217 class FrequencyCollator {
245 public: 218 public:
246 FrequencyCollator() : total_samples_(0) { 219 FrequencyCollator() : total_samples_(0) {
247 for (intptr_t i = 0; i < RegExpMacroAssembler::kTableSize; i++) { 220 for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
248 frequencies_[i] = CharacterFrequency(i); 221 frequencies_[i] = CharacterFrequency(i);
249 } 222 }
250 } 223 }
251 224
252 void CountCharacter(intptr_t character) { 225 void CountCharacter(int character) {
253 intptr_t index = (character & RegExpMacroAssembler::kTableMask); 226 int index = (character & RegExpMacroAssembler::kTableMask);
254 frequencies_[index].Increment(); 227 frequencies_[index].Increment();
255 total_samples_++; 228 total_samples_++;
256 } 229 }
257 230
258 // Does not measure in percent, but rather per-128 (the table size from the 231 // Does not measure in percent, but rather per-128 (the table size from the
259 // regexp macro assembler). 232 // regexp macro assembler).
260 intptr_t Frequency(intptr_t in_character) { 233 int Frequency(int in_character) {
261 ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character); 234 DCHECK((in_character & RegExpMacroAssembler::kTableMask) == in_character);
262 if (total_samples_ < 1) return 1; // Division by zero. 235 if (total_samples_ < 1) return 1; // Division by zero.
263 intptr_t freq_in_per128 = 236 int freq_in_per128 =
264 (frequencies_[in_character].counter() * 128) / total_samples_; 237 (frequencies_[in_character].counter() * 128) / total_samples_;
265 return freq_in_per128; 238 return freq_in_per128;
266 } 239 }
267 240
268 private: 241 private:
269 class CharacterFrequency { 242 class CharacterFrequency {
270 public: 243 public:
271 CharacterFrequency() : counter_(0), character_(-1) { } 244 CharacterFrequency() : counter_(0), character_(-1) { }
272 explicit CharacterFrequency(intptr_t character) 245 explicit CharacterFrequency(int character)
273 : counter_(0), character_(character) { } 246 : counter_(0), character_(character) { }
274 247
275 void Increment() { counter_++; } 248 void Increment() { counter_++; }
276 intptr_t counter() { return counter_; } 249 int counter() { return counter_; }
277 intptr_t character() { return character_; } 250 int character() { return character_; }
278 251
279 private: 252 private:
280 intptr_t counter_; 253 int counter_;
281 intptr_t character_; 254 int character_;
282
283 DISALLOW_ALLOCATION();
284 }; 255 };
285 256
286 257
287 private: 258 private:
288 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; 259 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
289 intptr_t total_samples_; 260 int total_samples_;
290 }; 261 };
291 262
292 263
293 class RegExpCompiler : public ValueObject { 264 class RegExpCompiler {
294 public: 265 public:
295 RegExpCompiler(intptr_t capture_count, 266 RegExpCompiler(int capture_count, bool ignore_case, bool is_one_byte,
296 bool ignore_case, 267 Zone* zone);
297 intptr_t specialization_cid);
298 268
299 intptr_t AllocateRegister() { 269 int AllocateRegister() {
270 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
271 reg_exp_too_big_ = true;
272 return next_register_;
273 }
300 return next_register_++; 274 return next_register_++;
301 } 275 }
302 276
303 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler, 277 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,
304 RegExpNode* start, 278 RegExpNode* start,
305 intptr_t capture_count, 279 int capture_count,
306 const String& pattern); 280 Handle<String> pattern);
307 281
308 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } 282 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
309 283
310 static const intptr_t kImplementationOffset = 0; 284 static const int kImplementationOffset = 0;
311 static const intptr_t kNumberOfRegistersOffset = 0; 285 static const int kNumberOfRegistersOffset = 0;
312 static const intptr_t kCodeOffset = 1; 286 static const int kCodeOffset = 1;
313 287
314 IRRegExpMacroAssembler* macro_assembler() { return macro_assembler_; } 288 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
315 EndNode* accept() { return accept_; } 289 EndNode* accept() { return accept_; }
316 290
317 static const intptr_t kMaxRecursion = 100; 291 static const int kMaxRecursion = 100;
318 inline intptr_t recursion_depth() { return recursion_depth_; } 292 inline int recursion_depth() { return recursion_depth_; }
319 inline void IncrementRecursionDepth() { recursion_depth_++; } 293 inline void IncrementRecursionDepth() { recursion_depth_++; }
320 inline void DecrementRecursionDepth() { recursion_depth_--; } 294 inline void DecrementRecursionDepth() { recursion_depth_--; }
321 295
322 void SetRegExpTooBig() { reg_exp_too_big_ = true; } 296 void SetRegExpTooBig() { reg_exp_too_big_ = true; }
323 297
324 inline bool ignore_case() { return ignore_case_; } 298 inline bool ignore_case() { return ignore_case_; }
325 inline bool one_byte() const { 299 inline bool one_byte() { return one_byte_; }
326 return (specialization_cid_ == kOneByteStringCid ||
327 specialization_cid_ == kExternalOneByteStringCid);
328 }
329 inline intptr_t specialization_cid() { return specialization_cid_; }
330 FrequencyCollator* frequency_collator() { return &frequency_collator_; } 300 FrequencyCollator* frequency_collator() { return &frequency_collator_; }
331 301
332 intptr_t current_expansion_factor() { return current_expansion_factor_; } 302 int current_expansion_factor() { return current_expansion_factor_; }
333 void set_current_expansion_factor(intptr_t value) { 303 void set_current_expansion_factor(int value) {
334 current_expansion_factor_ = value; 304 current_expansion_factor_ = value;
335 } 305 }
336 306
337 Isolate* isolate() const { return isolate_; } 307 Zone* zone() const { return zone_; }
338 308
339 static const intptr_t kNoRegister = -1; 309 static const int kNoRegister = -1;
340 310
341 private: 311 private:
342 EndNode* accept_; 312 EndNode* accept_;
343 intptr_t next_register_; 313 int next_register_;
344 ZoneGrowableArray<RegExpNode*>* work_list_; 314 List<RegExpNode*>* work_list_;
345 intptr_t recursion_depth_; 315 int recursion_depth_;
346 IRRegExpMacroAssembler* macro_assembler_; 316 RegExpMacroAssembler* macro_assembler_;
347 bool ignore_case_; 317 bool ignore_case_;
348 intptr_t specialization_cid_; 318 bool one_byte_;
349 bool reg_exp_too_big_; 319 bool reg_exp_too_big_;
350 intptr_t current_expansion_factor_; 320 int current_expansion_factor_;
351 FrequencyCollator frequency_collator_; 321 FrequencyCollator frequency_collator_;
352 Isolate* isolate_; 322 Zone* zone_;
353 }; 323 };
354 324
355 325
356 class RecursionCheck : public ValueObject { 326 class RecursionCheck {
357 public: 327 public:
358 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 328 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
359 compiler->IncrementRecursionDepth(); 329 compiler->IncrementRecursionDepth();
360 } 330 }
361 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 331 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
362 private: 332 private:
363 RegExpCompiler* compiler_; 333 RegExpCompiler* compiler_;
364 }; 334 };
365 335
366 336
367 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { 337 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {
368 return RegExpEngine::CompilationResult("RegExp too big"); 338 return RegExpEngine::CompilationResult(isolate, "RegExp too big");
369 } 339 }
370 340
371 341
372 // Attempts to compile the regexp using an Irregexp code generator. Returns 342 // Attempts to compile the regexp using an Irregexp code generator. Returns
373 // a fixed array or a null handle depending on whether it succeeded. 343 // a fixed array or a null handle depending on whether it succeeded.
374 RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool ignore_case, 344 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case,
375 intptr_t specialization_cid) 345 bool one_byte, Zone* zone)
376 : next_register_(2 * (capture_count + 1)), 346 : next_register_(2 * (capture_count + 1)),
377 work_list_(NULL), 347 work_list_(NULL),
378 recursion_depth_(0), 348 recursion_depth_(0),
379 ignore_case_(ignore_case), 349 ignore_case_(ignore_case),
380 specialization_cid_(specialization_cid), 350 one_byte_(one_byte),
381 reg_exp_too_big_(false), 351 reg_exp_too_big_(false),
382 current_expansion_factor_(1), 352 current_expansion_factor_(1),
383 isolate_(Isolate::Current()) { 353 frequency_collator_(),
384 accept_ = new(I) EndNode(EndNode::ACCEPT, I); 354 zone_(zone) {
355 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);
356 DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
385 } 357 }
386 358
387 359
388 RegExpEngine::CompilationResult RegExpCompiler::Assemble( 360 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
389 IRRegExpMacroAssembler* macro_assembler, 361 RegExpMacroAssembler* macro_assembler,
390 RegExpNode* start, 362 RegExpNode* start,
391 intptr_t capture_count, 363 int capture_count,
392 const String& pattern) { 364 Handle<String> pattern) {
393 static const bool use_slow_safe_regexp_compiler = false; 365 Heap* heap = pattern->GetHeap();
366
367 bool use_slow_safe_regexp_compiler = false;
368 if (heap->total_regexp_code_generated() >
369 RegExpImpl::kRegWxpCompiledLimit &&
370 heap->isolate()->memory_allocator()->SizeExecutable() >
371 RegExpImpl::kRegExpExecutableMemoryLimit) {
372 use_slow_safe_regexp_compiler = true;
373 }
394 374
395 macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler); 375 macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);
396 macro_assembler_ = macro_assembler;
397 376
398 ZoneGrowableArray<RegExpNode*> work_list(0); 377 #ifdef DEBUG
378 if (FLAG_trace_regexp_assembler)
379 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
380 else
381 #endif
382 macro_assembler_ = macro_assembler;
383
384 List <RegExpNode*> work_list(0);
399 work_list_ = &work_list; 385 work_list_ = &work_list;
400 BlockLabel fail; 386 Label fail;
401 macro_assembler_->PushBacktrack(&fail); 387 macro_assembler_->PushBacktrack(&fail);
402 Trace new_trace; 388 Trace new_trace;
403 start->Emit(this, &new_trace); 389 start->Emit(this, &new_trace);
404 macro_assembler_->BindBlock(&fail); 390 macro_assembler_->Bind(&fail);
405 macro_assembler_->Fail(); 391 macro_assembler_->Fail();
406 while (!work_list.is_empty()) { 392 while (!work_list.is_empty()) {
407 work_list.RemoveLast()->Emit(this, &new_trace); 393 work_list.RemoveLast()->Emit(this, &new_trace);
408 } 394 }
409 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); 395 if (reg_exp_too_big_) return IrregexpRegExpTooBig(zone_->isolate());
410 396
411 macro_assembler->GenerateBacktrackBlock(); 397 Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
412 398 heap->IncreaseTotalRegexpCodeGenerated(code->Size());
413 return RegExpEngine::CompilationResult(macro_assembler->backtrack_goto(), 399 work_list_ = NULL;
414 macro_assembler->graph_entry(), 400 #ifdef DEBUG
415 macro_assembler->num_blocks(), 401 if (FLAG_print_code) {
416 macro_assembler->num_stack_locals()); 402 CodeTracer::Scope trace_scope(heap->isolate()->GetCodeTracer());
403 OFStream os(trace_scope.file());
404 Handle<Code>::cast(code)->Disassemble(pattern->ToCString().get(), os);
405 }
406 if (FLAG_trace_regexp_assembler) {
407 delete macro_assembler_;
408 }
409 #endif
410 return RegExpEngine::CompilationResult(*code, next_register_);
417 } 411 }
418 412
419 413
420 bool Trace::DeferredAction::Mentions(intptr_t that) { 414 bool Trace::DeferredAction::Mentions(int that) {
421 if (action_type() == ActionNode::CLEAR_CAPTURES) { 415 if (action_type() == ActionNode::CLEAR_CAPTURES) {
422 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); 416 Interval range = static_cast<DeferredClearCaptures*>(this)->range();
423 return range.Contains(that); 417 return range.Contains(that);
424 } else { 418 } else {
425 return reg() == that; 419 return reg() == that;
426 } 420 }
427 } 421 }
428 422
429 423
430 bool Trace::mentions_reg(intptr_t reg) { 424 bool Trace::mentions_reg(int reg) {
431 for (DeferredAction* action = actions_; 425 for (DeferredAction* action = actions_;
432 action != NULL; 426 action != NULL;
433 action = action->next()) { 427 action = action->next()) {
434 if (action->Mentions(reg)) 428 if (action->Mentions(reg))
435 return true; 429 return true;
436 } 430 }
437 return false; 431 return false;
438 } 432 }
439 433
440 434
441 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) { 435 bool Trace::GetStoredPosition(int reg, int* cp_offset) {
442 ASSERT(*cp_offset == 0); 436 DCHECK_EQ(0, *cp_offset);
443 for (DeferredAction* action = actions_; 437 for (DeferredAction* action = actions_;
444 action != NULL; 438 action != NULL;
445 action = action->next()) { 439 action = action->next()) {
446 if (action->Mentions(reg)) { 440 if (action->Mentions(reg)) {
447 if (action->action_type() == ActionNode::STORE_POSITION) { 441 if (action->action_type() == ActionNode::STORE_POSITION) {
448 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); 442 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
449 return true; 443 return true;
450 } else { 444 } else {
451 return false; 445 return false;
452 } 446 }
453 } 447 }
454 } 448 }
455 return false; 449 return false;
456 } 450 }
457 451
458 452
459 // This is called as we come into a loop choice node and some other tricky 453 int Trace::FindAffectedRegisters(OutSet* affected_registers,
460 // nodes. It normalizes the state of the code generator to ensure we can 454 Zone* zone) {
461 // generate generic code. 455 int max_register = RegExpCompiler::kNoRegister;
462 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers,
463 Isolate* isolate) {
464 intptr_t max_register = RegExpCompiler::kNoRegister;
465 for (DeferredAction* action = actions_; 456 for (DeferredAction* action = actions_;
466 action != NULL; 457 action != NULL;
467 action = action->next()) { 458 action = action->next()) {
468 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { 459 if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
469 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); 460 Interval range = static_cast<DeferredClearCaptures*>(action)->range();
470 for (intptr_t i = range.from(); i <= range.to(); i++) 461 for (int i = range.from(); i <= range.to(); i++)
471 affected_registers->Set(i, isolate); 462 affected_registers->Set(i, zone);
472 if (range.to() > max_register) max_register = range.to(); 463 if (range.to() > max_register) max_register = range.to();
473 } else { 464 } else {
474 affected_registers->Set(action->reg(), isolate); 465 affected_registers->Set(action->reg(), zone);
475 if (action->reg() > max_register) max_register = action->reg(); 466 if (action->reg() > max_register) max_register = action->reg();
476 } 467 }
477 } 468 }
478 return max_register; 469 return max_register;
479 } 470 }
480 471
481 472
482 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, 473 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
483 intptr_t max_register, 474 int max_register,
484 const OutSet& registers_to_pop, 475 const OutSet& registers_to_pop,
485 const OutSet& registers_to_clear) { 476 const OutSet& registers_to_clear) {
486 for (intptr_t reg = max_register; reg >= 0; reg--) { 477 for (int reg = max_register; reg >= 0; reg--) {
487 if (registers_to_pop.Get(reg)) { 478 if (registers_to_pop.Get(reg)) {
488 assembler->PopRegister(reg); 479 assembler->PopRegister(reg);
489 } else if (registers_to_clear.Get(reg)) { 480 } else if (registers_to_clear.Get(reg)) {
490 intptr_t clear_to = reg; 481 int clear_to = reg;
491 while (reg > 0 && registers_to_clear.Get(reg - 1)) { 482 while (reg > 0 && registers_to_clear.Get(reg - 1)) {
492 reg--; 483 reg--;
493 } 484 }
494 assembler->ClearRegisters(reg, clear_to); 485 assembler->ClearRegisters(reg, clear_to);
495 } 486 }
496 } 487 }
497 } 488 }
498 489
499 490
500 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, 491 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
501 intptr_t max_register, 492 int max_register,
502 const OutSet& affected_registers, 493 const OutSet& affected_registers,
503 OutSet* registers_to_pop, 494 OutSet* registers_to_pop,
504 OutSet* registers_to_clear, 495 OutSet* registers_to_clear,
505 Isolate* isolate) { 496 Zone* zone) {
506 for (intptr_t reg = 0; reg <= max_register; reg++) { 497 // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
498 const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
499
500 // Count pushes performed to force a stack limit check occasionally.
501 int pushes = 0;
502
503 for (int reg = 0; reg <= max_register; reg++) {
507 if (!affected_registers.Get(reg)) { 504 if (!affected_registers.Get(reg)) {
508 continue; 505 continue;
509 } 506 }
510 507
511 // The chronologically first deferred action in the trace 508 // The chronologically first deferred action in the trace
512 // is used to infer the action needed to restore a register 509 // is used to infer the action needed to restore a register
513 // to its previous state (or not, if it's safe to ignore it). 510 // to its previous state (or not, if it's safe to ignore it).
514 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; 511 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
515 DeferredActionUndoType undo_action = IGNORE; 512 DeferredActionUndoType undo_action = IGNORE;
516 513
517 intptr_t value = 0; 514 int value = 0;
518 bool absolute = false; 515 bool absolute = false;
519 bool clear = false; 516 bool clear = false;
520 intptr_t store_position = -1; 517 int store_position = -1;
521 // This is a little tricky because we are scanning the actions in reverse 518 // This is a little tricky because we are scanning the actions in reverse
522 // historical order (newest first). 519 // historical order (newest first).
523 for (DeferredAction* action = actions_; 520 for (DeferredAction* action = actions_;
524 action != NULL; 521 action != NULL;
525 action = action->next()) { 522 action = action->next()) {
526 if (action->Mentions(reg)) { 523 if (action->Mentions(reg)) {
527 switch (action->action_type()) { 524 switch (action->action_type()) {
528 case ActionNode::SET_REGISTER: { 525 case ActionNode::SET_REGISTER: {
529 Trace::DeferredSetRegister* psr = 526 Trace::DeferredSetRegister* psr =
530 static_cast<Trace::DeferredSetRegister*>(action); 527 static_cast<Trace::DeferredSetRegister*>(action);
531 if (!absolute) { 528 if (!absolute) {
532 value += psr->value(); 529 value += psr->value();
533 absolute = true; 530 absolute = true;
534 } 531 }
535 // SET_REGISTER is currently only used for newly introduced loop 532 // SET_REGISTER is currently only used for newly introduced loop
536 // counters. They can have a significant previous value if they 533 // counters. They can have a significant previous value if they
537 // occur in a loop. TODO(lrn): Propagate this information, so 534 // occur in a loop. TODO(lrn): Propagate this information, so
538 // we can set undo_action to IGNORE if we know there is no value to 535 // we can set undo_action to IGNORE if we know there is no value to
539 // restore. 536 // restore.
540 undo_action = RESTORE; 537 undo_action = RESTORE;
541 ASSERT(store_position == -1); 538 DCHECK_EQ(store_position, -1);
542 ASSERT(!clear); 539 DCHECK(!clear);
543 break; 540 break;
544 } 541 }
545 case ActionNode::INCREMENT_REGISTER: 542 case ActionNode::INCREMENT_REGISTER:
546 if (!absolute) { 543 if (!absolute) {
547 value++; 544 value++;
548 } 545 }
549 ASSERT(store_position == -1); 546 DCHECK_EQ(store_position, -1);
550 ASSERT(!clear); 547 DCHECK(!clear);
551 undo_action = RESTORE; 548 undo_action = RESTORE;
552 break; 549 break;
553 case ActionNode::STORE_POSITION: { 550 case ActionNode::STORE_POSITION: {
554 Trace::DeferredCapture* pc = 551 Trace::DeferredCapture* pc =
555 static_cast<Trace::DeferredCapture*>(action); 552 static_cast<Trace::DeferredCapture*>(action);
556 if (!clear && store_position == -1) { 553 if (!clear && store_position == -1) {
557 store_position = pc->cp_offset(); 554 store_position = pc->cp_offset();
558 } 555 }
559 556
560 // For captures we know that stores and clears alternate. 557 // For captures we know that stores and clears alternate.
561 // Other registers are never cleared, and if they occur 558 // Other registers are never cleared, and if they occur
562 // inside a loop, they might be assigned more than once. 559 // inside a loop, they might be assigned more than once.
563 if (reg <= 1) { 560 if (reg <= 1) {
564 // Registers zero and one, aka "capture zero", are 561 // Registers zero and one, aka "capture zero", are
565 // always set correctly if we succeed. There is no 562 // always set correctly if we succeed. There is no
566 // need to undo a setting on backtrack, because we 563 // need to undo a setting on backtrack, because we
567 // will set it again or fail. 564 // will set it again or fail.
568 undo_action = IGNORE; 565 undo_action = IGNORE;
569 } else { 566 } else {
570 undo_action = pc->is_capture() ? CLEAR : RESTORE; 567 undo_action = pc->is_capture() ? CLEAR : RESTORE;
571 } 568 }
572 ASSERT(!absolute); 569 DCHECK(!absolute);
573 ASSERT(value == 0); 570 DCHECK_EQ(value, 0);
574 break; 571 break;
575 } 572 }
576 case ActionNode::CLEAR_CAPTURES: { 573 case ActionNode::CLEAR_CAPTURES: {
577 // Since we're scanning in reverse order, if we've already 574 // Since we're scanning in reverse order, if we've already
578 // set the position we have to ignore historically earlier 575 // set the position we have to ignore historically earlier
579 // clearing operations. 576 // clearing operations.
580 if (store_position == -1) { 577 if (store_position == -1) {
581 clear = true; 578 clear = true;
582 } 579 }
583 undo_action = RESTORE; 580 undo_action = RESTORE;
584 ASSERT(!absolute); 581 DCHECK(!absolute);
585 ASSERT(value == 0); 582 DCHECK_EQ(value, 0);
586 break; 583 break;
587 } 584 }
588 default: 585 default:
589 UNREACHABLE(); 586 UNREACHABLE();
590 break; 587 break;
591 } 588 }
592 } 589 }
593 } 590 }
594 // Prepare for the undo-action (e.g., push if it's going to be popped). 591 // Prepare for the undo-action (e.g., push if it's going to be popped).
595 if (undo_action == RESTORE) { 592 if (undo_action == RESTORE) {
596 assembler->PushRegister(reg); 593 pushes++;
597 registers_to_pop->Set(reg, isolate); 594 RegExpMacroAssembler::StackCheckFlag stack_check =
595 RegExpMacroAssembler::kNoStackLimitCheck;
596 if (pushes == push_limit) {
597 stack_check = RegExpMacroAssembler::kCheckStackLimit;
598 pushes = 0;
599 }
600
601 assembler->PushRegister(reg, stack_check);
602 registers_to_pop->Set(reg, zone);
598 } else if (undo_action == CLEAR) { 603 } else if (undo_action == CLEAR) {
599 registers_to_clear->Set(reg, isolate); 604 registers_to_clear->Set(reg, zone);
600 } 605 }
601 // Perform the chronologically last action (or accumulated increment) 606 // Perform the chronologically last action (or accumulated increment)
602 // for the register. 607 // for the register.
603 if (store_position != -1) { 608 if (store_position != -1) {
604 assembler->WriteCurrentPositionToRegister(reg, store_position); 609 assembler->WriteCurrentPositionToRegister(reg, store_position);
605 } else if (clear) { 610 } else if (clear) {
606 assembler->ClearRegisters(reg, reg); 611 assembler->ClearRegisters(reg, reg);
607 } else if (absolute) { 612 } else if (absolute) {
608 assembler->SetRegister(reg, value); 613 assembler->SetRegister(reg, value);
609 } else if (value != 0) { 614 } else if (value != 0) {
610 assembler->AdvanceRegister(reg, value); 615 assembler->AdvanceRegister(reg, value);
611 } 616 }
612 } 617 }
613 } 618 }
614 619
615 620
616 // This is called as we come into a loop choice node and some other tricky 621 // This is called as we come into a loop choice node and some other tricky
617 // nodes. It normalizes the state of the code generator to ensure we can 622 // nodes. It normalizes the state of the code generator to ensure we can
618 // generate generic code. 623 // generate generic code.
619 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { 624 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
620 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 625 RegExpMacroAssembler* assembler = compiler->macro_assembler();
621 626
622 ASSERT(!is_trivial()); 627 DCHECK(!is_trivial());
623 628
624 if (actions_ == NULL && backtrack() == NULL) { 629 if (actions_ == NULL && backtrack() == NULL) {
625 // Here we just have some deferred cp advances to fix and we are back to 630 // Here we just have some deferred cp advances to fix and we are back to
626 // a normal situation. We may also have to forget some information gained 631 // a normal situation. We may also have to forget some information gained
627 // through a quick check that was already performed. 632 // through a quick check that was already performed.
628 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); 633 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
629 // Create a new trivial state and generate the node with that. 634 // Create a new trivial state and generate the node with that.
630 Trace new_state; 635 Trace new_state;
631 successor->Emit(compiler, &new_state); 636 successor->Emit(compiler, &new_state);
632 return; 637 return;
633 } 638 }
634 639
635 // Generate deferred actions here along with code to undo them again. 640 // Generate deferred actions here along with code to undo them again.
636 OutSet affected_registers; 641 OutSet affected_registers;
637 642
638 if (backtrack() != NULL) { 643 if (backtrack() != NULL) {
639 // Here we have a concrete backtrack location. These are set up by choice 644 // Here we have a concrete backtrack location. These are set up by choice
640 // nodes and so they indicate that we have a deferred save of the current 645 // nodes and so they indicate that we have a deferred save of the current
641 // position which we may need to emit here. 646 // position which we may need to emit here.
642 assembler->PushCurrentPosition(); 647 assembler->PushCurrentPosition();
643 } 648 }
644 649
645 intptr_t max_register = FindAffectedRegisters(&affected_registers, CI); 650 int max_register = FindAffectedRegisters(&affected_registers,
651 compiler->zone());
646 OutSet registers_to_pop; 652 OutSet registers_to_pop;
647 OutSet registers_to_clear; 653 OutSet registers_to_clear;
648 PerformDeferredActions(assembler, 654 PerformDeferredActions(assembler,
649 max_register, 655 max_register,
650 affected_registers, 656 affected_registers,
651 &registers_to_pop, 657 &registers_to_pop,
652 &registers_to_clear, 658 &registers_to_clear,
653 CI); 659 compiler->zone());
654 if (cp_offset_ != 0) { 660 if (cp_offset_ != 0) {
655 assembler->AdvanceCurrentPosition(cp_offset_); 661 assembler->AdvanceCurrentPosition(cp_offset_);
656 } 662 }
657 663
658 // Create a new trivial state and generate the node with that. 664 // Create a new trivial state and generate the node with that.
659 BlockLabel undo; 665 Label undo;
660 assembler->PushBacktrack(&undo); 666 assembler->PushBacktrack(&undo);
661 Trace new_state; 667 Trace new_state;
662 successor->Emit(compiler, &new_state); 668 successor->Emit(compiler, &new_state);
663 669
664 // On backtrack we need to restore state. 670 // On backtrack we need to restore state.
665 assembler->BindBlock(&undo); 671 assembler->Bind(&undo);
666 RestoreAffectedRegisters(assembler, 672 RestoreAffectedRegisters(assembler,
667 max_register, 673 max_register,
668 registers_to_pop, 674 registers_to_pop,
669 registers_to_clear); 675 registers_to_clear);
670 if (backtrack() == NULL) { 676 if (backtrack() == NULL) {
671 assembler->Backtrack(); 677 assembler->Backtrack();
672 } else { 678 } else {
673 assembler->PopCurrentPosition(); 679 assembler->PopCurrentPosition();
674 assembler->GoTo(backtrack()); 680 assembler->GoTo(backtrack());
675 } 681 }
676 } 682 }
677 683
678 684
679 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { 685 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
680 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 686 RegExpMacroAssembler* assembler = compiler->macro_assembler();
681 687
682 // Omit flushing the trace. We discard the entire stack frame anyway. 688 // Omit flushing the trace. We discard the entire stack frame anyway.
683 689
684 if (!label()->IsBound()) { 690 if (!label()->is_bound()) {
685 // We are completely independent of the trace, since we ignore it, 691 // We are completely independent of the trace, since we ignore it,
686 // so this code can be used as the generic version. 692 // so this code can be used as the generic version.
687 assembler->BindBlock(label()); 693 assembler->Bind(label());
688 } 694 }
689 695
690 // Throw away everything on the backtrack stack since the start 696 // Throw away everything on the backtrack stack since the start
691 // of the negative submatch and restore the character position. 697 // of the negative submatch and restore the character position.
692 assembler->ReadCurrentPositionFromRegister(current_position_register_); 698 assembler->ReadCurrentPositionFromRegister(current_position_register_);
693 assembler->ReadStackPointerFromRegister(stack_pointer_register_); 699 assembler->ReadStackPointerFromRegister(stack_pointer_register_);
694 if (clear_capture_count_ > 0) { 700 if (clear_capture_count_ > 0) {
695 // Clear any captures that might have been performed during the success 701 // Clear any captures that might have been performed during the success
696 // of the body of the negative look-ahead. 702 // of the body of the negative look-ahead.
697 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1; 703 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
698 assembler->ClearRegisters(clear_capture_start_, clear_capture_end); 704 assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
699 } 705 }
700 // Now that we have unwound the stack we find at the top of the stack the 706 // Now that we have unwound the stack we find at the top of the stack the
701 // backtrack that the BeginSubmatch node got. 707 // backtrack that the BeginSubmatch node got.
702 assembler->Backtrack(); 708 assembler->Backtrack();
703 } 709 }
704 710
705 711
706 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) { 712 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
707 if (!trace->is_trivial()) { 713 if (!trace->is_trivial()) {
708 trace->Flush(compiler, this); 714 trace->Flush(compiler, this);
709 return; 715 return;
710 } 716 }
711 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 717 RegExpMacroAssembler* assembler = compiler->macro_assembler();
712 if (!label()->IsBound()) { 718 if (!label()->is_bound()) {
713 assembler->BindBlock(label()); 719 assembler->Bind(label());
714 } 720 }
715 switch (action_) { 721 switch (action_) {
716 case ACCEPT: 722 case ACCEPT:
717 assembler->Succeed(); 723 assembler->Succeed();
718 return; 724 return;
719 case BACKTRACK: 725 case BACKTRACK:
720 assembler->GoTo(trace->backtrack()); 726 assembler->GoTo(trace->backtrack());
721 return; 727 return;
722 case NEGATIVE_SUBMATCH_SUCCESS: 728 case NEGATIVE_SUBMATCH_SUCCESS:
723 // This case is handled in a different virtual method. 729 // This case is handled in a different virtual method.
724 UNREACHABLE(); 730 UNREACHABLE();
725 } 731 }
726 UNIMPLEMENTED(); 732 UNIMPLEMENTED();
727 } 733 }
728 734
729 735
730 void GuardedAlternative::AddGuard(Guard* guard, Isolate* isolate) { 736 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
731 if (guards_ == NULL) 737 if (guards_ == NULL)
732 guards_ = new(isolate) ZoneGrowableArray<Guard*>(1); 738 guards_ = new(zone) ZoneList<Guard*>(1, zone);
733 guards_->Add(guard); 739 guards_->Add(guard, zone);
734 } 740 }
735 741
736 742
737 ActionNode* ActionNode::SetRegister(intptr_t reg, 743 ActionNode* ActionNode::SetRegister(int reg,
738 intptr_t val, 744 int val,
739 RegExpNode* on_success) { 745 RegExpNode* on_success) {
740 ActionNode* result = 746 ActionNode* result =
741 new(on_success->isolate()) ActionNode(SET_REGISTER, on_success); 747 new(on_success->zone()) ActionNode(SET_REGISTER, on_success);
742 result->data_.u_store_register.reg = reg; 748 result->data_.u_store_register.reg = reg;
743 result->data_.u_store_register.value = val; 749 result->data_.u_store_register.value = val;
744 return result; 750 return result;
745 } 751 }
746 752
747 753
748 ActionNode* ActionNode::IncrementRegister(intptr_t reg, 754 ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
749 RegExpNode* on_success) {
750 ActionNode* result = 755 ActionNode* result =
751 new(on_success->isolate()) ActionNode(INCREMENT_REGISTER, on_success); 756 new(on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success);
752 result->data_.u_increment_register.reg = reg; 757 result->data_.u_increment_register.reg = reg;
753 return result; 758 return result;
754 } 759 }
755 760
756 761
757 ActionNode* ActionNode::StorePosition(intptr_t reg, 762 ActionNode* ActionNode::StorePosition(int reg,
758 bool is_capture, 763 bool is_capture,
759 RegExpNode* on_success) { 764 RegExpNode* on_success) {
760 ActionNode* result = 765 ActionNode* result =
761 new(on_success->isolate()) ActionNode(STORE_POSITION, on_success); 766 new(on_success->zone()) ActionNode(STORE_POSITION, on_success);
762 result->data_.u_position_register.reg = reg; 767 result->data_.u_position_register.reg = reg;
763 result->data_.u_position_register.is_capture = is_capture; 768 result->data_.u_position_register.is_capture = is_capture;
764 return result; 769 return result;
765 } 770 }
766 771
767 772
768 ActionNode* ActionNode::ClearCaptures(Interval range, 773 ActionNode* ActionNode::ClearCaptures(Interval range,
769 RegExpNode* on_success) { 774 RegExpNode* on_success) {
770 ActionNode* result = 775 ActionNode* result =
771 new(on_success->isolate()) ActionNode(CLEAR_CAPTURES, on_success); 776 new(on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success);
772 result->data_.u_clear_captures.range_from = range.from(); 777 result->data_.u_clear_captures.range_from = range.from();
773 result->data_.u_clear_captures.range_to = range.to(); 778 result->data_.u_clear_captures.range_to = range.to();
774 return result; 779 return result;
775 } 780 }
776 781
777 782
778 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg, 783 ActionNode* ActionNode::BeginSubmatch(int stack_reg,
779 intptr_t position_reg, 784 int position_reg,
780 RegExpNode* on_success) { 785 RegExpNode* on_success) {
781 ActionNode* result = 786 ActionNode* result =
782 new(on_success->isolate()) ActionNode(BEGIN_SUBMATCH, on_success); 787 new(on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success);
783 result->data_.u_submatch.stack_pointer_register = stack_reg; 788 result->data_.u_submatch.stack_pointer_register = stack_reg;
784 result->data_.u_submatch.current_position_register = position_reg; 789 result->data_.u_submatch.current_position_register = position_reg;
785 return result; 790 return result;
786 } 791 }
787 792
788 793
789 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg, 794 ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,
790 intptr_t position_reg, 795 int position_reg,
791 intptr_t clear_register_count, 796 int clear_register_count,
792 intptr_t clear_register_from, 797 int clear_register_from,
793 RegExpNode* on_success) { 798 RegExpNode* on_success) {
794 ActionNode* result = 799 ActionNode* result =
795 new(on_success->isolate()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, 800 new(on_success->zone()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
796 on_success);
797 result->data_.u_submatch.stack_pointer_register = stack_reg; 801 result->data_.u_submatch.stack_pointer_register = stack_reg;
798 result->data_.u_submatch.current_position_register = position_reg; 802 result->data_.u_submatch.current_position_register = position_reg;
799 result->data_.u_submatch.clear_register_count = clear_register_count; 803 result->data_.u_submatch.clear_register_count = clear_register_count;
800 result->data_.u_submatch.clear_register_from = clear_register_from; 804 result->data_.u_submatch.clear_register_from = clear_register_from;
801 return result; 805 return result;
802 } 806 }
803 807
804 808
805 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register, 809 ActionNode* ActionNode::EmptyMatchCheck(int start_register,
806 intptr_t repetition_register, 810 int repetition_register,
807 intptr_t repetition_limit, 811 int repetition_limit,
808 RegExpNode* on_success) { 812 RegExpNode* on_success) {
809 ActionNode* result = 813 ActionNode* result =
810 new(on_success->isolate()) ActionNode(EMPTY_MATCH_CHECK, on_success); 814 new(on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success);
811 result->data_.u_empty_match_check.start_register = start_register; 815 result->data_.u_empty_match_check.start_register = start_register;
812 result->data_.u_empty_match_check.repetition_register = repetition_register; 816 result->data_.u_empty_match_check.repetition_register = repetition_register;
813 result->data_.u_empty_match_check.repetition_limit = repetition_limit; 817 result->data_.u_empty_match_check.repetition_limit = repetition_limit;
814 return result; 818 return result;
815 } 819 }
816 820
817 821
818 #define DEFINE_ACCEPT(Type) \ 822 #define DEFINE_ACCEPT(Type) \
819 void Type##Node::Accept(NodeVisitor* visitor) { \ 823 void Type##Node::Accept(NodeVisitor* visitor) { \
820 visitor->Visit##Type(this); \ 824 visitor->Visit##Type(this); \
821 } 825 }
822 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) 826 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
823 #undef DEFINE_ACCEPT 827 #undef DEFINE_ACCEPT
824 828
825 829
826 void LoopChoiceNode::Accept(NodeVisitor* visitor) { 830 void LoopChoiceNode::Accept(NodeVisitor* visitor) {
827 visitor->VisitLoopChoice(this); 831 visitor->VisitLoopChoice(this);
828 } 832 }
829 833
830 834
831 // ------------------------------------------------------------------- 835 // -------------------------------------------------------------------
832 // Emit code. 836 // Emit code.
833 837
834 838
835 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, 839 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
836 Guard* guard, 840 Guard* guard,
837 Trace* trace) { 841 Trace* trace) {
838 switch (guard->op()) { 842 switch (guard->op()) {
839 case Guard::LT: 843 case Guard::LT:
840 ASSERT(!trace->mentions_reg(guard->reg())); 844 DCHECK(!trace->mentions_reg(guard->reg()));
841 macro_assembler->IfRegisterGE(guard->reg(), 845 macro_assembler->IfRegisterGE(guard->reg(),
842 guard->value(), 846 guard->value(),
843 trace->backtrack()); 847 trace->backtrack());
844 break; 848 break;
845 case Guard::GEQ: 849 case Guard::GEQ:
846 ASSERT(!trace->mentions_reg(guard->reg())); 850 DCHECK(!trace->mentions_reg(guard->reg()));
847 macro_assembler->IfRegisterLT(guard->reg(), 851 macro_assembler->IfRegisterLT(guard->reg(),
848 guard->value(), 852 guard->value(),
849 trace->backtrack()); 853 trace->backtrack());
850 break; 854 break;
851 } 855 }
852 } 856 }
853 857
854 858
855 // Returns the number of characters in the equivalence class, omitting those 859 // Returns the number of characters in the equivalence class, omitting those
856 // that cannot occur in the source string because it is ASCII. 860 // that cannot occur in the source string because it is ASCII.
857 static intptr_t GetCaseIndependentLetters(uint16_t character, 861 static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
858 bool one_byte_subject, 862 bool one_byte_subject,
859 int32_t* letters) { 863 unibrow::uchar* letters) {
860 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; 864 int length =
861 intptr_t length = jsregexp_uncanonicalize.get(character, '\0', letters); 865 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
862 // Unibrow returns 0 or 1 for characters where case independence is 866 // Unibrow returns 0 or 1 for characters where case independence is
863 // trivial. 867 // trivial.
864 if (length == 0) { 868 if (length == 0) {
865 letters[0] = character; 869 letters[0] = character;
866 length = 1; 870 length = 1;
867 } 871 }
868 if (!one_byte_subject || character <= Symbols::kMaxOneCharCodeSymbol) { 872 if (!one_byte_subject || character <= String::kMaxOneByteCharCode) {
869 return length; 873 return length;
870 } 874 }
871 875
872 // The standard requires that non-ASCII characters cannot have ASCII 876 // The standard requires that non-ASCII characters cannot have ASCII
873 // character codes in their equivalence class. 877 // character codes in their equivalence class.
874 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore, 878 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore,
875 // is it? For example, \u00C5 is equivalent to \u212B. 879 // is it? For example, \u00C5 is equivalent to \u212B.
876 return 0; 880 return 0;
877 } 881 }
878 882
879 883
880 static inline bool EmitSimpleCharacter(Isolate* isolate, 884 static inline bool EmitSimpleCharacter(Isolate* isolate,
881 RegExpCompiler* compiler, 885 RegExpCompiler* compiler,
882 uint16_t c, 886 uc16 c,
883 BlockLabel* on_failure, 887 Label* on_failure,
884 intptr_t cp_offset, 888 int cp_offset,
885 bool check, 889 bool check,
886 bool preloaded) { 890 bool preloaded) {
887 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 891 RegExpMacroAssembler* assembler = compiler->macro_assembler();
888 bool bound_checked = false; 892 bool bound_checked = false;
889 if (!preloaded) { 893 if (!preloaded) {
890 assembler->LoadCurrentCharacter( 894 assembler->LoadCurrentCharacter(
891 cp_offset, 895 cp_offset,
892 on_failure, 896 on_failure,
893 check); 897 check);
894 bound_checked = true; 898 bound_checked = true;
895 } 899 }
896 assembler->CheckNotCharacter(c, on_failure); 900 assembler->CheckNotCharacter(c, on_failure);
897 return bound_checked; 901 return bound_checked;
898 } 902 }
899 903
900 904
901 // Only emits non-letters (things that don't have case). Only used for case 905 // Only emits non-letters (things that don't have case). Only used for case
902 // independent matches. 906 // independent matches.
903 static inline bool EmitAtomNonLetter(Isolate* isolate, 907 static inline bool EmitAtomNonLetter(Isolate* isolate,
904 RegExpCompiler* compiler, 908 RegExpCompiler* compiler,
905 uint16_t c, 909 uc16 c,
906 BlockLabel* on_failure, 910 Label* on_failure,
907 intptr_t cp_offset, 911 int cp_offset,
908 bool check, 912 bool check,
909 bool preloaded) { 913 bool preloaded) {
910 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 914 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
911 bool one_byte = compiler->one_byte(); 915 bool one_byte = compiler->one_byte();
912 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 916 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
913 intptr_t length = GetCaseIndependentLetters(c, one_byte, chars); 917 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
914 if (length < 1) { 918 if (length < 1) {
915 // This can't match. Must be a one-byte subject and a non-one-byte 919 // This can't match. Must be a one-byte subject and a non-one-byte
916 // character. We do not need to do anything since the one-byte pass 920 // character. We do not need to do anything since the one-byte pass
917 // already handled this. 921 // already handled this.
918 return false; // Bounds not checked. 922 return false; // Bounds not checked.
919 } 923 }
920 bool checked = false; 924 bool checked = false;
921 // We handle the length > 1 case in a later pass. 925 // We handle the length > 1 case in a later pass.
922 if (length == 1) { 926 if (length == 1) {
923 if (one_byte && c > Symbols::kMaxOneCharCodeSymbol) { 927 if (one_byte && c > String::kMaxOneByteCharCodeU) {
924 // Can't match - see above. 928 // Can't match - see above.
925 return false; // Bounds not checked. 929 return false; // Bounds not checked.
926 } 930 }
927 if (!preloaded) { 931 if (!preloaded) {
928 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 932 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
929 checked = check; 933 checked = check;
930 } 934 }
931 macro_assembler->CheckNotCharacter(c, on_failure); 935 macro_assembler->CheckNotCharacter(c, on_failure);
932 } 936 }
933 return checked; 937 return checked;
934 } 938 }
935 939
936 940
937 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, 941 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
938 bool one_byte, 942 bool one_byte, uc16 c1, uc16 c2,
939 uint16_t c1, 943 Label* on_failure) {
940 uint16_t c2, 944 uc16 char_mask;
941 BlockLabel* on_failure) {
942 uint16_t char_mask;
943 if (one_byte) { 945 if (one_byte) {
944 char_mask = Symbols::kMaxOneCharCodeSymbol; 946 char_mask = String::kMaxOneByteCharCode;
945 } else { 947 } else {
946 char_mask = Utf16::kMaxCodeUnit; 948 char_mask = String::kMaxUtf16CodeUnit;
947 } 949 }
948 uint16_t exor = c1 ^ c2; 950 uc16 exor = c1 ^ c2;
949 // Check whether exor has only one bit set. 951 // Check whether exor has only one bit set.
950 if (((exor - 1) & exor) == 0) { 952 if (((exor - 1) & exor) == 0) {
951 // If c1 and c2 differ only by one bit. 953 // If c1 and c2 differ only by one bit.
952 // Ecma262UnCanonicalize always gives the highest number last. 954 // Ecma262UnCanonicalize always gives the highest number last.
953 ASSERT(c2 > c1); 955 DCHECK(c2 > c1);
954 uint16_t mask = char_mask ^ exor; 956 uc16 mask = char_mask ^ exor;
955 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure); 957 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
956 return true; 958 return true;
957 } 959 }
958 ASSERT(c2 > c1); 960 DCHECK(c2 > c1);
959 uint16_t diff = c2 - c1; 961 uc16 diff = c2 - c1;
960 if (((diff - 1) & diff) == 0 && c1 >= diff) { 962 if (((diff - 1) & diff) == 0 && c1 >= diff) {
961 // If the characters differ by 2^n but don't differ by one bit then 963 // If the characters differ by 2^n but don't differ by one bit then
962 // subtract the difference from the found character, then do the or 964 // subtract the difference from the found character, then do the or
963 // trick. We avoid the theoretical case where negative numbers are 965 // trick. We avoid the theoretical case where negative numbers are
964 // involved in order to simplify code generation. 966 // involved in order to simplify code generation.
965 uint16_t mask = char_mask ^ diff; 967 uc16 mask = char_mask ^ diff;
966 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, 968 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
967 diff, 969 diff,
968 mask, 970 mask,
969 on_failure); 971 on_failure);
970 return true; 972 return true;
971 } 973 }
972 return false; 974 return false;
973 } 975 }
974 976
975 977
976 typedef bool EmitCharacterFunction(Isolate* isolate, 978 typedef bool EmitCharacterFunction(Isolate* isolate,
977 RegExpCompiler* compiler, 979 RegExpCompiler* compiler,
978 uint16_t c, 980 uc16 c,
979 BlockLabel* on_failure, 981 Label* on_failure,
980 intptr_t cp_offset, 982 int cp_offset,
981 bool check, 983 bool check,
982 bool preloaded); 984 bool preloaded);
983 985
984 // Only emits letters (things that have case). Only used for case independent 986 // Only emits letters (things that have case). Only used for case independent
985 // matches. 987 // matches.
986 static inline bool EmitAtomLetter(Isolate* isolate, 988 static inline bool EmitAtomLetter(Isolate* isolate,
987 RegExpCompiler* compiler, 989 RegExpCompiler* compiler,
988 uint16_t c, 990 uc16 c,
989 BlockLabel* on_failure, 991 Label* on_failure,
990 intptr_t cp_offset, 992 int cp_offset,
991 bool check, 993 bool check,
992 bool preloaded) { 994 bool preloaded) {
993 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 995 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
994 bool one_byte = compiler->one_byte(); 996 bool one_byte = compiler->one_byte();
995 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 997 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
996 intptr_t length = GetCaseIndependentLetters(c, one_byte, chars); 998 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
997 if (length <= 1) return false; 999 if (length <= 1) return false;
998 // We may not need to check against the end of the input string 1000 // We may not need to check against the end of the input string
999 // if this character lies before a character that matched. 1001 // if this character lies before a character that matched.
1000 if (!preloaded) { 1002 if (!preloaded) {
1001 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1003 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1002 } 1004 }
1003 BlockLabel ok; 1005 Label ok;
1004 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); 1006 DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
1005 switch (length) { 1007 switch (length) {
1006 case 2: { 1008 case 2: {
1007 if (ShortCutEmitCharacterPair(macro_assembler, 1009 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
1008 one_byte, 1010 chars[1], on_failure)) {
1009 chars[0],
1010 chars[1],
1011 on_failure)) {
1012 } else { 1011 } else {
1013 macro_assembler->CheckCharacter(chars[0], &ok); 1012 macro_assembler->CheckCharacter(chars[0], &ok);
1014 macro_assembler->CheckNotCharacter(chars[1], on_failure); 1013 macro_assembler->CheckNotCharacter(chars[1], on_failure);
1015 macro_assembler->BindBlock(&ok); 1014 macro_assembler->Bind(&ok);
1016 } 1015 }
1017 break; 1016 break;
1018 } 1017 }
1019 case 4: 1018 case 4:
1020 macro_assembler->CheckCharacter(chars[3], &ok); 1019 macro_assembler->CheckCharacter(chars[3], &ok);
1021 // Fall through! 1020 // Fall through!
1022 case 3: 1021 case 3:
1023 macro_assembler->CheckCharacter(chars[0], &ok); 1022 macro_assembler->CheckCharacter(chars[0], &ok);
1024 macro_assembler->CheckCharacter(chars[1], &ok); 1023 macro_assembler->CheckCharacter(chars[1], &ok);
1025 macro_assembler->CheckNotCharacter(chars[2], on_failure); 1024 macro_assembler->CheckNotCharacter(chars[2], on_failure);
1026 macro_assembler->BindBlock(&ok); 1025 macro_assembler->Bind(&ok);
1027 break; 1026 break;
1028 default: 1027 default:
1029 UNREACHABLE(); 1028 UNREACHABLE();
1030 break; 1029 break;
1031 } 1030 }
1032 return true; 1031 return true;
1033 } 1032 }
1034 1033
1035 1034
1036 static void EmitBoundaryTest(RegExpMacroAssembler* masm, 1035 static void EmitBoundaryTest(RegExpMacroAssembler* masm,
1037 intptr_t border, 1036 int border,
1038 BlockLabel* fall_through, 1037 Label* fall_through,
1039 BlockLabel* above_or_equal, 1038 Label* above_or_equal,
1040 BlockLabel* below) { 1039 Label* below) {
1041 if (below != fall_through) { 1040 if (below != fall_through) {
1042 masm->CheckCharacterLT(border, below); 1041 masm->CheckCharacterLT(border, below);
1043 if (above_or_equal != fall_through) masm->GoTo(above_or_equal); 1042 if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
1044 } else { 1043 } else {
1045 masm->CheckCharacterGT(border - 1, above_or_equal); 1044 masm->CheckCharacterGT(border - 1, above_or_equal);
1046 } 1045 }
1047 } 1046 }
1048 1047
1049 1048
1050 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, 1049 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
1051 intptr_t first, 1050 int first,
1052 intptr_t last, 1051 int last,
1053 BlockLabel* fall_through, 1052 Label* fall_through,
1054 BlockLabel* in_range, 1053 Label* in_range,
1055 BlockLabel* out_of_range) { 1054 Label* out_of_range) {
1056 if (in_range == fall_through) { 1055 if (in_range == fall_through) {
1057 if (first == last) { 1056 if (first == last) {
1058 masm->CheckNotCharacter(first, out_of_range); 1057 masm->CheckNotCharacter(first, out_of_range);
1059 } else { 1058 } else {
1060 masm->CheckCharacterNotInRange(first, last, out_of_range); 1059 masm->CheckCharacterNotInRange(first, last, out_of_range);
1061 } 1060 }
1062 } else { 1061 } else {
1063 if (first == last) { 1062 if (first == last) {
1064 masm->CheckCharacter(first, in_range); 1063 masm->CheckCharacter(first, in_range);
1065 } else { 1064 } else {
1066 masm->CheckCharacterInRange(first, last, in_range); 1065 masm->CheckCharacterInRange(first, last, in_range);
1067 } 1066 }
1068 if (out_of_range != fall_through) masm->GoTo(out_of_range); 1067 if (out_of_range != fall_through) masm->GoTo(out_of_range);
1069 } 1068 }
1070 } 1069 }
1071 1070
1072 1071
1073 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. 1072 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
1074 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. 1073 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
1075 static void EmitUseLookupTable( 1074 static void EmitUseLookupTable(
1076 RegExpMacroAssembler* masm, 1075 RegExpMacroAssembler* masm,
1077 ZoneGrowableArray<int>* ranges, 1076 ZoneList<int>* ranges,
1078 intptr_t start_index, 1077 int start_index,
1079 intptr_t end_index, 1078 int end_index,
1080 intptr_t min_char, 1079 int min_char,
1081 BlockLabel* fall_through, 1080 Label* fall_through,
1082 BlockLabel* even_label, 1081 Label* even_label,
1083 BlockLabel* odd_label) { 1082 Label* odd_label) {
1084 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; 1083 static const int kSize = RegExpMacroAssembler::kTableSize;
1085 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; 1084 static const int kMask = RegExpMacroAssembler::kTableMask;
1086 1085
1087 intptr_t base = (min_char & ~kMask); 1086 int base = (min_char & ~kMask);
1087 USE(base);
1088 1088
1089 // Assert that everything is on one kTableSize page. 1089 // Assert that everything is on one kTableSize page.
1090 for (intptr_t i = start_index; i <= end_index; i++) { 1090 for (int i = start_index; i <= end_index; i++) {
1091 ASSERT((ranges->At(i) & ~kMask) == base); 1091 DCHECK_EQ(ranges->at(i) & ~kMask, base);
1092 } 1092 }
1093 ASSERT(start_index == 0 || (ranges->At(start_index - 1) & ~kMask) <= base); 1093 DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base);
1094 1094
1095 char templ[kSize]; 1095 char templ[kSize];
1096 BlockLabel* on_bit_set; 1096 Label* on_bit_set;
1097 BlockLabel* on_bit_clear; 1097 Label* on_bit_clear;
1098 intptr_t bit; 1098 int bit;
1099 if (even_label == fall_through) { 1099 if (even_label == fall_through) {
1100 on_bit_set = odd_label; 1100 on_bit_set = odd_label;
1101 on_bit_clear = even_label; 1101 on_bit_clear = even_label;
1102 bit = 1; 1102 bit = 1;
1103 } else { 1103 } else {
1104 on_bit_set = even_label; 1104 on_bit_set = even_label;
1105 on_bit_clear = odd_label; 1105 on_bit_clear = odd_label;
1106 bit = 0; 1106 bit = 0;
1107 } 1107 }
1108 for (intptr_t i = 0; i < (ranges->At(start_index) & kMask) && i < kSize; 1108 for (int i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; i++) {
1109 i++) {
1110 templ[i] = bit; 1109 templ[i] = bit;
1111 } 1110 }
1112 intptr_t j = 0; 1111 int j = 0;
1113 bit ^= 1; 1112 bit ^= 1;
1114 for (intptr_t i = start_index; i < end_index; i++) { 1113 for (int i = start_index; i < end_index; i++) {
1115 for (j = (ranges->At(i) & kMask); j < (ranges->At(i + 1) & kMask); j++) { 1114 for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) {
1116 templ[j] = bit; 1115 templ[j] = bit;
1117 } 1116 }
1118 bit ^= 1; 1117 bit ^= 1;
1119 } 1118 }
1120 for (intptr_t i = j; i < kSize; i++) { 1119 for (int i = j; i < kSize; i++) {
1121 templ[i] = bit; 1120 templ[i] = bit;
1122 } 1121 }
1122 Factory* factory = masm->zone()->isolate()->factory();
1123 // TODO(erikcorry): Cache these. 1123 // TODO(erikcorry): Cache these.
1124 const TypedData& ba = TypedData::ZoneHandle( 1124 Handle<ByteArray> ba = factory->NewByteArray(kSize, TENURED);
1125 masm->isolate(), 1125 for (int i = 0; i < kSize; i++) {
1126 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); 1126 ba->set(i, templ[i]);
1127 for (intptr_t i = 0; i < kSize; i++) {
1128 ba.SetUint8(i, templ[i]);
1129 } 1127 }
1130 masm->CheckBitInTable(ba, on_bit_set); 1128 masm->CheckBitInTable(ba, on_bit_set);
1131 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); 1129 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
1132 } 1130 }
1133 1131
1134 1132
1135 static void CutOutRange(RegExpMacroAssembler* masm, 1133 static void CutOutRange(RegExpMacroAssembler* masm,
1136 ZoneGrowableArray<int>* ranges, 1134 ZoneList<int>* ranges,
1137 intptr_t start_index, 1135 int start_index,
1138 intptr_t end_index, 1136 int end_index,
1139 intptr_t cut_index, 1137 int cut_index,
1140 BlockLabel* even_label, 1138 Label* even_label,
1141 BlockLabel* odd_label) { 1139 Label* odd_label) {
1142 bool odd = (((cut_index - start_index) & 1) == 1); 1140 bool odd = (((cut_index - start_index) & 1) == 1);
1143 BlockLabel* in_range_label = odd ? odd_label : even_label; 1141 Label* in_range_label = odd ? odd_label : even_label;
1144 BlockLabel dummy; 1142 Label dummy;
1145 EmitDoubleBoundaryTest(masm, 1143 EmitDoubleBoundaryTest(masm,
1146 ranges->At(cut_index), 1144 ranges->at(cut_index),
1147 ranges->At(cut_index + 1) - 1, 1145 ranges->at(cut_index + 1) - 1,
1148 &dummy, 1146 &dummy,
1149 in_range_label, 1147 in_range_label,
1150 &dummy); 1148 &dummy);
1151 ASSERT(!dummy.IsLinked()); 1149 DCHECK(!dummy.is_linked());
1152 // Cut out the single range by rewriting the array. This creates a new 1150 // Cut out the single range by rewriting the array. This creates a new
1153 // range that is a merger of the two ranges on either side of the one we 1151 // range that is a merger of the two ranges on either side of the one we
1154 // are cutting out. The oddity of the labels is preserved. 1152 // are cutting out. The oddity of the labels is preserved.
1155 for (intptr_t j = cut_index; j > start_index; j--) { 1153 for (int j = cut_index; j > start_index; j--) {
1156 (*ranges)[j] = ranges->At(j - 1); 1154 ranges->at(j) = ranges->at(j - 1);
1157 } 1155 }
1158 for (intptr_t j = cut_index + 1; j < end_index; j++) { 1156 for (int j = cut_index + 1; j < end_index; j++) {
1159 (*ranges)[j] = ranges->At(j + 1); 1157 ranges->at(j) = ranges->at(j + 1);
1160 } 1158 }
1161 } 1159 }
1162 1160
1163 1161
1164 // Unicode case. Split the search space into kSize spaces that are handled 1162 // Unicode case. Split the search space into kSize spaces that are handled
1165 // with recursion. 1163 // with recursion.
1166 static void SplitSearchSpace(ZoneGrowableArray<int>* ranges, 1164 static void SplitSearchSpace(ZoneList<int>* ranges,
1167 intptr_t start_index, 1165 int start_index,
1168 intptr_t end_index, 1166 int end_index,
1169 intptr_t* new_start_index, 1167 int* new_start_index,
1170 intptr_t* new_end_index, 1168 int* new_end_index,
1171 intptr_t* border) { 1169 int* border) {
1172 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; 1170 static const int kSize = RegExpMacroAssembler::kTableSize;
1173 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; 1171 static const int kMask = RegExpMacroAssembler::kTableMask;
1174 1172
1175 intptr_t first = ranges->At(start_index); 1173 int first = ranges->at(start_index);
1176 intptr_t last = ranges->At(end_index) - 1; 1174 int last = ranges->at(end_index) - 1;
1177 1175
1178 *new_start_index = start_index; 1176 *new_start_index = start_index;
1179 *border = (ranges->At(start_index) & ~kMask) + kSize; 1177 *border = (ranges->at(start_index) & ~kMask) + kSize;
1180 while (*new_start_index < end_index) { 1178 while (*new_start_index < end_index) {
1181 if (ranges->At(*new_start_index) > *border) break; 1179 if (ranges->at(*new_start_index) > *border) break;
1182 (*new_start_index)++; 1180 (*new_start_index)++;
1183 } 1181 }
1184 // new_start_index is the index of the first edge that is beyond the 1182 // new_start_index is the index of the first edge that is beyond the
1185 // current kSize space. 1183 // current kSize space.
1186 1184
1187 // For very large search spaces we do a binary chop search of the non-Latin1 1185 // For very large search spaces we do a binary chop search of the non-Latin1
1188 // space instead of just going to the end of the current kSize space. The 1186 // space instead of just going to the end of the current kSize space. The
1189 // heuristics are complicated a little by the fact that any 128-character 1187 // heuristics are complicated a little by the fact that any 128-character
1190 // encoding space can be quickly tested with a table lookup, so we don't 1188 // encoding space can be quickly tested with a table lookup, so we don't
1191 // wish to do binary chop search at a smaller granularity than that. A 1189 // wish to do binary chop search at a smaller granularity than that. A
1192 // 128-character space can take up a lot of space in the ranges array if, 1190 // 128-character space can take up a lot of space in the ranges array if,
1193 // for example, we only want to match every second character (eg. the lower 1191 // for example, we only want to match every second character (eg. the lower
1194 // case characters on some Unicode pages). 1192 // case characters on some Unicode pages).
1195 intptr_t binary_chop_index = (end_index + start_index) / 2; 1193 int binary_chop_index = (end_index + start_index) / 2;
1196 // The first test ensures that we get to the code that handles the Latin1 1194 // The first test ensures that we get to the code that handles the Latin1
1197 // range with a single not-taken branch, speeding up this important 1195 // range with a single not-taken branch, speeding up this important
1198 // character range (even non-Latin1 charset-based text has spaces and 1196 // character range (even non-Latin1 charset-based text has spaces and
1199 // punctuation). 1197 // punctuation).
1200 if (*border - 1 > Symbols::kMaxOneCharCodeSymbol && // Latin1 case. 1198 if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case.
1201 end_index - start_index > (*new_start_index - start_index) * 2 && 1199 end_index - start_index > (*new_start_index - start_index) * 2 &&
1202 last - first > kSize * 2 && 1200 last - first > kSize * 2 && binary_chop_index > *new_start_index &&
1203 binary_chop_index > *new_start_index && 1201 ranges->at(binary_chop_index) >= first + 2 * kSize) {
1204 ranges->At(binary_chop_index) >= first + 2 * kSize) { 1202 int scan_forward_for_section_border = binary_chop_index;;
1205 intptr_t scan_forward_for_section_border = binary_chop_index;; 1203 int new_border = (ranges->at(binary_chop_index) | kMask) + 1;
1206 intptr_t new_border = (ranges->At(binary_chop_index) | kMask) + 1;
1207 1204
1208 while (scan_forward_for_section_border < end_index) { 1205 while (scan_forward_for_section_border < end_index) {
1209 if (ranges->At(scan_forward_for_section_border) > new_border) { 1206 if (ranges->at(scan_forward_for_section_border) > new_border) {
1210 *new_start_index = scan_forward_for_section_border; 1207 *new_start_index = scan_forward_for_section_border;
1211 *border = new_border; 1208 *border = new_border;
1212 break; 1209 break;
1213 } 1210 }
1214 scan_forward_for_section_border++; 1211 scan_forward_for_section_border++;
1215 } 1212 }
1216 } 1213 }
1217 1214
1218 ASSERT(*new_start_index > start_index); 1215 DCHECK(*new_start_index > start_index);
1219 *new_end_index = *new_start_index - 1; 1216 *new_end_index = *new_start_index - 1;
1220 if (ranges->At(*new_end_index) == *border) { 1217 if (ranges->at(*new_end_index) == *border) {
1221 (*new_end_index)--; 1218 (*new_end_index)--;
1222 } 1219 }
1223 if (*border >= ranges->At(end_index)) { 1220 if (*border >= ranges->at(end_index)) {
1224 *border = ranges->At(end_index); 1221 *border = ranges->at(end_index);
1225 *new_start_index = end_index; // Won't be used. 1222 *new_start_index = end_index; // Won't be used.
1226 *new_end_index = end_index - 1; 1223 *new_end_index = end_index - 1;
1227 } 1224 }
1228 } 1225 }
1229 1226
1230 1227
1231 // Gets a series of segment boundaries representing a character class. If the 1228 // Gets a series of segment boundaries representing a character class. If the
1232 // character is in the range between an even and an odd boundary (counting from 1229 // character is in the range between an even and an odd boundary (counting from
1233 // start_index) then go to even_label, otherwise go to odd_label. We already 1230 // start_index) then go to even_label, otherwise go to odd_label. We already
1234 // know that the character is in the range of min_char to max_char inclusive. 1231 // know that the character is in the range of min_char to max_char inclusive.
1235 // Either label can be NULL indicating backtracking. Either label can also be 1232 // Either label can be NULL indicating backtracking. Either label can also be
1236 // equal to the fall_through label. 1233 // equal to the fall_through label.
1237 static void GenerateBranches(RegExpMacroAssembler* masm, 1234 static void GenerateBranches(RegExpMacroAssembler* masm,
1238 ZoneGrowableArray<int>* ranges, 1235 ZoneList<int>* ranges,
1239 intptr_t start_index, 1236 int start_index,
1240 intptr_t end_index, 1237 int end_index,
1241 uint16_t min_char, 1238 uc16 min_char,
1242 uint16_t max_char, 1239 uc16 max_char,
1243 BlockLabel* fall_through, 1240 Label* fall_through,
1244 BlockLabel* even_label, 1241 Label* even_label,
1245 BlockLabel* odd_label) { 1242 Label* odd_label) {
1246 intptr_t first = ranges->At(start_index); 1243 int first = ranges->at(start_index);
1247 intptr_t last = ranges->At(end_index) - 1; 1244 int last = ranges->at(end_index) - 1;
1248 1245
1249 ASSERT(min_char < first); 1246 DCHECK_LT(min_char, first);
1250 1247
1251 // Just need to test if the character is before or on-or-after 1248 // Just need to test if the character is before or on-or-after
1252 // a particular character. 1249 // a particular character.
1253 if (start_index == end_index) { 1250 if (start_index == end_index) {
1254 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); 1251 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
1255 return; 1252 return;
1256 } 1253 }
1257 1254
1258 // Another almost trivial case: There is one interval in the middle that is 1255 // Another almost trivial case: There is one interval in the middle that is
1259 // different from the end intervals. 1256 // different from the end intervals.
1260 if (start_index + 1 == end_index) { 1257 if (start_index + 1 == end_index) {
1261 EmitDoubleBoundaryTest( 1258 EmitDoubleBoundaryTest(
1262 masm, first, last, fall_through, even_label, odd_label); 1259 masm, first, last, fall_through, even_label, odd_label);
1263 return; 1260 return;
1264 } 1261 }
1265 1262
1266 // It's not worth using table lookup if there are very few intervals in the 1263 // It's not worth using table lookup if there are very few intervals in the
1267 // character class. 1264 // character class.
1268 if (end_index - start_index <= 6) { 1265 if (end_index - start_index <= 6) {
1269 // It is faster to test for individual characters, so we look for those 1266 // It is faster to test for individual characters, so we look for those
1270 // first, then try arbitrary ranges in the second round. 1267 // first, then try arbitrary ranges in the second round.
1271 static intptr_t kNoCutIndex = -1; 1268 static int kNoCutIndex = -1;
1272 intptr_t cut = kNoCutIndex; 1269 int cut = kNoCutIndex;
1273 for (intptr_t i = start_index; i < end_index; i++) { 1270 for (int i = start_index; i < end_index; i++) {
1274 if (ranges->At(i) == ranges->At(i + 1) - 1) { 1271 if (ranges->at(i) == ranges->at(i + 1) - 1) {
1275 cut = i; 1272 cut = i;
1276 break; 1273 break;
1277 } 1274 }
1278 } 1275 }
1279 if (cut == kNoCutIndex) cut = start_index; 1276 if (cut == kNoCutIndex) cut = start_index;
1280 CutOutRange( 1277 CutOutRange(
1281 masm, ranges, start_index, end_index, cut, even_label, odd_label); 1278 masm, ranges, start_index, end_index, cut, even_label, odd_label);
1282 ASSERT(end_index - start_index >= 2); 1279 DCHECK_GE(end_index - start_index, 2);
1283 GenerateBranches(masm, 1280 GenerateBranches(masm,
1284 ranges, 1281 ranges,
1285 start_index + 1, 1282 start_index + 1,
1286 end_index - 1, 1283 end_index - 1,
1287 min_char, 1284 min_char,
1288 max_char, 1285 max_char,
1289 fall_through, 1286 fall_through,
1290 even_label, 1287 even_label,
1291 odd_label); 1288 odd_label);
1292 return; 1289 return;
1293 } 1290 }
1294 1291
1295 // If there are a lot of intervals in the regexp, then we will use tables to 1292 // If there are a lot of intervals in the regexp, then we will use tables to
1296 // determine whether the character is inside or outside the character class. 1293 // determine whether the character is inside or outside the character class.
1297 static const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits; 1294 static const int kBits = RegExpMacroAssembler::kTableSizeBits;
1298 1295
1299 if ((max_char >> kBits) == (min_char >> kBits)) { 1296 if ((max_char >> kBits) == (min_char >> kBits)) {
1300 EmitUseLookupTable(masm, 1297 EmitUseLookupTable(masm,
1301 ranges, 1298 ranges,
1302 start_index, 1299 start_index,
1303 end_index, 1300 end_index,
1304 min_char, 1301 min_char,
1305 fall_through, 1302 fall_through,
1306 even_label, 1303 even_label,
1307 odd_label); 1304 odd_label);
1308 return; 1305 return;
1309 } 1306 }
1310 1307
1311 if ((min_char >> kBits) != (first >> kBits)) { 1308 if ((min_char >> kBits) != (first >> kBits)) {
1312 masm->CheckCharacterLT(first, odd_label); 1309 masm->CheckCharacterLT(first, odd_label);
1313 GenerateBranches(masm, 1310 GenerateBranches(masm,
1314 ranges, 1311 ranges,
1315 start_index + 1, 1312 start_index + 1,
1316 end_index, 1313 end_index,
1317 first, 1314 first,
1318 max_char, 1315 max_char,
1319 fall_through, 1316 fall_through,
1320 odd_label, 1317 odd_label,
1321 even_label); 1318 even_label);
1322 return; 1319 return;
1323 } 1320 }
1324 1321
1325 intptr_t new_start_index = 0; 1322 int new_start_index = 0;
1326 intptr_t new_end_index = 0; 1323 int new_end_index = 0;
1327 intptr_t border = 0; 1324 int border = 0;
1328 1325
1329 SplitSearchSpace(ranges, 1326 SplitSearchSpace(ranges,
1330 start_index, 1327 start_index,
1331 end_index, 1328 end_index,
1332 &new_start_index, 1329 &new_start_index,
1333 &new_end_index, 1330 &new_end_index,
1334 &border); 1331 &border);
1335 1332
1336 BlockLabel handle_rest; 1333 Label handle_rest;
1337 BlockLabel* above = &handle_rest; 1334 Label* above = &handle_rest;
1338 if (border == last + 1) { 1335 if (border == last + 1) {
1339 // We didn't find any section that started after the limit, so everything 1336 // We didn't find any section that started after the limit, so everything
1340 // above the border is one of the terminal labels. 1337 // above the border is one of the terminal labels.
1341 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label; 1338 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
1342 ASSERT(new_end_index == end_index - 1); 1339 DCHECK(new_end_index == end_index - 1);
1343 } 1340 }
1344 1341
1345 ASSERT(start_index <= new_end_index); 1342 DCHECK_LE(start_index, new_end_index);
1346 ASSERT(new_start_index <= end_index); 1343 DCHECK_LE(new_start_index, end_index);
1347 ASSERT(start_index < new_start_index); 1344 DCHECK_LT(start_index, new_start_index);
1348 ASSERT(new_end_index < end_index); 1345 DCHECK_LT(new_end_index, end_index);
1349 ASSERT(new_end_index + 1 == new_start_index || 1346 DCHECK(new_end_index + 1 == new_start_index ||
1350 (new_end_index + 2 == new_start_index && 1347 (new_end_index + 2 == new_start_index &&
1351 border == ranges->At(new_end_index + 1))); 1348 border == ranges->at(new_end_index + 1)));
1352 ASSERT(min_char < border - 1); 1349 DCHECK_LT(min_char, border - 1);
1353 ASSERT(border < max_char); 1350 DCHECK_LT(border, max_char);
1354 ASSERT(ranges->At(new_end_index) < border); 1351 DCHECK_LT(ranges->at(new_end_index), border);
1355 ASSERT(border < ranges->At(new_start_index) || 1352 DCHECK(border < ranges->at(new_start_index) ||
1356 (border == ranges->At(new_start_index) && 1353 (border == ranges->at(new_start_index) &&
1357 new_start_index == end_index && 1354 new_start_index == end_index &&
1358 new_end_index == end_index - 1 && 1355 new_end_index == end_index - 1 &&
1359 border == last + 1)); 1356 border == last + 1));
1360 ASSERT(new_start_index == 0 || border >= ranges->At(new_start_index - 1)); 1357 DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1));
1361 1358
1362 masm->CheckCharacterGT(border - 1, above); 1359 masm->CheckCharacterGT(border - 1, above);
1363 BlockLabel dummy; 1360 Label dummy;
1364 GenerateBranches(masm, 1361 GenerateBranches(masm,
1365 ranges, 1362 ranges,
1366 start_index, 1363 start_index,
1367 new_end_index, 1364 new_end_index,
1368 min_char, 1365 min_char,
1369 border - 1, 1366 border - 1,
1370 &dummy, 1367 &dummy,
1371 even_label, 1368 even_label,
1372 odd_label); 1369 odd_label);
1373 1370 if (handle_rest.is_linked()) {
1374 if (handle_rest.IsLinked()) { 1371 masm->Bind(&handle_rest);
1375 masm->BindBlock(&handle_rest);
1376 bool flip = (new_start_index & 1) != (start_index & 1); 1372 bool flip = (new_start_index & 1) != (start_index & 1);
1377 GenerateBranches(masm, 1373 GenerateBranches(masm,
1378 ranges, 1374 ranges,
1379 new_start_index, 1375 new_start_index,
1380 end_index, 1376 end_index,
1381 border, 1377 border,
1382 max_char, 1378 max_char,
1383 &dummy, 1379 &dummy,
1384 flip ? odd_label : even_label, 1380 flip ? odd_label : even_label,
1385 flip ? even_label : odd_label); 1381 flip ? even_label : odd_label);
1386 } 1382 }
1387 } 1383 }
1388 1384
1389 1385
1390 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, 1386 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
1391 RegExpCharacterClass* cc, 1387 RegExpCharacterClass* cc, bool one_byte,
1392 bool one_byte, 1388 Label* on_failure, int cp_offset, bool check_offset,
1393 BlockLabel* on_failure, 1389 bool preloaded, Zone* zone) {
1394 intptr_t cp_offset, 1390 ZoneList<CharacterRange>* ranges = cc->ranges(zone);
1395 bool check_offset,
1396 bool preloaded,
1397 Isolate* isolate) {
1398 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();
1399 if (!CharacterRange::IsCanonical(ranges)) { 1391 if (!CharacterRange::IsCanonical(ranges)) {
1400 CharacterRange::Canonicalize(ranges); 1392 CharacterRange::Canonicalize(ranges);
1401 } 1393 }
1402 1394
1403 intptr_t max_char; 1395 int max_char;
1404 if (one_byte) { 1396 if (one_byte) {
1405 max_char = Symbols::kMaxOneCharCodeSymbol; 1397 max_char = String::kMaxOneByteCharCode;
1406 } else { 1398 } else {
1407 max_char = Utf16::kMaxCodeUnit; 1399 max_char = String::kMaxUtf16CodeUnit;
1408 } 1400 }
1409 1401
1410 intptr_t range_count = ranges->length(); 1402 int range_count = ranges->length();
1411 1403
1412 intptr_t last_valid_range = range_count - 1; 1404 int last_valid_range = range_count - 1;
1413 while (last_valid_range >= 0) { 1405 while (last_valid_range >= 0) {
1414 CharacterRange& range = (*ranges)[last_valid_range]; 1406 CharacterRange& range = ranges->at(last_valid_range);
1415 if (range.from() <= max_char) { 1407 if (range.from() <= max_char) {
1416 break; 1408 break;
1417 } 1409 }
1418 last_valid_range--; 1410 last_valid_range--;
1419 } 1411 }
1420 1412
1421 if (last_valid_range < 0) { 1413 if (last_valid_range < 0) {
1422 if (!cc->is_negated()) { 1414 if (!cc->is_negated()) {
1423 macro_assembler->GoTo(on_failure); 1415 macro_assembler->GoTo(on_failure);
1424 } 1416 }
1425 if (check_offset) { 1417 if (check_offset) {
1426 macro_assembler->CheckPosition(cp_offset, on_failure); 1418 macro_assembler->CheckPosition(cp_offset, on_failure);
1427 } 1419 }
1428 return; 1420 return;
1429 } 1421 }
1430 1422
1431 if (last_valid_range == 0 && 1423 if (last_valid_range == 0 &&
1432 ranges->At(0).IsEverything(max_char)) { 1424 ranges->at(0).IsEverything(max_char)) {
1433 if (cc->is_negated()) { 1425 if (cc->is_negated()) {
1434 macro_assembler->GoTo(on_failure); 1426 macro_assembler->GoTo(on_failure);
1435 } else { 1427 } else {
1436 // This is a common case hit by non-anchored expressions. 1428 // This is a common case hit by non-anchored expressions.
1437 if (check_offset) { 1429 if (check_offset) {
1438 macro_assembler->CheckPosition(cp_offset, on_failure); 1430 macro_assembler->CheckPosition(cp_offset, on_failure);
1439 } 1431 }
1440 } 1432 }
1441 return; 1433 return;
1442 } 1434 }
1443 if (last_valid_range == 0 && 1435 if (last_valid_range == 0 &&
1444 !cc->is_negated() && 1436 !cc->is_negated() &&
1445 ranges->At(0).IsEverything(max_char)) { 1437 ranges->at(0).IsEverything(max_char)) {
1446 // This is a common case hit by non-anchored expressions. 1438 // This is a common case hit by non-anchored expressions.
1447 if (check_offset) { 1439 if (check_offset) {
1448 macro_assembler->CheckPosition(cp_offset, on_failure); 1440 macro_assembler->CheckPosition(cp_offset, on_failure);
1449 } 1441 }
1450 return; 1442 return;
1451 } 1443 }
1452 1444
1453 if (!preloaded) { 1445 if (!preloaded) {
1454 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); 1446 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
1455 } 1447 }
1456 1448
1457 if (cc->is_standard() && 1449 if (cc->is_standard(zone) &&
1458 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), 1450 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
1459 on_failure)) { 1451 on_failure)) {
1460 return; 1452 return;
1461 } 1453 }
1462 1454
1463 1455
1464 // A new list with ascending entries. Each entry is a code unit 1456 // A new list with ascending entries. Each entry is a code unit
1465 // where there is a boundary between code units that are part of 1457 // where there is a boundary between code units that are part of
1466 // the class and code units that are not. Normally we insert an 1458 // the class and code units that are not. Normally we insert an
1467 // entry at zero which goes to the failure label, but if there 1459 // entry at zero which goes to the failure label, but if there
1468 // was already one there we fall through for success on that entry. 1460 // was already one there we fall through for success on that entry.
1469 // Subsequent entries have alternating meaning (success/failure). 1461 // Subsequent entries have alternating meaning (success/failure).
1470 ZoneGrowableArray<int>* range_boundaries = 1462 ZoneList<int>* range_boundaries =
1471 new(isolate) ZoneGrowableArray<int>(last_valid_range); 1463 new(zone) ZoneList<int>(last_valid_range, zone);
1472 1464
1473 bool zeroth_entry_is_failure = !cc->is_negated(); 1465 bool zeroth_entry_is_failure = !cc->is_negated();
1474 1466
1475 for (intptr_t i = 0; i <= last_valid_range; i++) { 1467 for (int i = 0; i <= last_valid_range; i++) {
1476 CharacterRange& range = (*ranges)[i]; 1468 CharacterRange& range = ranges->at(i);
1477 if (range.from() == 0) { 1469 if (range.from() == 0) {
1478 ASSERT(i == 0); 1470 DCHECK_EQ(i, 0);
1479 zeroth_entry_is_failure = !zeroth_entry_is_failure; 1471 zeroth_entry_is_failure = !zeroth_entry_is_failure;
1480 } else { 1472 } else {
1481 range_boundaries->Add(range.from()); 1473 range_boundaries->Add(range.from(), zone);
1482 } 1474 }
1483 range_boundaries->Add(range.to() + 1); 1475 range_boundaries->Add(range.to() + 1, zone);
1484 } 1476 }
1485 intptr_t end_index = range_boundaries->length() - 1; 1477 int end_index = range_boundaries->length() - 1;
1486 if (range_boundaries->At(end_index) > max_char) { 1478 if (range_boundaries->at(end_index) > max_char) {
1487 end_index--; 1479 end_index--;
1488 } 1480 }
1489 1481
1490 BlockLabel fall_through; 1482 Label fall_through;
1491 GenerateBranches(macro_assembler, 1483 GenerateBranches(macro_assembler,
1492 range_boundaries, 1484 range_boundaries,
1493 0, // start_index. 1485 0, // start_index.
1494 end_index, 1486 end_index,
1495 0, // min_char. 1487 0, // min_char.
1496 max_char, 1488 max_char,
1497 &fall_through, 1489 &fall_through,
1498 zeroth_entry_is_failure ? &fall_through : on_failure, 1490 zeroth_entry_is_failure ? &fall_through : on_failure,
1499 zeroth_entry_is_failure ? on_failure : &fall_through); 1491 zeroth_entry_is_failure ? on_failure : &fall_through);
1500 macro_assembler->BindBlock(&fall_through); 1492 macro_assembler->Bind(&fall_through);
1501 } 1493 }
1502 1494
1503 1495
1504 RegExpNode::~RegExpNode() { 1496 RegExpNode::~RegExpNode() {
1505 } 1497 }
1506 1498
1507 1499
1508 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, 1500 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
1509 Trace* trace) { 1501 Trace* trace) {
1510 // If we are generating a greedy loop then don't stop and don't reuse code. 1502 // If we are generating a greedy loop then don't stop and don't reuse code.
1511 if (trace->stop_node() != NULL) { 1503 if (trace->stop_node() != NULL) {
1512 return CONTINUE; 1504 return CONTINUE;
1513 } 1505 }
1514 1506
1515 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1507 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1516 if (trace->is_trivial()) { 1508 if (trace->is_trivial()) {
1517 if (label_.IsBound()) { 1509 if (label_.is_bound()) {
1518 // We are being asked to generate a generic version, but that's already 1510 // We are being asked to generate a generic version, but that's already
1519 // been done so just go to it. 1511 // been done so just go to it.
1520 macro_assembler->GoTo(&label_); 1512 macro_assembler->GoTo(&label_);
1521 return DONE; 1513 return DONE;
1522 } 1514 }
1523 if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) { 1515 if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {
1524 // To avoid too deep recursion we push the node to the work queue and just 1516 // To avoid too deep recursion we push the node to the work queue and just
1525 // generate a goto here. 1517 // generate a goto here.
1526 compiler->AddWork(this); 1518 compiler->AddWork(this);
1527 macro_assembler->GoTo(&label_); 1519 macro_assembler->GoTo(&label_);
1528 return DONE; 1520 return DONE;
1529 } 1521 }
1530 // Generate generic version of the node and bind the label for later use. 1522 // Generate generic version of the node and bind the label for later use.
1531 macro_assembler->BindBlock(&label_); 1523 macro_assembler->Bind(&label_);
1532 return CONTINUE; 1524 return CONTINUE;
1533 } 1525 }
1534 1526
1535 // We are being asked to make a non-generic version. Keep track of how many 1527 // We are being asked to make a non-generic version. Keep track of how many
1536 // non-generic versions we generate so as not to overdo it. 1528 // non-generic versions we generate so as not to overdo it.
1537 trace_count_++; 1529 trace_count_++;
1538 if (kRegexpOptimization && 1530 if (FLAG_regexp_optimization &&
1539 trace_count_ < kMaxCopiesCodeGenerated && 1531 trace_count_ < kMaxCopiesCodeGenerated &&
1540 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { 1532 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
1541 return CONTINUE; 1533 return CONTINUE;
1542 } 1534 }
1543 1535
1544 // If we get here code has been generated for this node too many times or 1536 // If we get here code has been generated for this node too many times or
1545 // recursion is too deep. Time to switch to a generic version. The code for 1537 // recursion is too deep. Time to switch to a generic version. The code for
1546 // generic versions above can handle deep recursion properly. 1538 // generic versions above can handle deep recursion properly.
1547 trace->Flush(compiler, this); 1539 trace->Flush(compiler, this);
1548 return DONE; 1540 return DONE;
1549 } 1541 }
1550 1542
1551 1543
1552 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find, 1544 int ActionNode::EatsAtLeast(int still_to_find,
1553 intptr_t budget, 1545 int budget,
1554 bool not_at_start) { 1546 bool not_at_start) {
1555 if (budget <= 0) return 0; 1547 if (budget <= 0) return 0;
1556 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! 1548 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
1557 return on_success()->EatsAtLeast(still_to_find, 1549 return on_success()->EatsAtLeast(still_to_find,
1558 budget - 1, 1550 budget - 1,
1559 not_at_start); 1551 not_at_start);
1560 } 1552 }
1561 1553
1562 1554
1563 void ActionNode::FillInBMInfo(intptr_t offset, 1555 void ActionNode::FillInBMInfo(int offset,
1564 intptr_t budget, 1556 int budget,
1565 BoyerMooreLookahead* bm, 1557 BoyerMooreLookahead* bm,
1566 bool not_at_start) { 1558 bool not_at_start) {
1567 if (action_type_ == BEGIN_SUBMATCH) { 1559 if (action_type_ == BEGIN_SUBMATCH) {
1568 bm->SetRest(offset); 1560 bm->SetRest(offset);
1569 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { 1561 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
1570 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); 1562 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
1571 } 1563 }
1572 SaveBMInfo(bm, not_at_start, offset); 1564 SaveBMInfo(bm, not_at_start, offset);
1573 } 1565 }
1574 1566
1575 1567
1576 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find, 1568 int AssertionNode::EatsAtLeast(int still_to_find,
1577 intptr_t budget, 1569 int budget,
1578 bool not_at_start) { 1570 bool not_at_start) {
1579 if (budget <= 0) return 0; 1571 if (budget <= 0) return 0;
1580 // If we know we are not at the start and we are asked "how many characters 1572 // If we know we are not at the start and we are asked "how many characters
1581 // will you match if you succeed?" then we can answer anything since false 1573 // will you match if you succeed?" then we can answer anything since false
1582 // implies false. So lets just return the max answer (still_to_find) since 1574 // implies false. So lets just return the max answer (still_to_find) since
1583 // that won't prevent us from preloading a lot of characters for the other 1575 // that won't prevent us from preloading a lot of characters for the other
1584 // branches in the node graph. 1576 // branches in the node graph.
1585 if (assertion_type() == AT_START && not_at_start) return still_to_find; 1577 if (assertion_type() == AT_START && not_at_start) return still_to_find;
1586 return on_success()->EatsAtLeast(still_to_find, 1578 return on_success()->EatsAtLeast(still_to_find,
1587 budget - 1, 1579 budget - 1,
1588 not_at_start); 1580 not_at_start);
1589 } 1581 }
1590 1582
1591 1583
1592 void AssertionNode::FillInBMInfo(intptr_t offset, 1584 void AssertionNode::FillInBMInfo(int offset,
1593 intptr_t budget, 1585 int budget,
1594 BoyerMooreLookahead* bm, 1586 BoyerMooreLookahead* bm,
1595 bool not_at_start) { 1587 bool not_at_start) {
1596 // Match the behaviour of EatsAtLeast on this node. 1588 // Match the behaviour of EatsAtLeast on this node.
1597 if (assertion_type() == AT_START && not_at_start) return; 1589 if (assertion_type() == AT_START && not_at_start) return;
1598 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); 1590 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
1599 SaveBMInfo(bm, not_at_start, offset); 1591 SaveBMInfo(bm, not_at_start, offset);
1600 } 1592 }
1601 1593
1602 1594
1603 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find, 1595 int BackReferenceNode::EatsAtLeast(int still_to_find,
1604 intptr_t budget, 1596 int budget,
1605 bool not_at_start) { 1597 bool not_at_start) {
1606 if (budget <= 0) return 0; 1598 if (budget <= 0) return 0;
1607 return on_success()->EatsAtLeast(still_to_find, 1599 return on_success()->EatsAtLeast(still_to_find,
1608 budget - 1, 1600 budget - 1,
1609 not_at_start); 1601 not_at_start);
1610 } 1602 }
1611 1603
1612 1604
1613 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, 1605 int TextNode::EatsAtLeast(int still_to_find,
1614 intptr_t budget, 1606 int budget,
1615 bool not_at_start) { 1607 bool not_at_start) {
1616 intptr_t answer = Length(); 1608 int answer = Length();
1617 if (answer >= still_to_find) return answer; 1609 if (answer >= still_to_find) return answer;
1618 if (budget <= 0) return answer; 1610 if (budget <= 0) return answer;
1619 // We are not at start after this node so we set the last argument to 'true'. 1611 // We are not at start after this node so we set the last argument to 'true'.
1620 return answer + on_success()->EatsAtLeast(still_to_find - answer, 1612 return answer + on_success()->EatsAtLeast(still_to_find - answer,
1621 budget - 1, 1613 budget - 1,
1622 true); 1614 true);
1623 } 1615 }
1624 1616
1625 1617
1626 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find, 1618 int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find,
1627 intptr_t budget, 1619 int budget,
1628 bool not_at_start) { 1620 bool not_at_start) {
1629 if (budget <= 0) return 0; 1621 if (budget <= 0) return 0;
1630 // Alternative 0 is the negative lookahead, alternative 1 is what comes 1622 // Alternative 0 is the negative lookahead, alternative 1 is what comes
1631 // afterwards. 1623 // afterwards.
1632 RegExpNode* node = (*alternatives_)[1].node(); 1624 RegExpNode* node = alternatives_->at(1).node();
1633 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start); 1625 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);
1634 } 1626 }
1635 1627
1636 1628
1637 void NegativeLookaheadChoiceNode::GetQuickCheckDetails( 1629 void NegativeLookaheadChoiceNode::GetQuickCheckDetails(
1638 QuickCheckDetails* details, 1630 QuickCheckDetails* details,
1639 RegExpCompiler* compiler, 1631 RegExpCompiler* compiler,
1640 intptr_t filled_in, 1632 int filled_in,
1641 bool not_at_start) { 1633 bool not_at_start) {
1642 // Alternative 0 is the negative lookahead, alternative 1 is what comes 1634 // Alternative 0 is the negative lookahead, alternative 1 is what comes
1643 // afterwards. 1635 // afterwards.
1644 RegExpNode* node = (*alternatives_)[1].node(); 1636 RegExpNode* node = alternatives_->at(1).node();
1645 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); 1637 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
1646 } 1638 }
1647 1639
1648 1640
1649 intptr_t ChoiceNode::EatsAtLeastHelper(intptr_t still_to_find, 1641 int ChoiceNode::EatsAtLeastHelper(int still_to_find,
1650 intptr_t budget, 1642 int budget,
1651 RegExpNode* ignore_this_node, 1643 RegExpNode* ignore_this_node,
1652 bool not_at_start) { 1644 bool not_at_start) {
1653 if (budget <= 0) return 0; 1645 if (budget <= 0) return 0;
1654 intptr_t min = 100; 1646 int min = 100;
1655 intptr_t choice_count = alternatives_->length(); 1647 int choice_count = alternatives_->length();
1656 budget = (budget - 1) / choice_count; 1648 budget = (budget - 1) / choice_count;
1657 for (intptr_t i = 0; i < choice_count; i++) { 1649 for (int i = 0; i < choice_count; i++) {
1658 RegExpNode* node = (*alternatives_)[i].node(); 1650 RegExpNode* node = alternatives_->at(i).node();
1659 if (node == ignore_this_node) continue; 1651 if (node == ignore_this_node) continue;
1660 intptr_t node_eats_at_least = 1652 int node_eats_at_least =
1661 node->EatsAtLeast(still_to_find, budget, not_at_start); 1653 node->EatsAtLeast(still_to_find, budget, not_at_start);
1662 if (node_eats_at_least < min) min = node_eats_at_least; 1654 if (node_eats_at_least < min) min = node_eats_at_least;
1663 if (min == 0) return 0; 1655 if (min == 0) return 0;
1664 } 1656 }
1665 return min; 1657 return min;
1666 } 1658 }
1667 1659
1668 1660
1669 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, 1661 int LoopChoiceNode::EatsAtLeast(int still_to_find,
1670 intptr_t budget, 1662 int budget,
1671 bool not_at_start) { 1663 bool not_at_start) {
1672 return EatsAtLeastHelper(still_to_find, 1664 return EatsAtLeastHelper(still_to_find,
1673 budget - 1, 1665 budget - 1,
1674 loop_node_, 1666 loop_node_,
1675 not_at_start); 1667 not_at_start);
1676 } 1668 }
1677 1669
1678 1670
1679 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find, 1671 int ChoiceNode::EatsAtLeast(int still_to_find,
1680 intptr_t budget, 1672 int budget,
1681 bool not_at_start) { 1673 bool not_at_start) {
1682 return EatsAtLeastHelper(still_to_find, 1674 return EatsAtLeastHelper(still_to_find,
1683 budget, 1675 budget,
1684 NULL, 1676 NULL,
1685 not_at_start); 1677 not_at_start);
1686 } 1678 }
1687 1679
1688 1680
1689 // Takes the left-most 1-bit and smears it out, setting all bits to its right. 1681 // Takes the left-most 1-bit and smears it out, setting all bits to its right.
1690 static inline uint32_t SmearBitsRight(uint32_t v) { 1682 static inline uint32_t SmearBitsRight(uint32_t v) {
1691 v |= v >> 1; 1683 v |= v >> 1;
1692 v |= v >> 2; 1684 v |= v >> 2;
1693 v |= v >> 4; 1685 v |= v >> 4;
1694 v |= v >> 8; 1686 v |= v >> 8;
1695 v |= v >> 16; 1687 v |= v >> 16;
1696 return v; 1688 return v;
1697 } 1689 }
1698 1690
1699 1691
1700 bool QuickCheckDetails::Rationalize(bool asc) { 1692 bool QuickCheckDetails::Rationalize(bool asc) {
1701 bool found_useful_op = false; 1693 bool found_useful_op = false;
1702 uint32_t char_mask; 1694 uint32_t char_mask;
1703 if (asc) { 1695 if (asc) {
1704 char_mask = Symbols::kMaxOneCharCodeSymbol; 1696 char_mask = String::kMaxOneByteCharCode;
1705 } else { 1697 } else {
1706 char_mask = Utf16::kMaxCodeUnit; 1698 char_mask = String::kMaxUtf16CodeUnit;
1707 } 1699 }
1708 mask_ = 0; 1700 mask_ = 0;
1709 value_ = 0; 1701 value_ = 0;
1710 intptr_t char_shift = 0; 1702 int char_shift = 0;
1711 for (intptr_t i = 0; i < characters_; i++) { 1703 for (int i = 0; i < characters_; i++) {
1712 Position* pos = &positions_[i]; 1704 Position* pos = &positions_[i];
1713 if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != 0) { 1705 if ((pos->mask & String::kMaxOneByteCharCode) != 0) {
1714 found_useful_op = true; 1706 found_useful_op = true;
1715 } 1707 }
1716 mask_ |= (pos->mask & char_mask) << char_shift; 1708 mask_ |= (pos->mask & char_mask) << char_shift;
1717 value_ |= (pos->value & char_mask) << char_shift; 1709 value_ |= (pos->value & char_mask) << char_shift;
1718 char_shift += asc ? 8 : 16; 1710 char_shift += asc ? 8 : 16;
1719 } 1711 }
1720 return found_useful_op; 1712 return found_useful_op;
1721 } 1713 }
1722 1714
1723 1715
1724 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, 1716 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
1725 Trace* bounds_check_trace, 1717 Trace* bounds_check_trace,
1726 Trace* trace, 1718 Trace* trace,
1727 bool preload_has_checked_bounds, 1719 bool preload_has_checked_bounds,
1728 BlockLabel* on_possible_success, 1720 Label* on_possible_success,
1729 QuickCheckDetails* details, 1721 QuickCheckDetails* details,
1730 bool fall_through_on_failure) { 1722 bool fall_through_on_failure) {
1731 if (details->characters() == 0) return false; 1723 if (details->characters() == 0) return false;
1732 GetQuickCheckDetails( 1724 GetQuickCheckDetails(
1733 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE); 1725 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);
1734 if (details->cannot_match()) return false; 1726 if (details->cannot_match()) return false;
1735 if (!details->Rationalize(compiler->one_byte())) return false; 1727 if (!details->Rationalize(compiler->one_byte())) return false;
1736 ASSERT(details->characters() == 1 || 1728 DCHECK(details->characters() == 1 ||
1737 compiler->macro_assembler()->CanReadUnaligned()); 1729 compiler->macro_assembler()->CanReadUnaligned());
1738 uint32_t mask = details->mask(); 1730 uint32_t mask = details->mask();
1739 uint32_t value = details->value(); 1731 uint32_t value = details->value();
1740 1732
1741 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1733 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1742 1734
1743 if (trace->characters_preloaded() != details->characters()) { 1735 if (trace->characters_preloaded() != details->characters()) {
1744 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset()); 1736 DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset());
1745 // We are attempting to preload the minimum number of characters 1737 // We are attempting to preload the minimum number of characters
1746 // any choice would eat, so if the bounds check fails, then none of the 1738 // any choice would eat, so if the bounds check fails, then none of the
1747 // choices can succeed, so we can just immediately backtrack, rather 1739 // choices can succeed, so we can just immediately backtrack, rather
1748 // than go to the next choice. 1740 // than go to the next choice.
1749 assembler->LoadCurrentCharacter(trace->cp_offset(), 1741 assembler->LoadCurrentCharacter(trace->cp_offset(),
1750 bounds_check_trace->backtrack(), 1742 bounds_check_trace->backtrack(),
1751 !preload_has_checked_bounds, 1743 !preload_has_checked_bounds,
1752 details->characters()); 1744 details->characters());
1753 } 1745 }
1754 1746
1755 1747
1756 bool need_mask = true; 1748 bool need_mask = true;
1757 1749
1758 if (details->characters() == 1) { 1750 if (details->characters() == 1) {
1759 // If number of characters preloaded is 1 then we used a byte or 16 bit 1751 // If number of characters preloaded is 1 then we used a byte or 16 bit
1760 // load so the value is already masked down. 1752 // load so the value is already masked down.
1761 uint32_t char_mask; 1753 uint32_t char_mask;
1762 if (compiler->one_byte()) { 1754 if (compiler->one_byte()) {
1763 char_mask = Symbols::kMaxOneCharCodeSymbol; 1755 char_mask = String::kMaxOneByteCharCode;
1764 } else { 1756 } else {
1765 char_mask = Utf16::kMaxCodeUnit; 1757 char_mask = String::kMaxUtf16CodeUnit;
1766 } 1758 }
1767 if ((mask & char_mask) == char_mask) need_mask = false; 1759 if ((mask & char_mask) == char_mask) need_mask = false;
1768 mask &= char_mask; 1760 mask &= char_mask;
1769 } else { 1761 } else {
1770 // For 2-character preloads in one-byte mode or 1-character preloads in 1762 // For 2-character preloads in one-byte mode or 1-character preloads in
1771 // two-byte mode we also use a 16 bit load with zero extend. 1763 // two-byte mode we also use a 16 bit load with zero extend.
1772 if (details->characters() == 2 && compiler->one_byte()) { 1764 if (details->characters() == 2 && compiler->one_byte()) {
1773 if ((mask & 0xffff) == 0xffff) need_mask = false; 1765 if ((mask & 0xffff) == 0xffff) need_mask = false;
1774 } else if (details->characters() == 1 && !compiler->one_byte()) { 1766 } else if (details->characters() == 1 && !compiler->one_byte()) {
1775 if ((mask & 0xffff) == 0xffff) need_mask = false; 1767 if ((mask & 0xffff) == 0xffff) need_mask = false;
(...skipping 22 matching lines...) Expand all
1798 // Here is the meat of GetQuickCheckDetails (see also the comment on the 1790 // Here is the meat of GetQuickCheckDetails (see also the comment on the
1799 // super-class in the .h file). 1791 // super-class in the .h file).
1800 // 1792 //
1801 // We iterate along the text object, building up for each character a 1793 // We iterate along the text object, building up for each character a
1802 // mask and value that can be used to test for a quick failure to match. 1794 // mask and value that can be used to test for a quick failure to match.
1803 // The masks and values for the positions will be combined into a single 1795 // The masks and values for the positions will be combined into a single
1804 // machine word for the current character width in order to be used in 1796 // machine word for the current character width in order to be used in
1805 // generating a quick check. 1797 // generating a quick check.
1806 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, 1798 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
1807 RegExpCompiler* compiler, 1799 RegExpCompiler* compiler,
1808 intptr_t characters_filled_in, 1800 int characters_filled_in,
1809 bool not_at_start) { 1801 bool not_at_start) {
1810 #if defined(__GNUC__) 1802 Isolate* isolate = compiler->macro_assembler()->zone()->isolate();
1811 // TODO(zerny): Make the combination code byte-order independent. 1803 DCHECK(characters_filled_in < details->characters());
1812 ASSERT(details->characters() == 1 || 1804 int characters = details->characters();
1813 (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)); 1805 int char_mask;
1814 #endif
1815 ASSERT(characters_filled_in < details->characters());
1816 intptr_t characters = details->characters();
1817 intptr_t char_mask;
1818 if (compiler->one_byte()) { 1806 if (compiler->one_byte()) {
1819 char_mask = Symbols::kMaxOneCharCodeSymbol; 1807 char_mask = String::kMaxOneByteCharCode;
1820 } else { 1808 } else {
1821 char_mask = Utf16::kMaxCodeUnit; 1809 char_mask = String::kMaxUtf16CodeUnit;
1822 } 1810 }
1823 for (intptr_t k = 0; k < elms_->length(); k++) { 1811 for (int k = 0; k < elms_->length(); k++) {
1824 TextElement elm = elms_->At(k); 1812 TextElement elm = elms_->at(k);
1825 if (elm.text_type() == TextElement::ATOM) { 1813 if (elm.text_type() == TextElement::ATOM) {
1826 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); 1814 Vector<const uc16> quarks = elm.atom()->data();
1827 for (intptr_t i = 0; i < characters && i < quarks->length(); i++) { 1815 for (int i = 0; i < characters && i < quarks.length(); i++) {
1828 QuickCheckDetails::Position* pos = 1816 QuickCheckDetails::Position* pos =
1829 details->positions(characters_filled_in); 1817 details->positions(characters_filled_in);
1830 uint16_t c = quarks->At(i); 1818 uc16 c = quarks[i];
1831 if (c > char_mask) { 1819 if (c > char_mask) {
1832 // If we expect a non-Latin1 character from an one-byte string, 1820 // If we expect a non-Latin1 character from an one-byte string,
1833 // there is no way we can match. Not even case independent 1821 // there is no way we can match. Not even case-independent
1834 // matching can turn an Latin1 character into non-Latin1 or 1822 // matching can turn an Latin1 character into non-Latin1 or
1835 // vice versa. 1823 // vice versa.
1836 // TODO(dcarney): issue 3550. Verify that this works as expected. 1824 // TODO(dcarney): issue 3550. Verify that this works as expected.
1837 // For example, \u0178 is uppercase of \u00ff (y-umlaut). 1825 // For example, \u0178 is uppercase of \u00ff (y-umlaut).
1838 details->set_cannot_match(); 1826 details->set_cannot_match();
1839 pos->determines_perfectly = false; 1827 pos->determines_perfectly = false;
1840 return; 1828 return;
1841 } 1829 }
1842 if (compiler->ignore_case()) { 1830 if (compiler->ignore_case()) {
1843 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1831 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1844 intptr_t length = 1832 int length = GetCaseIndependentLetters(isolate, c,
1845 GetCaseIndependentLetters(c, compiler->one_byte(), chars); 1833 compiler->one_byte(), chars);
1846 ASSERT(length != 0); // Can only happen if c > char_mask (see above). 1834 DCHECK(length != 0); // Can only happen if c > char_mask (see above).
1847 if (length == 1) { 1835 if (length == 1) {
1848 // This letter has no case equivalents, so it's nice and simple 1836 // This letter has no case equivalents, so it's nice and simple
1849 // and the mask-compare will determine definitely whether we have 1837 // and the mask-compare will determine definitely whether we have
1850 // a match at this character position. 1838 // a match at this character position.
1851 pos->mask = char_mask; 1839 pos->mask = char_mask;
1852 pos->value = c; 1840 pos->value = c;
1853 pos->determines_perfectly = true; 1841 pos->determines_perfectly = true;
1854 } else { 1842 } else {
1855 uint32_t common_bits = char_mask; 1843 uint32_t common_bits = char_mask;
1856 uint32_t bits = chars[0]; 1844 uint32_t bits = chars[0];
1857 for (intptr_t j = 1; j < length; j++) { 1845 for (int j = 1; j < length; j++) {
1858 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits); 1846 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
1859 common_bits ^= differing_bits; 1847 common_bits ^= differing_bits;
1860 bits &= common_bits; 1848 bits &= common_bits;
1861 } 1849 }
1862 // If length is 2 and common bits has only one zero in it then 1850 // If length is 2 and common bits has only one zero in it then
1863 // our mask and compare instruction will determine definitely 1851 // our mask and compare instruction will determine definitely
1864 // whether we have a match at this character position. Otherwise 1852 // whether we have a match at this character position. Otherwise
1865 // it can only be an approximate check. 1853 // it can only be an approximate check.
1866 uint32_t one_zero = (common_bits | ~char_mask); 1854 uint32_t one_zero = (common_bits | ~char_mask);
1867 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) { 1855 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
1868 pos->determines_perfectly = true; 1856 pos->determines_perfectly = true;
1869 } 1857 }
1870 pos->mask = common_bits; 1858 pos->mask = common_bits;
1871 pos->value = bits; 1859 pos->value = bits;
1872 } 1860 }
1873 } else { 1861 } else {
1874 // Don't ignore case. Nice simple case where the mask-compare will 1862 // Don't ignore case. Nice simple case where the mask-compare will
1875 // determine definitely whether we have a match at this character 1863 // determine definitely whether we have a match at this character
1876 // position. 1864 // position.
1877 pos->mask = char_mask; 1865 pos->mask = char_mask;
1878 pos->value = c; 1866 pos->value = c;
1879 pos->determines_perfectly = true; 1867 pos->determines_perfectly = true;
1880 } 1868 }
1881 characters_filled_in++; 1869 characters_filled_in++;
1882 ASSERT(characters_filled_in <= details->characters()); 1870 DCHECK(characters_filled_in <= details->characters());
1883 if (characters_filled_in == details->characters()) { 1871 if (characters_filled_in == details->characters()) {
1884 return; 1872 return;
1885 } 1873 }
1886 } 1874 }
1887 } else { 1875 } else {
1888 QuickCheckDetails::Position* pos = 1876 QuickCheckDetails::Position* pos =
1889 details->positions(characters_filled_in); 1877 details->positions(characters_filled_in);
1890 RegExpCharacterClass* tree = elm.char_class(); 1878 RegExpCharacterClass* tree = elm.char_class();
1891 ZoneGrowableArray<CharacterRange>* ranges = tree->ranges(); 1879 ZoneList<CharacterRange>* ranges = tree->ranges(zone());
1892 if (tree->is_negated()) { 1880 if (tree->is_negated()) {
1893 // A quick check uses multi-character mask and compare. There is no 1881 // A quick check uses multi-character mask and compare. There is no
1894 // useful way to incorporate a negative char class into this scheme 1882 // useful way to incorporate a negative char class into this scheme
1895 // so we just conservatively create a mask and value that will always 1883 // so we just conservatively create a mask and value that will always
1896 // succeed. 1884 // succeed.
1897 pos->mask = 0; 1885 pos->mask = 0;
1898 pos->value = 0; 1886 pos->value = 0;
1899 } else { 1887 } else {
1900 intptr_t first_range = 0; 1888 int first_range = 0;
1901 while (ranges->At(first_range).from() > char_mask) { 1889 while (ranges->at(first_range).from() > char_mask) {
1902 first_range++; 1890 first_range++;
1903 if (first_range == ranges->length()) { 1891 if (first_range == ranges->length()) {
1904 details->set_cannot_match(); 1892 details->set_cannot_match();
1905 pos->determines_perfectly = false; 1893 pos->determines_perfectly = false;
1906 return; 1894 return;
1907 } 1895 }
1908 } 1896 }
1909 CharacterRange range = ranges->At(first_range); 1897 CharacterRange range = ranges->at(first_range);
1910 uint16_t from = range.from(); 1898 uc16 from = range.from();
1911 uint16_t to = range.to(); 1899 uc16 to = range.to();
1912 if (to > char_mask) { 1900 if (to > char_mask) {
1913 to = char_mask; 1901 to = char_mask;
1914 } 1902 }
1915 uint32_t differing_bits = (from ^ to); 1903 uint32_t differing_bits = (from ^ to);
1916 // A mask and compare is only perfect if the differing bits form a 1904 // A mask and compare is only perfect if the differing bits form a
1917 // number like 00011111 with one single block of trailing 1s. 1905 // number like 00011111 with one single block of trailing 1s.
1918 if ((differing_bits & (differing_bits + 1)) == 0 && 1906 if ((differing_bits & (differing_bits + 1)) == 0 &&
1919 from + differing_bits == to) { 1907 from + differing_bits == to) {
1920 pos->determines_perfectly = true; 1908 pos->determines_perfectly = true;
1921 } 1909 }
1922 uint32_t common_bits = ~SmearBitsRight(differing_bits); 1910 uint32_t common_bits = ~SmearBitsRight(differing_bits);
1923 uint32_t bits = (from & common_bits); 1911 uint32_t bits = (from & common_bits);
1924 for (intptr_t i = first_range + 1; i < ranges->length(); i++) { 1912 for (int i = first_range + 1; i < ranges->length(); i++) {
1925 CharacterRange range = ranges->At(i); 1913 CharacterRange range = ranges->at(i);
1926 uint16_t from = range.from(); 1914 uc16 from = range.from();
1927 uint16_t to = range.to(); 1915 uc16 to = range.to();
1928 if (from > char_mask) continue; 1916 if (from > char_mask) continue;
1929 if (to > char_mask) to = char_mask; 1917 if (to > char_mask) to = char_mask;
1930 // Here we are combining more ranges into the mask and compare 1918 // Here we are combining more ranges into the mask and compare
1931 // value. With each new range the mask becomes more sparse and 1919 // value. With each new range the mask becomes more sparse and
1932 // so the chances of a false positive rise. A character class 1920 // so the chances of a false positive rise. A character class
1933 // with multiple ranges is assumed never to be equivalent to a 1921 // with multiple ranges is assumed never to be equivalent to a
1934 // mask and compare operation. 1922 // mask and compare operation.
1935 pos->determines_perfectly = false; 1923 pos->determines_perfectly = false;
1936 uint32_t new_common_bits = (from ^ to); 1924 uint32_t new_common_bits = (from ^ to);
1937 new_common_bits = ~SmearBitsRight(new_common_bits); 1925 new_common_bits = ~SmearBitsRight(new_common_bits);
1938 common_bits &= new_common_bits; 1926 common_bits &= new_common_bits;
1939 bits &= new_common_bits; 1927 bits &= new_common_bits;
1940 uint32_t differing_bits = (from & common_bits) ^ bits; 1928 uint32_t differing_bits = (from & common_bits) ^ bits;
1941 common_bits ^= differing_bits; 1929 common_bits ^= differing_bits;
1942 bits &= common_bits; 1930 bits &= common_bits;
1943 } 1931 }
1944 pos->mask = common_bits; 1932 pos->mask = common_bits;
1945 pos->value = bits; 1933 pos->value = bits;
1946 } 1934 }
1947 characters_filled_in++; 1935 characters_filled_in++;
1948 ASSERT(characters_filled_in <= details->characters()); 1936 DCHECK(characters_filled_in <= details->characters());
1949 if (characters_filled_in == details->characters()) { 1937 if (characters_filled_in == details->characters()) {
1950 return; 1938 return;
1951 } 1939 }
1952 } 1940 }
1953 } 1941 }
1954 ASSERT(characters_filled_in != details->characters()); 1942 DCHECK(characters_filled_in != details->characters());
1955 if (!details->cannot_match()) { 1943 if (!details->cannot_match()) {
1956 on_success()-> GetQuickCheckDetails(details, 1944 on_success()-> GetQuickCheckDetails(details,
1957 compiler, 1945 compiler,
1958 characters_filled_in, 1946 characters_filled_in,
1959 true); 1947 true);
1960 } 1948 }
1961 } 1949 }
1962 1950
1963 1951
1964 void QuickCheckDetails::Clear() { 1952 void QuickCheckDetails::Clear() {
1965 for (int i = 0; i < characters_; i++) { 1953 for (int i = 0; i < characters_; i++) {
1966 positions_[i].mask = 0; 1954 positions_[i].mask = 0;
1967 positions_[i].value = 0; 1955 positions_[i].value = 0;
1968 positions_[i].determines_perfectly = false; 1956 positions_[i].determines_perfectly = false;
1969 } 1957 }
1970 characters_ = 0; 1958 characters_ = 0;
1971 } 1959 }
1972 1960
1973 1961
1974 void QuickCheckDetails::Advance(intptr_t by, bool one_byte) { 1962 void QuickCheckDetails::Advance(int by, bool one_byte) {
1975 ASSERT(by >= 0); 1963 DCHECK(by >= 0);
1976 if (by >= characters_) { 1964 if (by >= characters_) {
1977 Clear(); 1965 Clear();
1978 return; 1966 return;
1979 } 1967 }
1980 for (intptr_t i = 0; i < characters_ - by; i++) { 1968 for (int i = 0; i < characters_ - by; i++) {
1981 positions_[i] = positions_[by + i]; 1969 positions_[i] = positions_[by + i];
1982 } 1970 }
1983 for (intptr_t i = characters_ - by; i < characters_; i++) { 1971 for (int i = characters_ - by; i < characters_; i++) {
1984 positions_[i].mask = 0; 1972 positions_[i].mask = 0;
1985 positions_[i].value = 0; 1973 positions_[i].value = 0;
1986 positions_[i].determines_perfectly = false; 1974 positions_[i].determines_perfectly = false;
1987 } 1975 }
1988 characters_ -= by; 1976 characters_ -= by;
1989 // We could change mask_ and value_ here but we would never advance unless 1977 // We could change mask_ and value_ here but we would never advance unless
1990 // they had already been used in a check and they won't be used again because 1978 // they had already been used in a check and they won't be used again because
1991 // it would gain us nothing. So there's no point. 1979 // it would gain us nothing. So there's no point.
1992 } 1980 }
1993 1981
1994 1982
1995 void QuickCheckDetails::Merge(QuickCheckDetails* other, intptr_t from_index) { 1983 void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
1996 ASSERT(characters_ == other->characters_); 1984 DCHECK(characters_ == other->characters_);
1997 if (other->cannot_match_) { 1985 if (other->cannot_match_) {
1998 return; 1986 return;
1999 } 1987 }
2000 if (cannot_match_) { 1988 if (cannot_match_) {
2001 *this = *other; 1989 *this = *other;
2002 return; 1990 return;
2003 } 1991 }
2004 for (intptr_t i = from_index; i < characters_; i++) { 1992 for (int i = from_index; i < characters_; i++) {
2005 QuickCheckDetails::Position* pos = positions(i); 1993 QuickCheckDetails::Position* pos = positions(i);
2006 QuickCheckDetails::Position* other_pos = other->positions(i); 1994 QuickCheckDetails::Position* other_pos = other->positions(i);
2007 if (pos->mask != other_pos->mask || 1995 if (pos->mask != other_pos->mask ||
2008 pos->value != other_pos->value || 1996 pos->value != other_pos->value ||
2009 !other_pos->determines_perfectly) { 1997 !other_pos->determines_perfectly) {
2010 // Our mask-compare operation will be approximate unless we have the 1998 // Our mask-compare operation will be approximate unless we have the
2011 // exact same operation on both sides of the alternation. 1999 // exact same operation on both sides of the alternation.
2012 pos->determines_perfectly = false; 2000 pos->determines_perfectly = false;
2013 } 2001 }
2014 pos->mask &= other_pos->mask; 2002 pos->mask &= other_pos->mask;
2015 pos->value &= pos->mask; 2003 pos->value &= pos->mask;
2016 other_pos->value &= pos->mask; 2004 other_pos->value &= pos->mask;
2017 uint16_t differing_bits = (pos->value ^ other_pos->value); 2005 uc16 differing_bits = (pos->value ^ other_pos->value);
2018 pos->mask &= ~differing_bits; 2006 pos->mask &= ~differing_bits;
2019 pos->value &= pos->mask; 2007 pos->value &= pos->mask;
2020 } 2008 }
2021 } 2009 }
2022 2010
2023 2011
2024 class VisitMarker : public ValueObject { 2012 class VisitMarker {
2025 public: 2013 public:
2026 explicit VisitMarker(NodeInfo* info) : info_(info) { 2014 explicit VisitMarker(NodeInfo* info) : info_(info) {
2027 ASSERT(!info->visited); 2015 DCHECK(!info->visited);
2028 info->visited = true; 2016 info->visited = true;
2029 } 2017 }
2030 ~VisitMarker() { 2018 ~VisitMarker() {
2031 info_->visited = false; 2019 info_->visited = false;
2032 } 2020 }
2033 private: 2021 private:
2034 NodeInfo* info_; 2022 NodeInfo* info_;
2035 }; 2023 };
2036 2024
2037 2025
2038 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) { 2026 RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) {
2039 if (info()->replacement_calculated) return replacement(); 2027 if (info()->replacement_calculated) return replacement();
2040 if (depth < 0) return this; 2028 if (depth < 0) return this;
2041 ASSERT(!info()->visited); 2029 DCHECK(!info()->visited);
2042 VisitMarker marker(info()); 2030 VisitMarker marker(info());
2043 return FilterSuccessor(depth - 1, ignore_case); 2031 return FilterSuccessor(depth - 1, ignore_case);
2044 } 2032 }
2045 2033
2046 2034
2047 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) { 2035 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {
2048 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); 2036 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);
2049 if (next == NULL) return set_replacement(NULL); 2037 if (next == NULL) return set_replacement(NULL);
2050 on_success_ = next; 2038 on_success_ = next;
2051 return set_replacement(this); 2039 return set_replacement(this);
2052 } 2040 }
2053 2041
2054 2042
2055 // We need to check for the following characters: 0x39c 0x3bc 0x178. 2043 // We need to check for the following characters: 0x39c 0x3bc 0x178.
2056 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { 2044 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
2057 // TODO(dcarney): this could be a lot more efficient. 2045 // TODO(dcarney): this could be a lot more efficient.
2058 return range.Contains(0x39c) || 2046 return range.Contains(0x39c) ||
2059 range.Contains(0x3bc) || range.Contains(0x178); 2047 range.Contains(0x3bc) || range.Contains(0x178);
2060 } 2048 }
2061 2049
2062 2050
2063 static bool RangesContainLatin1Equivalents( 2051 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
2064 ZoneGrowableArray<CharacterRange>* ranges) { 2052 for (int i = 0; i < ranges->length(); i++) {
2065 for (intptr_t i = 0; i < ranges->length(); i++) {
2066 // TODO(dcarney): this could be a lot more efficient. 2053 // TODO(dcarney): this could be a lot more efficient.
2067 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true; 2054 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
2068 } 2055 }
2069 return false; 2056 return false;
2070 } 2057 }
2071 2058
2072 2059
2073 static uint16_t ConvertNonLatin1ToLatin1(uint16_t c) { 2060 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
2074 ASSERT(c > Symbols::kMaxOneCharCodeSymbol);
2075 switch (c) {
2076 // This are equivalent characters in unicode.
2077 case 0x39c:
2078 case 0x3bc:
2079 return 0xb5;
2080 // This is an uppercase of a Latin-1 character
2081 // outside of Latin-1.
2082 case 0x178:
2083 return 0xff;
2084 }
2085 return 0;
2086 }
2087
2088
2089 RegExpNode* TextNode::FilterOneByte(intptr_t depth, bool ignore_case) {
2090 if (info()->replacement_calculated) return replacement(); 2061 if (info()->replacement_calculated) return replacement();
2091 if (depth < 0) return this; 2062 if (depth < 0) return this;
2092 ASSERT(!info()->visited); 2063 DCHECK(!info()->visited);
2093 VisitMarker marker(info()); 2064 VisitMarker marker(info());
2094 intptr_t element_count = elms_->length(); 2065 int element_count = elms_->length();
2095 for (intptr_t i = 0; i < element_count; i++) { 2066 for (int i = 0; i < element_count; i++) {
2096 TextElement elm = elms_->At(i); 2067 TextElement elm = elms_->at(i);
2097 if (elm.text_type() == TextElement::ATOM) { 2068 if (elm.text_type() == TextElement::ATOM) {
2098 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); 2069 Vector<const uc16> quarks = elm.atom()->data();
2099 for (intptr_t j = 0; j < quarks->length(); j++) { 2070 for (int j = 0; j < quarks.length(); j++) {
2100 uint16_t c = quarks->At(j); 2071 uint16_t c = quarks[j];
2101 if (c <= Symbols::kMaxOneCharCodeSymbol) continue; 2072 if (c <= String::kMaxOneByteCharCode) continue;
2102 if (!ignore_case) return set_replacement(NULL); 2073 if (!ignore_case) return set_replacement(NULL);
2103 // Here, we need to check for characters whose upper and lower cases 2074 // Here, we need to check for characters whose upper and lower cases
2104 // are outside the Latin-1 range. 2075 // are outside the Latin-1 range.
2105 uint16_t converted = ConvertNonLatin1ToLatin1(c); 2076 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
2106 // Character is outside Latin-1 completely 2077 // Character is outside Latin-1 completely
2107 if (converted == 0) return set_replacement(NULL); 2078 if (converted == 0) return set_replacement(NULL);
2108 // Convert quark to Latin-1 in place. 2079 // Convert quark to Latin-1 in place.
2109 (*quarks)[0] = converted; 2080 uint16_t* copy = const_cast<uint16_t*>(quarks.start());
2081 copy[j] = converted;
2110 } 2082 }
2111 } else { 2083 } else {
2112 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); 2084 DCHECK(elm.text_type() == TextElement::CHAR_CLASS);
2113 RegExpCharacterClass* cc = elm.char_class(); 2085 RegExpCharacterClass* cc = elm.char_class();
2114 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); 2086 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
2115 if (!CharacterRange::IsCanonical(ranges)) { 2087 if (!CharacterRange::IsCanonical(ranges)) {
2116 CharacterRange::Canonicalize(ranges); 2088 CharacterRange::Canonicalize(ranges);
2117 } 2089 }
2118 // Now they are in order so we only need to look at the first. 2090 // Now they are in order so we only need to look at the first.
2119 intptr_t range_count = ranges->length(); 2091 int range_count = ranges->length();
2120 if (cc->is_negated()) { 2092 if (cc->is_negated()) {
2121 if (range_count != 0 && 2093 if (range_count != 0 &&
2122 ranges->At(0).from() == 0 && 2094 ranges->at(0).from() == 0 &&
2123 ranges->At(0).to() >= Symbols::kMaxOneCharCodeSymbol) { 2095 ranges->at(0).to() >= String::kMaxOneByteCharCode) {
2124 // This will be handled in a later filter. 2096 // This will be handled in a later filter.
2125 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; 2097 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2126 return set_replacement(NULL); 2098 return set_replacement(NULL);
2127 } 2099 }
2128 } else { 2100 } else {
2129 if (range_count == 0 || 2101 if (range_count == 0 ||
2130 ranges->At(0).from() > Symbols::kMaxOneCharCodeSymbol) { 2102 ranges->at(0).from() > String::kMaxOneByteCharCode) {
2131 // This will be handled in a later filter. 2103 // This will be handled in a later filter.
2132 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; 2104 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2133 return set_replacement(NULL); 2105 return set_replacement(NULL);
2134 } 2106 }
2135 } 2107 }
2136 } 2108 }
2137 } 2109 }
2138 return FilterSuccessor(depth - 1, ignore_case); 2110 return FilterSuccessor(depth - 1, ignore_case);
2139 } 2111 }
2140 2112
2141 2113
2142 RegExpNode* LoopChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { 2114 RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) {
2143 if (info()->replacement_calculated) return replacement(); 2115 if (info()->replacement_calculated) return replacement();
2144 if (depth < 0) return this; 2116 if (depth < 0) return this;
2145 if (info()->visited) return this; 2117 if (info()->visited) return this;
2146 { 2118 {
2147 VisitMarker marker(info()); 2119 VisitMarker marker(info());
2148 2120
2149 RegExpNode* continue_replacement = 2121 RegExpNode* continue_replacement =
2150 continue_node_->FilterOneByte(depth - 1, ignore_case); 2122 continue_node_->FilterOneByte(depth - 1, ignore_case);
2151 // If we can't continue after the loop then there is no sense in doing the 2123 // If we can't continue after the loop then there is no sense in doing the
2152 // loop. 2124 // loop.
2153 if (continue_replacement == NULL) return set_replacement(NULL); 2125 if (continue_replacement == NULL) return set_replacement(NULL);
2154 } 2126 }
2155 2127
2156 return ChoiceNode::FilterOneByte(depth - 1, ignore_case); 2128 return ChoiceNode::FilterOneByte(depth - 1, ignore_case);
2157 } 2129 }
2158 2130
2159 2131
2160 RegExpNode* ChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { 2132 RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
2161 if (info()->replacement_calculated) return replacement(); 2133 if (info()->replacement_calculated) return replacement();
2162 if (depth < 0) return this; 2134 if (depth < 0) return this;
2163 if (info()->visited) return this; 2135 if (info()->visited) return this;
2164 VisitMarker marker(info()); 2136 VisitMarker marker(info());
2165 intptr_t choice_count = alternatives_->length(); 2137 int choice_count = alternatives_->length();
2166 2138
2167 for (intptr_t i = 0; i < choice_count; i++) { 2139 for (int i = 0; i < choice_count; i++) {
2168 GuardedAlternative alternative = alternatives_->At(i); 2140 GuardedAlternative alternative = alternatives_->at(i);
2169 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { 2141 if (alternative.guards() != NULL && alternative.guards()->length() != 0) {
2170 set_replacement(this); 2142 set_replacement(this);
2171 return this; 2143 return this;
2172 } 2144 }
2173 } 2145 }
2174 2146
2175 intptr_t surviving = 0; 2147 int surviving = 0;
2176 RegExpNode* survivor = NULL; 2148 RegExpNode* survivor = NULL;
2177 for (intptr_t i = 0; i < choice_count; i++) { 2149 for (int i = 0; i < choice_count; i++) {
2178 GuardedAlternative alternative = alternatives_->At(i); 2150 GuardedAlternative alternative = alternatives_->at(i);
2179 RegExpNode* replacement = 2151 RegExpNode* replacement =
2180 alternative.node()->FilterOneByte(depth - 1, ignore_case); 2152 alternative.node()->FilterOneByte(depth - 1, ignore_case);
2181 ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK. 2153 DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.
2182 if (replacement != NULL) { 2154 if (replacement != NULL) {
2183 (*alternatives_)[i].set_node(replacement); 2155 alternatives_->at(i).set_node(replacement);
2184 surviving++; 2156 surviving++;
2185 survivor = replacement; 2157 survivor = replacement;
2186 } 2158 }
2187 } 2159 }
2188 if (surviving < 2) return set_replacement(survivor); 2160 if (surviving < 2) return set_replacement(survivor);
2189 2161
2190 set_replacement(this); 2162 set_replacement(this);
2191 if (surviving == choice_count) { 2163 if (surviving == choice_count) {
2192 return this; 2164 return this;
2193 } 2165 }
2194 // Only some of the nodes survived the filtering. We need to rebuild the 2166 // Only some of the nodes survived the filtering. We need to rebuild the
2195 // alternatives list. 2167 // alternatives list.
2196 ZoneGrowableArray<GuardedAlternative>* new_alternatives = 2168 ZoneList<GuardedAlternative>* new_alternatives =
2197 new(I) ZoneGrowableArray<GuardedAlternative>(surviving); 2169 new(zone()) ZoneList<GuardedAlternative>(surviving, zone());
2198 for (intptr_t i = 0; i < choice_count; i++) { 2170 for (int i = 0; i < choice_count; i++) {
2199 RegExpNode* replacement = 2171 RegExpNode* replacement =
2200 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case); 2172 alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case);
2201 if (replacement != NULL) { 2173 if (replacement != NULL) {
2202 (*alternatives_)[i].set_node(replacement); 2174 alternatives_->at(i).set_node(replacement);
2203 new_alternatives->Add((*alternatives_)[i]); 2175 new_alternatives->Add(alternatives_->at(i), zone());
2204 } 2176 }
2205 } 2177 }
2206 alternatives_ = new_alternatives; 2178 alternatives_ = new_alternatives;
2207 return this; 2179 return this;
2208 } 2180 }
2209 2181
2210 2182
2211 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(intptr_t depth, 2183 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(int depth,
2212 bool ignore_case) { 2184 bool ignore_case) {
2213 if (info()->replacement_calculated) return replacement(); 2185 if (info()->replacement_calculated) return replacement();
2214 if (depth < 0) return this; 2186 if (depth < 0) return this;
2215 if (info()->visited) return this; 2187 if (info()->visited) return this;
2216 VisitMarker marker(info()); 2188 VisitMarker marker(info());
2217 // Alternative 0 is the negative lookahead, alternative 1 is what comes 2189 // Alternative 0 is the negative lookahead, alternative 1 is what comes
2218 // afterwards. 2190 // afterwards.
2219 RegExpNode* node = (*alternatives_)[1].node(); 2191 RegExpNode* node = alternatives_->at(1).node();
2220 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); 2192 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case);
2221 if (replacement == NULL) return set_replacement(NULL); 2193 if (replacement == NULL) return set_replacement(NULL);
2222 (*alternatives_)[1].set_node(replacement); 2194 alternatives_->at(1).set_node(replacement);
2223 2195
2224 RegExpNode* neg_node = (*alternatives_)[0].node(); 2196 RegExpNode* neg_node = alternatives_->at(0).node();
2225 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); 2197 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case);
2226 // If the negative lookahead is always going to fail then 2198 // If the negative lookahead is always going to fail then
2227 // we don't need to check it. 2199 // we don't need to check it.
2228 if (neg_replacement == NULL) return set_replacement(replacement); 2200 if (neg_replacement == NULL) return set_replacement(replacement);
2229 (*alternatives_)[0].set_node(neg_replacement); 2201 alternatives_->at(0).set_node(neg_replacement);
2230 return set_replacement(this); 2202 return set_replacement(this);
2231 } 2203 }
2232 2204
2233 2205
2234 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 2206 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2235 RegExpCompiler* compiler, 2207 RegExpCompiler* compiler,
2236 intptr_t characters_filled_in, 2208 int characters_filled_in,
2237 bool not_at_start) { 2209 bool not_at_start) {
2238 if (body_can_be_zero_length_ || info()->visited) return; 2210 if (body_can_be_zero_length_ || info()->visited) return;
2239 VisitMarker marker(info()); 2211 VisitMarker marker(info());
2240 return ChoiceNode::GetQuickCheckDetails(details, 2212 return ChoiceNode::GetQuickCheckDetails(details,
2241 compiler, 2213 compiler,
2242 characters_filled_in, 2214 characters_filled_in,
2243 not_at_start); 2215 not_at_start);
2244 } 2216 }
2245 2217
2246 2218
2247 void LoopChoiceNode::FillInBMInfo(intptr_t offset, 2219 void LoopChoiceNode::FillInBMInfo(int offset,
2248 intptr_t budget, 2220 int budget,
2249 BoyerMooreLookahead* bm, 2221 BoyerMooreLookahead* bm,
2250 bool not_at_start) { 2222 bool not_at_start) {
2251 if (body_can_be_zero_length_ || budget <= 0) { 2223 if (body_can_be_zero_length_ || budget <= 0) {
2252 bm->SetRest(offset); 2224 bm->SetRest(offset);
2253 SaveBMInfo(bm, not_at_start, offset); 2225 SaveBMInfo(bm, not_at_start, offset);
2254 return; 2226 return;
2255 } 2227 }
2256 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); 2228 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start);
2257 SaveBMInfo(bm, not_at_start, offset); 2229 SaveBMInfo(bm, not_at_start, offset);
2258 } 2230 }
2259 2231
2260 2232
2261 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 2233 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2262 RegExpCompiler* compiler, 2234 RegExpCompiler* compiler,
2263 intptr_t characters_filled_in, 2235 int characters_filled_in,
2264 bool not_at_start) { 2236 bool not_at_start) {
2265 not_at_start = (not_at_start || not_at_start_); 2237 not_at_start = (not_at_start || not_at_start_);
2266 intptr_t choice_count = alternatives_->length(); 2238 int choice_count = alternatives_->length();
2267 ASSERT(choice_count > 0); 2239 DCHECK(choice_count > 0);
2268 (*alternatives_)[0].node()->GetQuickCheckDetails(details, 2240 alternatives_->at(0).node()->GetQuickCheckDetails(details,
2269 compiler, 2241 compiler,
2270 characters_filled_in, 2242 characters_filled_in,
2271 not_at_start); 2243 not_at_start);
2272 for (intptr_t i = 1; i < choice_count; i++) { 2244 for (int i = 1; i < choice_count; i++) {
2273 QuickCheckDetails new_details(details->characters()); 2245 QuickCheckDetails new_details(details->characters());
2274 RegExpNode* node = (*alternatives_)[i].node(); 2246 RegExpNode* node = alternatives_->at(i).node();
2275 node->GetQuickCheckDetails(&new_details, compiler, 2247 node->GetQuickCheckDetails(&new_details, compiler,
2276 characters_filled_in, 2248 characters_filled_in,
2277 not_at_start); 2249 not_at_start);
2278 // Here we merge the quick match details of the two branches. 2250 // Here we merge the quick match details of the two branches.
2279 details->Merge(&new_details, characters_filled_in); 2251 details->Merge(&new_details, characters_filled_in);
2280 } 2252 }
2281 } 2253 }
2282 2254
2283 2255
2284 // Check for [0-9A-Z_a-z]. 2256 // Check for [0-9A-Z_a-z].
2285 static void EmitWordCheck(RegExpMacroAssembler* assembler, 2257 static void EmitWordCheck(RegExpMacroAssembler* assembler,
2286 BlockLabel* word, 2258 Label* word,
2287 BlockLabel* non_word, 2259 Label* non_word,
2288 bool fall_through_on_word) { 2260 bool fall_through_on_word) {
2289 if (assembler->CheckSpecialCharacterClass( 2261 if (assembler->CheckSpecialCharacterClass(
2290 fall_through_on_word ? 'w' : 'W', 2262 fall_through_on_word ? 'w' : 'W',
2291 fall_through_on_word ? non_word : word)) { 2263 fall_through_on_word ? non_word : word)) {
2292 // Optimized implementation available. 2264 // Optimized implementation available.
2293 return; 2265 return;
2294 } 2266 }
2295 assembler->CheckCharacterGT('z', non_word); 2267 assembler->CheckCharacterGT('z', non_word);
2296 assembler->CheckCharacterLT('0', non_word); 2268 assembler->CheckCharacterLT('0', non_word);
2297 assembler->CheckCharacterGT('a' - 1, word); 2269 assembler->CheckCharacterGT('a' - 1, word);
(...skipping 12 matching lines...) Expand all
2310 // that matches newline or the start of input). 2282 // that matches newline or the start of input).
2311 static void EmitHat(RegExpCompiler* compiler, 2283 static void EmitHat(RegExpCompiler* compiler,
2312 RegExpNode* on_success, 2284 RegExpNode* on_success,
2313 Trace* trace) { 2285 Trace* trace) {
2314 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2286 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2315 // We will be loading the previous character into the current character 2287 // We will be loading the previous character into the current character
2316 // register. 2288 // register.
2317 Trace new_trace(*trace); 2289 Trace new_trace(*trace);
2318 new_trace.InvalidateCurrentCharacter(); 2290 new_trace.InvalidateCurrentCharacter();
2319 2291
2320 BlockLabel ok; 2292 Label ok;
2321 if (new_trace.cp_offset() == 0) { 2293 if (new_trace.cp_offset() == 0) {
2322 // The start of input counts as a newline in this context, so skip to 2294 // The start of input counts as a newline in this context, so skip to
2323 // ok if we are at the start. 2295 // ok if we are at the start.
2324 assembler->CheckAtStart(&ok); 2296 assembler->CheckAtStart(&ok);
2325 } 2297 }
2326 // We already checked that we are not at the start of input so it must be 2298 // We already checked that we are not at the start of input so it must be
2327 // OK to load the previous character. 2299 // OK to load the previous character.
2328 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, 2300 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
2329 new_trace.backtrack(), 2301 new_trace.backtrack(),
2330 false); 2302 false);
2331 if (!assembler->CheckSpecialCharacterClass('n', 2303 if (!assembler->CheckSpecialCharacterClass('n',
2332 new_trace.backtrack())) { 2304 new_trace.backtrack())) {
2333 // Newline means \n, \r, 0x2028 or 0x2029. 2305 // Newline means \n, \r, 0x2028 or 0x2029.
2334 if (!compiler->one_byte()) { 2306 if (!compiler->one_byte()) {
2335 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); 2307 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
2336 } 2308 }
2337 assembler->CheckCharacter('\n', &ok); 2309 assembler->CheckCharacter('\n', &ok);
2338 assembler->CheckNotCharacter('\r', new_trace.backtrack()); 2310 assembler->CheckNotCharacter('\r', new_trace.backtrack());
2339 } 2311 }
2340 assembler->BindBlock(&ok); 2312 assembler->Bind(&ok);
2341 on_success->Emit(compiler, &new_trace); 2313 on_success->Emit(compiler, &new_trace);
2342 } 2314 }
2343 2315
2344 2316
2345 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). 2317 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).
2346 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { 2318 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
2347 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2319 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2348 Trace::TriBool next_is_word_character = Trace::UNKNOWN; 2320 Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2349 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); 2321 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
2350 BoyerMooreLookahead* lookahead = bm_info(not_at_start); 2322 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2351 if (lookahead == NULL) { 2323 if (lookahead == NULL) {
2352 intptr_t eats_at_least = 2324 int eats_at_least =
2353 Utils::Minimum(kMaxLookaheadForBoyerMoore, 2325 Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore,
2354 EatsAtLeast(kMaxLookaheadForBoyerMoore, 2326 kRecursionBudget,
2355 kRecursionBudget, 2327 not_at_start));
2356 not_at_start));
2357 if (eats_at_least >= 1) { 2328 if (eats_at_least >= 1) {
2358 BoyerMooreLookahead* bm = 2329 BoyerMooreLookahead* bm =
2359 new(I) BoyerMooreLookahead(eats_at_least, compiler, I); 2330 new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());
2360 FillInBMInfo(0, kRecursionBudget, bm, not_at_start); 2331 FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
2361 if (bm->at(0)->is_non_word()) 2332 if (bm->at(0)->is_non_word())
2362 next_is_word_character = Trace::FALSE_VALUE; 2333 next_is_word_character = Trace::FALSE_VALUE;
2363 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; 2334 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2364 } 2335 }
2365 } else { 2336 } else {
2366 if (lookahead->at(0)->is_non_word()) 2337 if (lookahead->at(0)->is_non_word())
2367 next_is_word_character = Trace::FALSE_VALUE; 2338 next_is_word_character = Trace::FALSE_VALUE;
2368 if (lookahead->at(0)->is_word()) 2339 if (lookahead->at(0)->is_word())
2369 next_is_word_character = Trace::TRUE_VALUE; 2340 next_is_word_character = Trace::TRUE_VALUE;
2370 } 2341 }
2371 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY); 2342 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
2372 if (next_is_word_character == Trace::UNKNOWN) { 2343 if (next_is_word_character == Trace::UNKNOWN) {
2373 BlockLabel before_non_word; 2344 Label before_non_word;
2374 BlockLabel before_word; 2345 Label before_word;
2375 if (trace->characters_preloaded() != 1) { 2346 if (trace->characters_preloaded() != 1) {
2376 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); 2347 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
2377 } 2348 }
2378 // Fall through on non-word. 2349 // Fall through on non-word.
2379 EmitWordCheck(assembler, &before_word, &before_non_word, false); 2350 EmitWordCheck(assembler, &before_word, &before_non_word, false);
2380 // Next character is not a word character. 2351 // Next character is not a word character.
2381 assembler->BindBlock(&before_non_word); 2352 assembler->Bind(&before_non_word);
2382 BlockLabel ok; 2353 Label ok;
2383 // Backtrack on \B (non-boundary check) if previous is a word,
2384 // since we know next *is not* a word and this would be a boundary.
2385 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); 2354 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2355 assembler->GoTo(&ok);
2386 2356
2387 if (!assembler->IsClosed()) { 2357 assembler->Bind(&before_word);
2388 assembler->GoTo(&ok);
2389 }
2390
2391 assembler->BindBlock(&before_word);
2392 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); 2358 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2393 assembler->BindBlock(&ok); 2359 assembler->Bind(&ok);
2394 } else if (next_is_word_character == Trace::TRUE_VALUE) { 2360 } else if (next_is_word_character == Trace::TRUE_VALUE) {
2395 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); 2361 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2396 } else { 2362 } else {
2397 ASSERT(next_is_word_character == Trace::FALSE_VALUE); 2363 DCHECK(next_is_word_character == Trace::FALSE_VALUE);
2398 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); 2364 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2399 } 2365 }
2400 } 2366 }
2401 2367
2402 2368
2403 void AssertionNode::BacktrackIfPrevious( 2369 void AssertionNode::BacktrackIfPrevious(
2404 RegExpCompiler* compiler, 2370 RegExpCompiler* compiler,
2405 Trace* trace, 2371 Trace* trace,
2406 AssertionNode::IfPrevious backtrack_if_previous) { 2372 AssertionNode::IfPrevious backtrack_if_previous) {
2407 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2373 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2408 Trace new_trace(*trace); 2374 Trace new_trace(*trace);
2409 new_trace.InvalidateCurrentCharacter(); 2375 new_trace.InvalidateCurrentCharacter();
2410 2376
2411 BlockLabel fall_through, dummy; 2377 Label fall_through, dummy;
2412 2378
2413 BlockLabel* non_word = backtrack_if_previous == kIsNonWord ? 2379 Label* non_word = backtrack_if_previous == kIsNonWord ?
2414 new_trace.backtrack() : 2380 new_trace.backtrack() :
2415 &fall_through; 2381 &fall_through;
2416 BlockLabel* word = backtrack_if_previous == kIsNonWord ? 2382 Label* word = backtrack_if_previous == kIsNonWord ?
2417 &fall_through : 2383 &fall_through :
2418 new_trace.backtrack(); 2384 new_trace.backtrack();
2419 2385
2420 if (new_trace.cp_offset() == 0) { 2386 if (new_trace.cp_offset() == 0) {
2421 // The start of input counts as a non-word character, so the question is 2387 // The start of input counts as a non-word character, so the question is
2422 // decided if we are at the start. 2388 // decided if we are at the start.
2423 assembler->CheckAtStart(non_word); 2389 assembler->CheckAtStart(non_word);
2424 } 2390 }
2425 // We already checked that we are not at the start of input so it must be 2391 // We already checked that we are not at the start of input so it must be
2426 // OK to load the previous character. 2392 // OK to load the previous character.
2427 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); 2393 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
2428 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); 2394 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
2429 2395
2430 assembler->BindBlock(&fall_through); 2396 assembler->Bind(&fall_through);
2431 on_success()->Emit(compiler, &new_trace); 2397 on_success()->Emit(compiler, &new_trace);
2432 } 2398 }
2433 2399
2434 2400
2435 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, 2401 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
2436 RegExpCompiler* compiler, 2402 RegExpCompiler* compiler,
2437 intptr_t filled_in, 2403 int filled_in,
2438 bool not_at_start) { 2404 bool not_at_start) {
2439 if (assertion_type_ == AT_START && not_at_start) { 2405 if (assertion_type_ == AT_START && not_at_start) {
2440 details->set_cannot_match(); 2406 details->set_cannot_match();
2441 return; 2407 return;
2442 } 2408 }
2443 return on_success()->GetQuickCheckDetails(details, 2409 return on_success()->GetQuickCheckDetails(details,
2444 compiler, 2410 compiler,
2445 filled_in, 2411 filled_in,
2446 not_at_start); 2412 not_at_start);
2447 } 2413 }
2448 2414
2449 2415
2450 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { 2416 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2451 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2417 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2452 switch (assertion_type_) { 2418 switch (assertion_type_) {
2453 case AT_END: { 2419 case AT_END: {
2454 BlockLabel ok; 2420 Label ok;
2455 assembler->CheckPosition(trace->cp_offset(), &ok); 2421 assembler->CheckPosition(trace->cp_offset(), &ok);
2456 assembler->GoTo(trace->backtrack()); 2422 assembler->GoTo(trace->backtrack());
2457 assembler->BindBlock(&ok); 2423 assembler->Bind(&ok);
2458 break; 2424 break;
2459 } 2425 }
2460 case AT_START: { 2426 case AT_START: {
2461 if (trace->at_start() == Trace::FALSE_VALUE) { 2427 if (trace->at_start() == Trace::FALSE_VALUE) {
2462 assembler->GoTo(trace->backtrack()); 2428 assembler->GoTo(trace->backtrack());
2463 return; 2429 return;
2464 } 2430 }
2465 if (trace->at_start() == Trace::UNKNOWN) { 2431 if (trace->at_start() == Trace::UNKNOWN) {
2466 assembler->CheckNotAtStart(trace->backtrack()); 2432 assembler->CheckNotAtStart(trace->backtrack());
2467 Trace at_start_trace = *trace; 2433 Trace at_start_trace = *trace;
2468 at_start_trace.set_at_start(true); 2434 at_start_trace.set_at_start(true);
2469 on_success()->Emit(compiler, &at_start_trace); 2435 on_success()->Emit(compiler, &at_start_trace);
2470 return; 2436 return;
2471 } 2437 }
2472 } 2438 }
2473 break; 2439 break;
2474 case AFTER_NEWLINE: 2440 case AFTER_NEWLINE:
2475 EmitHat(compiler, on_success(), trace); 2441 EmitHat(compiler, on_success(), trace);
2476 return; 2442 return;
2477 case AT_BOUNDARY: 2443 case AT_BOUNDARY:
2478 case AT_NON_BOUNDARY: { 2444 case AT_NON_BOUNDARY: {
2479 EmitBoundaryCheck(compiler, trace); 2445 EmitBoundaryCheck(compiler, trace);
2480 return; 2446 return;
2481 } 2447 }
2482 } 2448 }
2483 on_success()->Emit(compiler, trace); 2449 on_success()->Emit(compiler, trace);
2484 } 2450 }
2485 2451
2486 2452
2487 static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) { 2453 static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
2488 if (quick_check == NULL) return false; 2454 if (quick_check == NULL) return false;
2489 if (offset >= quick_check->characters()) return false; 2455 if (offset >= quick_check->characters()) return false;
2490 return quick_check->positions(offset)->determines_perfectly; 2456 return quick_check->positions(offset)->determines_perfectly;
2491 } 2457 }
2492 2458
2493 2459
2494 static void UpdateBoundsCheck(intptr_t index, intptr_t* checked_up_to) { 2460 static void UpdateBoundsCheck(int index, int* checked_up_to) {
2495 if (index > *checked_up_to) { 2461 if (index > *checked_up_to) {
2496 *checked_up_to = index; 2462 *checked_up_to = index;
2497 } 2463 }
2498 } 2464 }
2499 2465
2500 2466
2501 // We call this repeatedly to generate code for each pass over the text node. 2467 // We call this repeatedly to generate code for each pass over the text node.
2502 // The passes are in increasing order of difficulty because we hope one 2468 // The passes are in increasing order of difficulty because we hope one
2503 // of the first passes will fail in which case we are saved the work of the 2469 // of the first passes will fail in which case we are saved the work of the
2504 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ 2470 // later passes. for example for the case independent regexp /%[asdfghjkl]a/
(...skipping 20 matching lines...) Expand all
2525 // order to get to the code we are now generating. The quick check can involve 2491 // order to get to the code we are now generating. The quick check can involve
2526 // loading characters, which means we do not need to recheck the bounds 2492 // loading characters, which means we do not need to recheck the bounds
2527 // up to the limit the quick check already checked. In addition the quick 2493 // up to the limit the quick check already checked. In addition the quick
2528 // check can have involved a mask and compare operation which may simplify 2494 // check can have involved a mask and compare operation which may simplify
2529 // or obviate the need for further checks at some character positions. 2495 // or obviate the need for further checks at some character positions.
2530 void TextNode::TextEmitPass(RegExpCompiler* compiler, 2496 void TextNode::TextEmitPass(RegExpCompiler* compiler,
2531 TextEmitPassType pass, 2497 TextEmitPassType pass,
2532 bool preloaded, 2498 bool preloaded,
2533 Trace* trace, 2499 Trace* trace,
2534 bool first_element_checked, 2500 bool first_element_checked,
2535 intptr_t* checked_up_to) { 2501 int* checked_up_to) {
2536 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2502 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2503 Isolate* isolate = assembler->zone()->isolate();
2537 bool one_byte = compiler->one_byte(); 2504 bool one_byte = compiler->one_byte();
2538 BlockLabel* backtrack = trace->backtrack(); 2505 Label* backtrack = trace->backtrack();
2539 QuickCheckDetails* quick_check = trace->quick_check_performed(); 2506 QuickCheckDetails* quick_check = trace->quick_check_performed();
2540 intptr_t element_count = elms_->length(); 2507 int element_count = elms_->length();
2541 for (intptr_t i = preloaded ? 0 : element_count - 1; i >= 0; i--) { 2508 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
2542 TextElement elm = elms_->At(i); 2509 TextElement elm = elms_->at(i);
2543 intptr_t cp_offset = trace->cp_offset() + elm.cp_offset(); 2510 int cp_offset = trace->cp_offset() + elm.cp_offset();
2544 if (elm.text_type() == TextElement::ATOM) { 2511 if (elm.text_type() == TextElement::ATOM) {
2545 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); 2512 Vector<const uc16> quarks = elm.atom()->data();
2546 for (intptr_t j = preloaded ? 0 : quarks->length() - 1; j >= 0; j--) { 2513 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
2547 if (first_element_checked && i == 0 && j == 0) continue; 2514 if (first_element_checked && i == 0 && j == 0) continue;
2548 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; 2515 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
2549 EmitCharacterFunction* emit_function = NULL; 2516 EmitCharacterFunction* emit_function = NULL;
2550 switch (pass) { 2517 switch (pass) {
2551 case NON_LATIN1_MATCH: 2518 case NON_LATIN1_MATCH:
2552 ASSERT(one_byte); 2519 DCHECK(one_byte);
2553 if (quarks->At(j) > Symbols::kMaxOneCharCodeSymbol) { 2520 if (quarks[j] > String::kMaxOneByteCharCode) {
2554 assembler->GoTo(backtrack); 2521 assembler->GoTo(backtrack);
2555 return; 2522 return;
2556 } 2523 }
2557 break; 2524 break;
2558 case NON_LETTER_CHARACTER_MATCH: 2525 case NON_LETTER_CHARACTER_MATCH:
2559 emit_function = &EmitAtomNonLetter; 2526 emit_function = &EmitAtomNonLetter;
2560 break; 2527 break;
2561 case SIMPLE_CHARACTER_MATCH: 2528 case SIMPLE_CHARACTER_MATCH:
2562 emit_function = &EmitSimpleCharacter; 2529 emit_function = &EmitSimpleCharacter;
2563 break; 2530 break;
2564 case CASE_CHARACTER_MATCH: 2531 case CASE_CHARACTER_MATCH:
2565 emit_function = &EmitAtomLetter; 2532 emit_function = &EmitAtomLetter;
2566 break; 2533 break;
2567 default: 2534 default:
2568 break; 2535 break;
2569 } 2536 }
2570 if (emit_function != NULL) { 2537 if (emit_function != NULL) {
2571 bool bound_checked = emit_function(I, 2538 bool bound_checked = emit_function(isolate,
2572 compiler, 2539 compiler,
2573 quarks->At(j), 2540 quarks[j],
2574 backtrack, 2541 backtrack,
2575 cp_offset + j, 2542 cp_offset + j,
2576 *checked_up_to < cp_offset + j, 2543 *checked_up_to < cp_offset + j,
2577 preloaded); 2544 preloaded);
2578 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); 2545 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
2579 } 2546 }
2580 } 2547 }
2581 } else { 2548 } else {
2582 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); 2549 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());
2583 if (pass == CHARACTER_CLASS_MATCH) { 2550 if (pass == CHARACTER_CLASS_MATCH) {
2584 if (first_element_checked && i == 0) continue; 2551 if (first_element_checked && i == 0) continue;
2585 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; 2552 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
2586 RegExpCharacterClass* cc = elm.char_class(); 2553 RegExpCharacterClass* cc = elm.char_class();
2587 EmitCharClass(assembler, 2554 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
2588 cc, 2555 *checked_up_to < cp_offset, preloaded, zone());
2589 one_byte,
2590 backtrack,
2591 cp_offset,
2592 *checked_up_to < cp_offset,
2593 preloaded,
2594 I);
2595 UpdateBoundsCheck(cp_offset, checked_up_to); 2556 UpdateBoundsCheck(cp_offset, checked_up_to);
2596 } 2557 }
2597 } 2558 }
2598 } 2559 }
2599 } 2560 }
2600 2561
2601 2562
2602 intptr_t TextNode::Length() { 2563 int TextNode::Length() {
2603 TextElement elm = elms_->Last(); 2564 TextElement elm = elms_->last();
2604 ASSERT(elm.cp_offset() >= 0); 2565 DCHECK(elm.cp_offset() >= 0);
2605 return elm.cp_offset() + elm.length(); 2566 return elm.cp_offset() + elm.length();
2606 } 2567 }
2607 2568
2608 2569
2609 bool TextNode::SkipPass(intptr_t intptr_t_pass, bool ignore_case) { 2570 bool TextNode::SkipPass(int int_pass, bool ignore_case) {
2610 TextEmitPassType pass = static_cast<TextEmitPassType>(intptr_t_pass); 2571 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
2611 if (ignore_case) { 2572 if (ignore_case) {
2612 return pass == SIMPLE_CHARACTER_MATCH; 2573 return pass == SIMPLE_CHARACTER_MATCH;
2613 } else { 2574 } else {
2614 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; 2575 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
2615 } 2576 }
2616 } 2577 }
2617 2578
2618 2579
2619 // This generates the code to match a text node. A text node can contain 2580 // This generates the code to match a text node. A text node can contain
2620 // straight character sequences (possibly to be matched in a case-independent 2581 // straight character sequences (possibly to be matched in a case-independent
2621 // way) and character classes. For efficiency we do not do this in a single 2582 // way) and character classes. For efficiency we do not do this in a single
2622 // pass from left to right. Instead we pass over the text node several times, 2583 // pass from left to right. Instead we pass over the text node several times,
2623 // emitting code for some character positions every time. See the comment on 2584 // emitting code for some character positions every time. See the comment on
2624 // TextEmitPass for details. 2585 // TextEmitPass for details.
2625 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { 2586 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2626 LimitResult limit_result = LimitVersions(compiler, trace); 2587 LimitResult limit_result = LimitVersions(compiler, trace);
2627 if (limit_result == DONE) return; 2588 if (limit_result == DONE) return;
2628 ASSERT(limit_result == CONTINUE); 2589 DCHECK(limit_result == CONTINUE);
2629 2590
2630 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) { 2591 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
2631 compiler->SetRegExpTooBig(); 2592 compiler->SetRegExpTooBig();
2632 return; 2593 return;
2633 } 2594 }
2634 2595
2635 if (compiler->one_byte()) { 2596 if (compiler->one_byte()) {
2636 intptr_t dummy = 0; 2597 int dummy = 0;
2637 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy); 2598 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
2638 } 2599 }
2639 2600
2640 bool first_elt_done = false; 2601 bool first_elt_done = false;
2641 intptr_t bound_checked_to = trace->cp_offset() - 1; 2602 int bound_checked_to = trace->cp_offset() - 1;
2642 bound_checked_to += trace->bound_checked_up_to(); 2603 bound_checked_to += trace->bound_checked_up_to();
2643 2604
2644 // If a character is preloaded into the current character register then 2605 // If a character is preloaded into the current character register then
2645 // check that now. 2606 // check that now.
2646 if (trace->characters_preloaded() == 1) { 2607 if (trace->characters_preloaded() == 1) {
2647 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { 2608 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
2648 if (!SkipPass(pass, compiler->ignore_case())) { 2609 if (!SkipPass(pass, compiler->ignore_case())) {
2649 TextEmitPass(compiler, 2610 TextEmitPass(compiler,
2650 static_cast<TextEmitPassType>(pass), 2611 static_cast<TextEmitPassType>(pass),
2651 true, 2612 true,
2652 trace, 2613 trace,
2653 false, 2614 false,
2654 &bound_checked_to); 2615 &bound_checked_to);
2655 } 2616 }
2656 } 2617 }
2657 first_elt_done = true; 2618 first_elt_done = true;
2658 } 2619 }
2659 2620
2660 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { 2621 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
2661 if (!SkipPass(pass, compiler->ignore_case())) { 2622 if (!SkipPass(pass, compiler->ignore_case())) {
2662 TextEmitPass(compiler, 2623 TextEmitPass(compiler,
2663 static_cast<TextEmitPassType>(pass), 2624 static_cast<TextEmitPassType>(pass),
2664 false, 2625 false,
2665 trace, 2626 trace,
2666 first_elt_done, 2627 first_elt_done,
2667 &bound_checked_to); 2628 &bound_checked_to);
2668 } 2629 }
2669 } 2630 }
2670 2631
2671 Trace successor_trace(*trace); 2632 Trace successor_trace(*trace);
2672 successor_trace.set_at_start(false); 2633 successor_trace.set_at_start(false);
2673 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); 2634 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
2674 RecursionCheck rc(compiler); 2635 RecursionCheck rc(compiler);
2675 on_success()->Emit(compiler, &successor_trace); 2636 on_success()->Emit(compiler, &successor_trace);
2676 } 2637 }
2677 2638
2678 2639
2679 void Trace::InvalidateCurrentCharacter() { 2640 void Trace::InvalidateCurrentCharacter() {
2680 characters_preloaded_ = 0; 2641 characters_preloaded_ = 0;
2681 } 2642 }
2682 2643
2683 2644
2684 void Trace::AdvanceCurrentPositionInTrace(intptr_t by, 2645 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
2685 RegExpCompiler* compiler) { 2646 DCHECK(by > 0);
2686 ASSERT(by > 0);
2687 // We don't have an instruction for shifting the current character register 2647 // We don't have an instruction for shifting the current character register
2688 // down or for using a shifted value for anything so lets just forget that 2648 // down or for using a shifted value for anything so lets just forget that
2689 // we preloaded any characters into it. 2649 // we preloaded any characters into it.
2690 characters_preloaded_ = 0; 2650 characters_preloaded_ = 0;
2691 // Adjust the offsets of the quick check performed information. This 2651 // Adjust the offsets of the quick check performed information. This
2692 // information is used to find out what we already determined about the 2652 // information is used to find out what we already determined about the
2693 // characters by means of mask and compare. 2653 // characters by means of mask and compare.
2694 quick_check_performed_.Advance(by, compiler->one_byte()); 2654 quick_check_performed_.Advance(by, compiler->one_byte());
2695 cp_offset_ += by; 2655 cp_offset_ += by;
2696 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { 2656 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
2697 compiler->SetRegExpTooBig(); 2657 compiler->SetRegExpTooBig();
2698 cp_offset_ = 0; 2658 cp_offset_ = 0;
2699 } 2659 }
2700 bound_checked_up_to_ = Utils::Maximum(static_cast<intptr_t>(0), 2660 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
2701 bound_checked_up_to_ - by);
2702 } 2661 }
2703 2662
2704 2663
2705 void TextNode::MakeCaseIndependent(bool is_one_byte) { 2664 void TextNode::MakeCaseIndependent(bool is_one_byte) {
2706 intptr_t element_count = elms_->length(); 2665 int element_count = elms_->length();
2707 for (intptr_t i = 0; i < element_count; i++) { 2666 for (int i = 0; i < element_count; i++) {
2708 TextElement elm = elms_->At(i); 2667 TextElement elm = elms_->at(i);
2709 if (elm.text_type() == TextElement::CHAR_CLASS) { 2668 if (elm.text_type() == TextElement::CHAR_CLASS) {
2710 RegExpCharacterClass* cc = elm.char_class(); 2669 RegExpCharacterClass* cc = elm.char_class();
2711 // None of the standard character classes is different in the case 2670 // None of the standard character classes is different in the case
2712 // independent case and it slows us down if we don't know that. 2671 // independent case and it slows us down if we don't know that.
2713 if (cc->is_standard()) continue; 2672 if (cc->is_standard(zone())) continue;
2714 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); 2673 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
2715 intptr_t range_count = ranges->length(); 2674 int range_count = ranges->length();
2716 for (intptr_t j = 0; j < range_count; j++) { 2675 for (int j = 0; j < range_count; j++) {
2717 (*ranges)[j].AddCaseEquivalents(ranges, is_one_byte, I); 2676 ranges->at(j).AddCaseEquivalents(ranges, is_one_byte, zone());
2718 } 2677 }
2719 } 2678 }
2720 } 2679 }
2721 } 2680 }
2722 2681
2723 2682
2724 intptr_t TextNode::GreedyLoopTextLength() { 2683 int TextNode::GreedyLoopTextLength() {
2725 TextElement elm = elms_->At(elms_->length() - 1); 2684 TextElement elm = elms_->at(elms_->length() - 1);
2726 return elm.cp_offset() + elm.length(); 2685 return elm.cp_offset() + elm.length();
2727 } 2686 }
2728 2687
2729 2688
2730 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( 2689 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
2731 RegExpCompiler* compiler) { 2690 RegExpCompiler* compiler) {
2732 if (elms_->length() != 1) return NULL; 2691 if (elms_->length() != 1) return NULL;
2733 TextElement elm = elms_->At(0); 2692 TextElement elm = elms_->at(0);
2734 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; 2693 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
2735 RegExpCharacterClass* node = elm.char_class(); 2694 RegExpCharacterClass* node = elm.char_class();
2736 ZoneGrowableArray<CharacterRange>* ranges = node->ranges(); 2695 ZoneList<CharacterRange>* ranges = node->ranges(zone());
2737 if (!CharacterRange::IsCanonical(ranges)) { 2696 if (!CharacterRange::IsCanonical(ranges)) {
2738 CharacterRange::Canonicalize(ranges); 2697 CharacterRange::Canonicalize(ranges);
2739 } 2698 }
2740 if (node->is_negated()) { 2699 if (node->is_negated()) {
2741 return ranges->length() == 0 ? on_success() : NULL; 2700 return ranges->length() == 0 ? on_success() : NULL;
2742 } 2701 }
2743 if (ranges->length() != 1) return NULL; 2702 if (ranges->length() != 1) return NULL;
2744 uint32_t max_char; 2703 uint32_t max_char;
2745 if (compiler->one_byte()) { 2704 if (compiler->one_byte()) {
2746 max_char = Symbols::kMaxOneCharCodeSymbol; 2705 max_char = String::kMaxOneByteCharCode;
2747 } else { 2706 } else {
2748 max_char = Utf16::kMaxCodeUnit; 2707 max_char = String::kMaxUtf16CodeUnit;
2749 } 2708 }
2750 return ranges->At(0).IsEverything(max_char) ? on_success() : NULL; 2709 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL;
2751 } 2710 }
2752 2711
2753 2712
2754 // Finds the fixed match length of a sequence of nodes that goes from 2713 // Finds the fixed match length of a sequence of nodes that goes from
2755 // this alternative and back to this choice node. If there are variable 2714 // this alternative and back to this choice node. If there are variable
2756 // length nodes or other complications in the way then return a sentinel 2715 // length nodes or other complications in the way then return a sentinel
2757 // value indicating that a greedy loop cannot be constructed. 2716 // value indicating that a greedy loop cannot be constructed.
2758 intptr_t ChoiceNode::GreedyLoopTextLengthForAlternative( 2717 int ChoiceNode::GreedyLoopTextLengthForAlternative(
2759 GuardedAlternative* alternative) { 2718 GuardedAlternative* alternative) {
2760 intptr_t length = 0; 2719 int length = 0;
2761 RegExpNode* node = alternative->node(); 2720 RegExpNode* node = alternative->node();
2762 // Later we will generate code for all these text nodes using recursion 2721 // Later we will generate code for all these text nodes using recursion
2763 // so we have to limit the max number. 2722 // so we have to limit the max number.
2764 intptr_t recursion_depth = 0; 2723 int recursion_depth = 0;
2765 while (node != this) { 2724 while (node != this) {
2766 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { 2725 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
2767 return kNodeIsTooComplexForGreedyLoops; 2726 return kNodeIsTooComplexForGreedyLoops;
2768 } 2727 }
2769 intptr_t node_length = node->GreedyLoopTextLength(); 2728 int node_length = node->GreedyLoopTextLength();
2770 if (node_length == kNodeIsTooComplexForGreedyLoops) { 2729 if (node_length == kNodeIsTooComplexForGreedyLoops) {
2771 return kNodeIsTooComplexForGreedyLoops; 2730 return kNodeIsTooComplexForGreedyLoops;
2772 } 2731 }
2773 length += node_length; 2732 length += node_length;
2774 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); 2733 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
2775 node = seq_node->on_success(); 2734 node = seq_node->on_success();
2776 } 2735 }
2777 return length; 2736 return length;
2778 } 2737 }
2779 2738
2780 2739
2781 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { 2740 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
2782 ASSERT(loop_node_ == NULL); 2741 DCHECK_EQ(loop_node_, NULL);
2783 AddAlternative(alt); 2742 AddAlternative(alt);
2784 loop_node_ = alt.node(); 2743 loop_node_ = alt.node();
2785 } 2744 }
2786 2745
2787 2746
2788 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { 2747 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
2789 ASSERT(continue_node_ == NULL); 2748 DCHECK_EQ(continue_node_, NULL);
2790 AddAlternative(alt); 2749 AddAlternative(alt);
2791 continue_node_ = alt.node(); 2750 continue_node_ = alt.node();
2792 } 2751 }
2793 2752
2794 2753
2795 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 2754 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2796 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 2755 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2797 if (trace->stop_node() == this) { 2756 if (trace->stop_node() == this) {
2798 // Back edge of greedy optimized loop node graph. 2757 // Back edge of greedy optimized loop node graph.
2799 intptr_t text_length = 2758 int text_length =
2800 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); 2759 GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
2801 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); 2760 DCHECK(text_length != kNodeIsTooComplexForGreedyLoops);
2802 // Update the counter-based backtracking info on the stack. This is an 2761 // Update the counter-based backtracking info on the stack. This is an
2803 // optimization for greedy loops (see below). 2762 // optimization for greedy loops (see below).
2804 ASSERT(trace->cp_offset() == text_length); 2763 DCHECK(trace->cp_offset() == text_length);
2805 macro_assembler->AdvanceCurrentPosition(text_length); 2764 macro_assembler->AdvanceCurrentPosition(text_length);
2806 macro_assembler->GoTo(trace->loop_label()); 2765 macro_assembler->GoTo(trace->loop_label());
2807 return; 2766 return;
2808 } 2767 }
2809 ASSERT(trace->stop_node() == NULL); 2768 DCHECK(trace->stop_node() == NULL);
2810 if (!trace->is_trivial()) { 2769 if (!trace->is_trivial()) {
2811 trace->Flush(compiler, this); 2770 trace->Flush(compiler, this);
2812 return; 2771 return;
2813 } 2772 }
2814 ChoiceNode::Emit(compiler, trace); 2773 ChoiceNode::Emit(compiler, trace);
2815 } 2774 }
2816 2775
2817 2776
2818 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, 2777 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
2819 intptr_t eats_at_least) { 2778 int eats_at_least) {
2820 intptr_t preload_characters = Utils::Minimum(static_cast<intptr_t>(4), 2779 int preload_characters = Min(4, eats_at_least);
2821 eats_at_least);
2822 if (compiler->macro_assembler()->CanReadUnaligned()) { 2780 if (compiler->macro_assembler()->CanReadUnaligned()) {
2823 bool one_byte = compiler->one_byte(); 2781 bool one_byte = compiler->one_byte();
2824 if (one_byte) { 2782 if (one_byte) {
2825 if (preload_characters > 4) preload_characters = 4; 2783 if (preload_characters > 4) preload_characters = 4;
2826 // We can't preload 3 characters because there is no machine instruction 2784 // We can't preload 3 characters because there is no machine instruction
2827 // to do that. We can't just load 4 because we could be reading 2785 // to do that. We can't just load 4 because we could be reading
2828 // beyond the end of the string, which could cause a memory fault. 2786 // beyond the end of the string, which could cause a memory fault.
2829 if (preload_characters == 3) preload_characters = 2; 2787 if (preload_characters == 3) preload_characters = 2;
2830 } else { 2788 } else {
2831 if (preload_characters > 2) preload_characters = 2; 2789 if (preload_characters > 2) preload_characters = 2;
2832 } 2790 }
2833 } else { 2791 } else {
2834 if (preload_characters > 1) preload_characters = 1; 2792 if (preload_characters > 1) preload_characters = 1;
2835 } 2793 }
2836 return preload_characters; 2794 return preload_characters;
2837 } 2795 }
2838 2796
2839 2797
2840 // This structure is used when generating the alternatives in a choice node. It 2798 // This class is used when generating the alternatives in a choice node. It
2841 // records the way the alternative is being code generated. 2799 // records the way the alternative is being code generated.
2842 struct AlternativeGeneration { 2800 class AlternativeGeneration: public Malloced {
2801 public:
2843 AlternativeGeneration() 2802 AlternativeGeneration()
2844 : possible_success(), 2803 : possible_success(),
2845 expects_preload(false), 2804 expects_preload(false),
2846 after(), 2805 after(),
2847 quick_check_details() { } 2806 quick_check_details() { }
2848 BlockLabel possible_success; 2807 Label possible_success;
2849 bool expects_preload; 2808 bool expects_preload;
2850 BlockLabel after; 2809 Label after;
2851 QuickCheckDetails quick_check_details; 2810 QuickCheckDetails quick_check_details;
2852 }; 2811 };
2853 2812
2854 2813
2855 // Creates a list of AlternativeGenerations. If the list has a reasonable 2814 // Creates a list of AlternativeGenerations. If the list has a reasonable
2856 // size then it is on the stack, otherwise the excess is on the heap. 2815 // size then it is on the stack, otherwise the excess is on the heap.
2857 class AlternativeGenerationList { 2816 class AlternativeGenerationList {
2858 public: 2817 public:
2859 explicit AlternativeGenerationList(intptr_t count) 2818 AlternativeGenerationList(int count, Zone* zone)
2860 : alt_gens_(count) { 2819 : alt_gens_(count, zone) {
2861 for (intptr_t i = 0; i < count && i < kAFew; i++) { 2820 for (int i = 0; i < count && i < kAFew; i++) {
2862 alt_gens_.Add(a_few_alt_gens_ + i); 2821 alt_gens_.Add(a_few_alt_gens_ + i, zone);
2863 } 2822 }
2864 for (intptr_t i = kAFew; i < count; i++) { 2823 for (int i = kAFew; i < count; i++) {
2865 alt_gens_.Add(new AlternativeGeneration()); 2824 alt_gens_.Add(new AlternativeGeneration(), zone);
2866 } 2825 }
2867 } 2826 }
2868 ~AlternativeGenerationList() { 2827 ~AlternativeGenerationList() {
2869 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) { 2828 for (int i = kAFew; i < alt_gens_.length(); i++) {
2870 delete alt_gens_[i]; 2829 delete alt_gens_[i];
2871 alt_gens_[i] = NULL; 2830 alt_gens_[i] = NULL;
2872 } 2831 }
2873 } 2832 }
2874 2833
2875 AlternativeGeneration* at(intptr_t i) { 2834 AlternativeGeneration* at(int i) {
2876 return alt_gens_[i]; 2835 return alt_gens_[i];
2877 } 2836 }
2878 2837
2879 private: 2838 private:
2880 static const intptr_t kAFew = 10; 2839 static const int kAFew = 10;
2881 GrowableArray<AlternativeGeneration*> alt_gens_; 2840 ZoneList<AlternativeGeneration*> alt_gens_;
2882 AlternativeGeneration a_few_alt_gens_[kAFew]; 2841 AlternativeGeneration a_few_alt_gens_[kAFew];
2883
2884 DISALLOW_ALLOCATION();
2885 }; 2842 };
2886 2843
2887 2844
2888 // The '2' variant is inclusive from and exclusive to. 2845 // The '2' variant is has inclusive from and exclusive to.
2889 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, 2846 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
2890 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. 2847 // which include WhiteSpace (7.2) or LineTerminator (7.3) values.
2891 static const intptr_t kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, 2848 static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,
2892 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, 2849 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,
2893 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, 2850 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
2894 0xFEFF, 0xFF00, 0x10000 }; 2851 0xFEFF, 0xFF00, 0x10000 };
2895 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); 2852 static const int kSpaceRangeCount = arraysize(kSpaceRanges);
2896 static const intptr_t kWordRanges[] = { 2853
2854 static const int kWordRanges[] = {
2897 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 }; 2855 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };
2898 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); 2856 static const int kWordRangeCount = arraysize(kWordRanges);
2899 static const intptr_t kDigitRanges[] = { '0', '9' + 1, 0x10000 }; 2857 static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 };
2900 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); 2858 static const int kDigitRangeCount = arraysize(kDigitRanges);
2901 static const intptr_t kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 }; 2859 static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };
2902 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges); 2860 static const int kSurrogateRangeCount = arraysize(kSurrogateRanges);
2903 static const intptr_t kLineTerminatorRanges[] = { 2861 static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,
2904 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, 0x10000 }; 2862 0x2028, 0x202A, 0x10000 };
2905 static const intptr_t kLineTerminatorRangeCount = 2863 static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);
2906 ARRAY_SIZE(kLineTerminatorRanges);
2907 2864
2908 2865
2909 void BoyerMoorePositionInfo::Set(intptr_t character) { 2866 void BoyerMoorePositionInfo::Set(int character) {
2910 SetInterval(Interval(character, character)); 2867 SetInterval(Interval(character, character));
2911 } 2868 }
2912 2869
2913 2870
2914 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { 2871 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
2915 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); 2872 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
2916 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); 2873 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
2917 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); 2874 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
2918 surrogate_ = 2875 surrogate_ =
2919 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); 2876 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
2920 if (interval.to() - interval.from() >= kMapSize - 1) { 2877 if (interval.to() - interval.from() >= kMapSize - 1) {
2921 if (map_count_ != kMapSize) { 2878 if (map_count_ != kMapSize) {
2922 map_count_ = kMapSize; 2879 map_count_ = kMapSize;
2923 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true; 2880 for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
2924 } 2881 }
2925 return; 2882 return;
2926 } 2883 }
2927 for (intptr_t i = interval.from(); i <= interval.to(); i++) { 2884 for (int i = interval.from(); i <= interval.to(); i++) {
2928 intptr_t mod_character = (i & kMask); 2885 int mod_character = (i & kMask);
2929 if (!map_->At(mod_character)) { 2886 if (!map_->at(mod_character)) {
2930 map_count_++; 2887 map_count_++;
2931 (*map_)[mod_character] = true; 2888 map_->at(mod_character) = true;
2932 } 2889 }
2933 if (map_count_ == kMapSize) return; 2890 if (map_count_ == kMapSize) return;
2934 } 2891 }
2935 } 2892 }
2936 2893
2937 2894
2938 void BoyerMoorePositionInfo::SetAll() { 2895 void BoyerMoorePositionInfo::SetAll() {
2939 s_ = w_ = d_ = kLatticeUnknown; 2896 s_ = w_ = d_ = kLatticeUnknown;
2940 if (map_count_ != kMapSize) { 2897 if (map_count_ != kMapSize) {
2941 map_count_ = kMapSize; 2898 map_count_ = kMapSize;
2942 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true; 2899 for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
2943 } 2900 }
2944 } 2901 }
2945 2902
2946 2903
2947 BoyerMooreLookahead::BoyerMooreLookahead( 2904 BoyerMooreLookahead::BoyerMooreLookahead(
2948 intptr_t length, RegExpCompiler* compiler, Isolate* isolate) 2905 int length, RegExpCompiler* compiler, Zone* zone)
2949 : length_(length), 2906 : length_(length),
2950 compiler_(compiler) { 2907 compiler_(compiler) {
2951 if (compiler->one_byte()) { 2908 if (compiler->one_byte()) {
2952 max_char_ = Symbols::kMaxOneCharCodeSymbol; 2909 max_char_ = String::kMaxOneByteCharCode;
2953 } else { 2910 } else {
2954 max_char_ = Utf16::kMaxCodeUnit; 2911 max_char_ = String::kMaxUtf16CodeUnit;
2955 } 2912 }
2956 bitmaps_ = new(isolate) ZoneGrowableArray<BoyerMoorePositionInfo*>(length); 2913 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
2957 for (intptr_t i = 0; i < length; i++) { 2914 for (int i = 0; i < length; i++) {
2958 bitmaps_->Add(new(isolate) BoyerMoorePositionInfo(isolate)); 2915 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone);
2959 } 2916 }
2960 } 2917 }
2961 2918
2962 2919
2963 // Find the longest range of lookahead that has the fewest number of different 2920 // Find the longest range of lookahead that has the fewest number of different
2964 // characters that can occur at a given position. Since we are optimizing two 2921 // characters that can occur at a given position. Since we are optimizing two
2965 // different parameters at once this is a tradeoff. 2922 // different parameters at once this is a tradeoff.
2966 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) { 2923 bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {
2967 intptr_t biggest_points = 0; 2924 int biggest_points = 0;
2968 // If more than 32 characters out of 128 can occur it is unlikely that we can 2925 // If more than 32 characters out of 128 can occur it is unlikely that we can
2969 // be lucky enough to step forwards much of the time. 2926 // be lucky enough to step forwards much of the time.
2970 const intptr_t kMaxMax = 32; 2927 const int kMaxMax = 32;
2971 for (intptr_t max_number_of_chars = 4; 2928 for (int max_number_of_chars = 4;
2972 max_number_of_chars < kMaxMax; 2929 max_number_of_chars < kMaxMax;
2973 max_number_of_chars *= 2) { 2930 max_number_of_chars *= 2) {
2974 biggest_points = 2931 biggest_points =
2975 FindBestInterval(max_number_of_chars, biggest_points, from, to); 2932 FindBestInterval(max_number_of_chars, biggest_points, from, to);
2976 } 2933 }
2977 if (biggest_points == 0) return false; 2934 if (biggest_points == 0) return false;
2978 return true; 2935 return true;
2979 } 2936 }
2980 2937
2981 2938
2982 // Find the highest-points range between 0 and length_ where the character 2939 // Find the highest-points range between 0 and length_ where the character
2983 // information is not too vague. 'Too vague' means that there are more than 2940 // information is not too vague. 'Too vague' means that there are more than
2984 // max_number_of_chars that can occur at this position. Calculates the number 2941 // max_number_of_chars that can occur at this position. Calculates the number
2985 // of points as the product of width-of-the-range and 2942 // of points as the product of width-of-the-range and
2986 // probability-of-finding-one-of-the-characters, where the probability is 2943 // probability-of-finding-one-of-the-characters, where the probability is
2987 // calculated using the frequency distribution of the sample subject string. 2944 // calculated using the frequency distribution of the sample subject string.
2988 intptr_t BoyerMooreLookahead::FindBestInterval( 2945 int BoyerMooreLookahead::FindBestInterval(
2989 intptr_t max_number_of_chars, 2946 int max_number_of_chars, int old_biggest_points, int* from, int* to) {
2990 intptr_t old_biggest_points, 2947 int biggest_points = old_biggest_points;
2991 intptr_t* from, 2948 static const int kSize = RegExpMacroAssembler::kTableSize;
2992 intptr_t* to) { 2949 for (int i = 0; i < length_; ) {
2993 intptr_t biggest_points = old_biggest_points;
2994 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;
2995 for (intptr_t i = 0; i < length_; ) {
2996 while (i < length_ && Count(i) > max_number_of_chars) i++; 2950 while (i < length_ && Count(i) > max_number_of_chars) i++;
2997 if (i == length_) break; 2951 if (i == length_) break;
2998 intptr_t remembered_from = i; 2952 int remembered_from = i;
2999 bool union_map[kSize]; 2953 bool union_map[kSize];
3000 for (intptr_t j = 0; j < kSize; j++) union_map[j] = false; 2954 for (int j = 0; j < kSize; j++) union_map[j] = false;
3001 while (i < length_ && Count(i) <= max_number_of_chars) { 2955 while (i < length_ && Count(i) <= max_number_of_chars) {
3002 BoyerMoorePositionInfo* map = bitmaps_->At(i); 2956 BoyerMoorePositionInfo* map = bitmaps_->at(i);
3003 for (intptr_t j = 0; j < kSize; j++) union_map[j] |= map->at(j); 2957 for (int j = 0; j < kSize; j++) union_map[j] |= map->at(j);
3004 i++; 2958 i++;
3005 } 2959 }
3006 intptr_t frequency = 0; 2960 int frequency = 0;
3007 for (intptr_t j = 0; j < kSize; j++) { 2961 for (int j = 0; j < kSize; j++) {
3008 if (union_map[j]) { 2962 if (union_map[j]) {
3009 // Add 1 to the frequency to give a small per-character boost for 2963 // Add 1 to the frequency to give a small per-character boost for
3010 // the cases where our sampling is not good enough and many 2964 // the cases where our sampling is not good enough and many
3011 // characters have a frequency of zero. This means the frequency 2965 // characters have a frequency of zero. This means the frequency
3012 // can theoretically be up to 2*kSize though we treat it mostly as 2966 // can theoretically be up to 2*kSize though we treat it mostly as
3013 // a fraction of kSize. 2967 // a fraction of kSize.
3014 frequency += compiler_->frequency_collator()->Frequency(j) + 1; 2968 frequency += compiler_->frequency_collator()->Frequency(j) + 1;
3015 } 2969 }
3016 } 2970 }
3017 // We use the probability of skipping times the distance we are skipping to 2971 // We use the probability of skipping times the distance we are skipping to
3018 // judge the effectiveness of this. Actually we have a cut-off: By 2972 // judge the effectiveness of this. Actually we have a cut-off: By
3019 // dividing by 2 we switch off the skipping if the probability of skipping 2973 // dividing by 2 we switch off the skipping if the probability of skipping
3020 // is less than 50%. This is because the multibyte mask-and-compare 2974 // is less than 50%. This is because the multibyte mask-and-compare
3021 // skipping in quickcheck is more likely to do well on this case. 2975 // skipping in quickcheck is more likely to do well on this case.
3022 bool in_quickcheck_range = ((i - remembered_from < 4) || 2976 bool in_quickcheck_range =
3023 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2)); 2977 ((i - remembered_from < 4) ||
2978 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
3024 // Called 'probability' but it is only a rough estimate and can actually 2979 // Called 'probability' but it is only a rough estimate and can actually
3025 // be outside the 0-kSize range. 2980 // be outside the 0-kSize range.
3026 intptr_t probability = 2981 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
3027 (in_quickcheck_range ? kSize / 2 : kSize) - frequency; 2982 int points = (i - remembered_from) * probability;
3028 intptr_t points = (i - remembered_from) * probability;
3029 if (points > biggest_points) { 2983 if (points > biggest_points) {
3030 *from = remembered_from; 2984 *from = remembered_from;
3031 *to = i - 1; 2985 *to = i - 1;
3032 biggest_points = points; 2986 biggest_points = points;
3033 } 2987 }
3034 } 2988 }
3035 return biggest_points; 2989 return biggest_points;
3036 } 2990 }
3037 2991
3038 2992
3039 // Take all the characters that will not prevent a successful match if they 2993 // Take all the characters that will not prevent a successful match if they
3040 // occur in the subject string in the range between min_lookahead and 2994 // occur in the subject string in the range between min_lookahead and
3041 // max_lookahead (inclusive) measured from the current position. If the 2995 // max_lookahead (inclusive) measured from the current position. If the
3042 // character at max_lookahead offset is not one of these characters, then we 2996 // character at max_lookahead offset is not one of these characters, then we
3043 // can safely skip forwards by the number of characters in the range. 2997 // can safely skip forwards by the number of characters in the range.
3044 intptr_t BoyerMooreLookahead::GetSkipTable( 2998 int BoyerMooreLookahead::GetSkipTable(int min_lookahead,
3045 intptr_t min_lookahead, 2999 int max_lookahead,
3046 intptr_t max_lookahead, 3000 Handle<ByteArray> boolean_skip_table) {
3047 const TypedData& boolean_skip_table) { 3001 const int kSize = RegExpMacroAssembler::kTableSize;
3048 const intptr_t kSize = RegExpMacroAssembler::kTableSize;
3049 3002
3050 const intptr_t kSkipArrayEntry = 0; 3003 const int kSkipArrayEntry = 0;
3051 const intptr_t kDontSkipArrayEntry = 1; 3004 const int kDontSkipArrayEntry = 1;
3052 3005
3053 for (intptr_t i = 0; i < kSize; i++) { 3006 for (int i = 0; i < kSize; i++) {
3054 boolean_skip_table.SetUint8(i, kSkipArrayEntry); 3007 boolean_skip_table->set(i, kSkipArrayEntry);
3055 } 3008 }
3056 intptr_t skip = max_lookahead + 1 - min_lookahead; 3009 int skip = max_lookahead + 1 - min_lookahead;
3057 3010
3058 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) { 3011 for (int i = max_lookahead; i >= min_lookahead; i--) {
3059 BoyerMoorePositionInfo* map = bitmaps_->At(i); 3012 BoyerMoorePositionInfo* map = bitmaps_->at(i);
3060 for (intptr_t j = 0; j < kSize; j++) { 3013 for (int j = 0; j < kSize; j++) {
3061 if (map->at(j)) { 3014 if (map->at(j)) {
3062 boolean_skip_table.SetUint8(j, kDontSkipArrayEntry); 3015 boolean_skip_table->set(j, kDontSkipArrayEntry);
3063 } 3016 }
3064 } 3017 }
3065 } 3018 }
3066 3019
3067 return skip; 3020 return skip;
3068 } 3021 }
3069 3022
3070 3023
3071 // See comment above on the implementation of GetSkipTable. 3024 // See comment above on the implementation of GetSkipTable.
3072 void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { 3025 void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
3073 const intptr_t kSize = RegExpMacroAssembler::kTableSize; 3026 const int kSize = RegExpMacroAssembler::kTableSize;
3074 3027
3075 intptr_t min_lookahead = 0; 3028 int min_lookahead = 0;
3076 intptr_t max_lookahead = 0; 3029 int max_lookahead = 0;
3077 3030
3078 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return; 3031 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
3079 3032
3080 bool found_single_character = false; 3033 bool found_single_character = false;
3081 intptr_t single_character = 0; 3034 int single_character = 0;
3082 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) { 3035 for (int i = max_lookahead; i >= min_lookahead; i--) {
3083 BoyerMoorePositionInfo* map = bitmaps_->At(i); 3036 BoyerMoorePositionInfo* map = bitmaps_->at(i);
3084 if (map->map_count() > 1 || 3037 if (map->map_count() > 1 ||
3085 (found_single_character && map->map_count() != 0)) { 3038 (found_single_character && map->map_count() != 0)) {
3086 found_single_character = false; 3039 found_single_character = false;
3087 break; 3040 break;
3088 } 3041 }
3089 for (intptr_t j = 0; j < kSize; j++) { 3042 for (int j = 0; j < kSize; j++) {
3090 if (map->at(j)) { 3043 if (map->at(j)) {
3091 found_single_character = true; 3044 found_single_character = true;
3092 single_character = j; 3045 single_character = j;
3093 break; 3046 break;
3094 } 3047 }
3095 } 3048 }
3096 } 3049 }
3097 3050
3098 intptr_t lookahead_width = max_lookahead + 1 - min_lookahead; 3051 int lookahead_width = max_lookahead + 1 - min_lookahead;
3099 3052
3100 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) { 3053 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
3101 // The mask-compare can probably handle this better. 3054 // The mask-compare can probably handle this better.
3102 return; 3055 return;
3103 } 3056 }
3104 3057
3105 if (found_single_character) { 3058 if (found_single_character) {
3106 BlockLabel cont, again; 3059 Label cont, again;
3107 masm->BindBlock(&again); 3060 masm->Bind(&again);
3108 masm->LoadCurrentCharacter(max_lookahead, &cont, true); 3061 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
3109 if (max_char_ > kSize) { 3062 if (max_char_ > kSize) {
3110 masm->CheckCharacterAfterAnd(single_character, 3063 masm->CheckCharacterAfterAnd(single_character,
3111 RegExpMacroAssembler::kTableMask, 3064 RegExpMacroAssembler::kTableMask,
3112 &cont); 3065 &cont);
3113 } else { 3066 } else {
3114 masm->CheckCharacter(single_character, &cont); 3067 masm->CheckCharacter(single_character, &cont);
3115 } 3068 }
3116 masm->AdvanceCurrentPosition(lookahead_width); 3069 masm->AdvanceCurrentPosition(lookahead_width);
3117 masm->GoTo(&again); 3070 masm->GoTo(&again);
3118 masm->BindBlock(&cont); 3071 masm->Bind(&cont);
3119 return; 3072 return;
3120 } 3073 }
3121 3074
3122 const TypedData& boolean_skip_table = TypedData::ZoneHandle( 3075 Factory* factory = masm->zone()->isolate()->factory();
3123 compiler_->isolate(), 3076 Handle<ByteArray> boolean_skip_table = factory->NewByteArray(kSize, TENURED);
3124 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); 3077 int skip_distance = GetSkipTable(
3125 intptr_t skip_distance = GetSkipTable(
3126 min_lookahead, max_lookahead, boolean_skip_table); 3078 min_lookahead, max_lookahead, boolean_skip_table);
3127 ASSERT(skip_distance != 0); 3079 DCHECK(skip_distance != 0);
3128 3080
3129 BlockLabel cont, again; 3081 Label cont, again;
3130 3082 masm->Bind(&again);
3131 masm->BindBlock(&again);
3132 masm->LoadCurrentCharacter(max_lookahead, &cont, true); 3083 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
3133 masm->CheckBitInTable(boolean_skip_table, &cont); 3084 masm->CheckBitInTable(boolean_skip_table, &cont);
3134 masm->AdvanceCurrentPosition(skip_distance); 3085 masm->AdvanceCurrentPosition(skip_distance);
3135 masm->GoTo(&again); 3086 masm->GoTo(&again);
3136 masm->BindBlock(&cont); 3087 masm->Bind(&cont);
3137
3138 return;
3139 } 3088 }
3140 3089
3141 3090
3142 /* Code generation for choice nodes. 3091 /* Code generation for choice nodes.
3143 * 3092 *
3144 * We generate quick checks that do a mask and compare to eliminate a 3093 * We generate quick checks that do a mask and compare to eliminate a
3145 * choice. If the quick check succeeds then it jumps to the continuation to 3094 * choice. If the quick check succeeds then it jumps to the continuation to
3146 * do slow checks and check subsequent nodes. If it fails (the common case) 3095 * do slow checks and check subsequent nodes. If it fails (the common case)
3147 * it falls through to the next choice. 3096 * it falls through to the next choice.
3148 * 3097 *
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
3214 */ 3163 */
3215 3164
3216 GreedyLoopState::GreedyLoopState(bool not_at_start) { 3165 GreedyLoopState::GreedyLoopState(bool not_at_start) {
3217 counter_backtrack_trace_.set_backtrack(&label_); 3166 counter_backtrack_trace_.set_backtrack(&label_);
3218 if (not_at_start) counter_backtrack_trace_.set_at_start(false); 3167 if (not_at_start) counter_backtrack_trace_.set_at_start(false);
3219 } 3168 }
3220 3169
3221 3170
3222 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { 3171 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
3223 #ifdef DEBUG 3172 #ifdef DEBUG
3224 intptr_t choice_count = alternatives_->length(); 3173 int choice_count = alternatives_->length();
3225 for (intptr_t i = 0; i < choice_count - 1; i++) { 3174 for (int i = 0; i < choice_count - 1; i++) {
3226 GuardedAlternative alternative = alternatives_->At(i); 3175 GuardedAlternative alternative = alternatives_->at(i);
3227 ZoneGrowableArray<Guard*>* guards = alternative.guards(); 3176 ZoneList<Guard*>* guards = alternative.guards();
3228 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); 3177 int guard_count = (guards == NULL) ? 0 : guards->length();
3229 for (intptr_t j = 0; j < guard_count; j++) { 3178 for (int j = 0; j < guard_count; j++) {
3230 ASSERT(!trace->mentions_reg(guards->At(j)->reg())); 3179 DCHECK(!trace->mentions_reg(guards->at(j)->reg()));
3231 } 3180 }
3232 } 3181 }
3233 #endif 3182 #endif
3234 } 3183 }
3235 3184
3236 3185
3237 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, 3186 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
3238 Trace* current_trace, 3187 Trace* current_trace,
3239 PreloadState* state) { 3188 PreloadState* state) {
3240 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { 3189 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
3241 // Save some time by looking at most one machine word ahead. 3190 // Save some time by looking at most one machine word ahead.
3242 state->eats_at_least_ = 3191 state->eats_at_least_ =
3243 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, 3192 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget,
3244 current_trace->at_start() == Trace::FALSE_VALUE); 3193 current_trace->at_start() == Trace::FALSE_VALUE);
3245 } 3194 }
3246 state->preload_characters_ = 3195 state->preload_characters_ =
3247 CalculatePreloadCharacters(compiler, state->eats_at_least_); 3196 CalculatePreloadCharacters(compiler, state->eats_at_least_);
3248 3197
3249 state->preload_is_current_ = 3198 state->preload_is_current_ =
3250 (current_trace->characters_preloaded() == state->preload_characters_); 3199 (current_trace->characters_preloaded() == state->preload_characters_);
3251 state->preload_has_checked_bounds_ = state->preload_is_current_; 3200 state->preload_has_checked_bounds_ = state->preload_is_current_;
3252 } 3201 }
3253 3202
3254 3203
3255 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3204 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3256 intptr_t choice_count = alternatives_->length(); 3205 int choice_count = alternatives_->length();
3257 3206
3258 AssertGuardsMentionRegisters(trace); 3207 AssertGuardsMentionRegisters(trace);
3259 3208
3260 LimitResult limit_result = LimitVersions(compiler, trace); 3209 LimitResult limit_result = LimitVersions(compiler, trace);
3261 if (limit_result == DONE) return; 3210 if (limit_result == DONE) return;
3262 ASSERT(limit_result == CONTINUE); 3211 DCHECK(limit_result == CONTINUE);
3263 3212
3264 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for 3213 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
3265 // other choice nodes we only flush if we are out of code size budget. 3214 // other choice nodes we only flush if we are out of code size budget.
3266 if (trace->flush_budget() == 0 && trace->actions() != NULL) { 3215 if (trace->flush_budget() == 0 && trace->actions() != NULL) {
3267 trace->Flush(compiler, this); 3216 trace->Flush(compiler, this);
3268 return; 3217 return;
3269 } 3218 }
3270 3219
3271 RecursionCheck rc(compiler); 3220 RecursionCheck rc(compiler);
3272 3221
3273 PreloadState preload; 3222 PreloadState preload;
3274 preload.init(); 3223 preload.init();
3275 GreedyLoopState greedy_loop_state(not_at_start()); 3224 GreedyLoopState greedy_loop_state(not_at_start());
3276 3225
3277 intptr_t text_length = 3226 int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
3278 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); 3227 AlternativeGenerationList alt_gens(choice_count, zone());
3279 AlternativeGenerationList alt_gens(choice_count);
3280 3228
3281 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { 3229 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
3282 trace = EmitGreedyLoop(compiler, 3230 trace = EmitGreedyLoop(compiler,
3283 trace, 3231 trace,
3284 &alt_gens, 3232 &alt_gens,
3285 &preload, 3233 &preload,
3286 &greedy_loop_state, 3234 &greedy_loop_state,
3287 text_length); 3235 text_length);
3288 } else { 3236 } else {
3289 // TODO(erikcorry): Delete this. We don't need this label, but it makes us 3237 // TODO(erikcorry): Delete this. We don't need this label, but it makes us
3290 // match the traces produced pre-cleanup. 3238 // match the traces produced pre-cleanup.
3291 BlockLabel second_choice; 3239 Label second_choice;
3292 compiler->macro_assembler()->BindBlock(&second_choice); 3240 compiler->macro_assembler()->Bind(&second_choice);
3293 3241
3294 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); 3242 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
3295 3243
3296 EmitChoices(compiler, 3244 EmitChoices(compiler,
3297 &alt_gens, 3245 &alt_gens,
3298 0, 3246 0,
3299 trace, 3247 trace,
3300 &preload); 3248 &preload);
3301 } 3249 }
3302 3250
3303 // At this point we need to generate slow checks for the alternatives where 3251 // At this point we need to generate slow checks for the alternatives where
3304 // the quick check was inlined. We can recognize these because the associated 3252 // the quick check was inlined. We can recognize these because the associated
3305 // label was bound. 3253 // label was bound.
3306 intptr_t new_flush_budget = trace->flush_budget() / choice_count; 3254 int new_flush_budget = trace->flush_budget() / choice_count;
3307 for (intptr_t i = 0; i < choice_count; i++) { 3255 for (int i = 0; i < choice_count; i++) {
3308 AlternativeGeneration* alt_gen = alt_gens.at(i); 3256 AlternativeGeneration* alt_gen = alt_gens.at(i);
3309 Trace new_trace(*trace); 3257 Trace new_trace(*trace);
3310 // If there are actions to be flushed we have to limit how many times 3258 // If there are actions to be flushed we have to limit how many times
3311 // they are flushed. Take the budget of the parent trace and distribute 3259 // they are flushed. Take the budget of the parent trace and distribute
3312 // it fairly amongst the children. 3260 // it fairly amongst the children.
3313 if (new_trace.actions() != NULL) { 3261 if (new_trace.actions() != NULL) {
3314 new_trace.set_flush_budget(new_flush_budget); 3262 new_trace.set_flush_budget(new_flush_budget);
3315 } 3263 }
3316 bool next_expects_preload = 3264 bool next_expects_preload =
3317 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload; 3265 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
3318 EmitOutOfLineContinuation(compiler, 3266 EmitOutOfLineContinuation(compiler,
3319 &new_trace, 3267 &new_trace,
3320 alternatives_->At(i), 3268 alternatives_->at(i),
3321 alt_gen, 3269 alt_gen,
3322 preload.preload_characters_, 3270 preload.preload_characters_,
3323 next_expects_preload); 3271 next_expects_preload);
3324 } 3272 }
3325 } 3273 }
3326 3274
3275
3327 Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, 3276 Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler,
3328 Trace* trace, 3277 Trace* trace,
3329 AlternativeGenerationList* alt_gens, 3278 AlternativeGenerationList* alt_gens,
3330 PreloadState* preload, 3279 PreloadState* preload,
3331 GreedyLoopState* greedy_loop_state, 3280 GreedyLoopState* greedy_loop_state,
3332 intptr_t text_length) { 3281 int text_length) {
3333 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 3282 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3334 // Here we have special handling for greedy loops containing only text nodes 3283 // Here we have special handling for greedy loops containing only text nodes
3335 // and other simple nodes. These are handled by pushing the current 3284 // and other simple nodes. These are handled by pushing the current
3336 // position on the stack and then incrementing the current position each 3285 // position on the stack and then incrementing the current position each
3337 // time around the switch. On backtrack we decrement the current position 3286 // time around the switch. On backtrack we decrement the current position
3338 // and check it against the pushed value. This avoids pushing backtrack 3287 // and check it against the pushed value. This avoids pushing backtrack
3339 // information for each iteration of the loop, which could take up a lot of 3288 // information for each iteration of the loop, which could take up a lot of
3340 // space. 3289 // space.
3341 ASSERT(trace->stop_node() == NULL); 3290 DCHECK(trace->stop_node() == NULL);
3342 macro_assembler->PushCurrentPosition(); 3291 macro_assembler->PushCurrentPosition();
3343 BlockLabel greedy_match_failed; 3292 Label greedy_match_failed;
3344 Trace greedy_match_trace; 3293 Trace greedy_match_trace;
3345 if (not_at_start()) greedy_match_trace.set_at_start(false); 3294 if (not_at_start()) greedy_match_trace.set_at_start(false);
3346 greedy_match_trace.set_backtrack(&greedy_match_failed); 3295 greedy_match_trace.set_backtrack(&greedy_match_failed);
3347 BlockLabel loop_label; 3296 Label loop_label;
3348 macro_assembler->BindBlock(&loop_label); 3297 macro_assembler->Bind(&loop_label);
3349 greedy_match_trace.set_stop_node(this); 3298 greedy_match_trace.set_stop_node(this);
3350 greedy_match_trace.set_loop_label(&loop_label); 3299 greedy_match_trace.set_loop_label(&loop_label);
3351 (*alternatives_)[0].node()->Emit(compiler, &greedy_match_trace); 3300 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
3352 macro_assembler->BindBlock(&greedy_match_failed); 3301 macro_assembler->Bind(&greedy_match_failed);
3353 3302
3354 BlockLabel second_choice; // For use in greedy matches. 3303 Label second_choice; // For use in greedy matches.
3355 macro_assembler->BindBlock(&second_choice); 3304 macro_assembler->Bind(&second_choice);
3356 3305
3357 Trace* new_trace = greedy_loop_state->counter_backtrack_trace(); 3306 Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
3358 3307
3359 EmitChoices(compiler, 3308 EmitChoices(compiler,
3360 alt_gens, 3309 alt_gens,
3361 1, 3310 1,
3362 new_trace, 3311 new_trace,
3363 preload); 3312 preload);
3364 3313
3365 macro_assembler->BindBlock(greedy_loop_state->label()); 3314 macro_assembler->Bind(greedy_loop_state->label());
3366 // If we have unwound to the bottom then backtrack. 3315 // If we have unwound to the bottom then backtrack.
3367 macro_assembler->CheckGreedyLoop(trace->backtrack()); 3316 macro_assembler->CheckGreedyLoop(trace->backtrack());
3368 // Otherwise try the second priority at an earlier position. 3317 // Otherwise try the second priority at an earlier position.
3369 macro_assembler->AdvanceCurrentPosition(-text_length); 3318 macro_assembler->AdvanceCurrentPosition(-text_length);
3370 macro_assembler->GoTo(&second_choice); 3319 macro_assembler->GoTo(&second_choice);
3371 return new_trace; 3320 return new_trace;
3372 } 3321 }
3373 3322
3374 3323 int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
3375 intptr_t ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, 3324 Trace* trace) {
3376 Trace* trace) { 3325 int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
3377 intptr_t eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
3378 if (alternatives_->length() != 2) return eats_at_least; 3326 if (alternatives_->length() != 2) return eats_at_least;
3379 3327
3380 GuardedAlternative alt1 = alternatives_->At(1); 3328 GuardedAlternative alt1 = alternatives_->at(1);
3381 if (alt1.guards() != NULL && alt1.guards()->length() != 0) { 3329 if (alt1.guards() != NULL && alt1.guards()->length() != 0) {
3382 return eats_at_least; 3330 return eats_at_least;
3383 } 3331 }
3384 RegExpNode* eats_anything_node = alt1.node(); 3332 RegExpNode* eats_anything_node = alt1.node();
3385 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) { 3333 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
3386 return eats_at_least; 3334 return eats_at_least;
3387 } 3335 }
3388 3336
3389 // Really we should be creating a new trace when we execute this function, 3337 // Really we should be creating a new trace when we execute this function,
3390 // but there is no need, because the code it generates cannot backtrack, and 3338 // but there is no need, because the code it generates cannot backtrack, and
3391 // we always arrive here with a trivial trace (since it's the entry to a 3339 // we always arrive here with a trivial trace (since it's the entry to a
3392 // loop. That also implies that there are no preloaded characters, which is 3340 // loop. That also implies that there are no preloaded characters, which is
3393 // good, because it means we won't be violating any assumptions by 3341 // good, because it means we won't be violating any assumptions by
3394 // overwriting those characters with new load instructions. 3342 // overwriting those characters with new load instructions.
3395 ASSERT(trace->is_trivial()); 3343 DCHECK(trace->is_trivial());
3396 3344
3397 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 3345 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3398 // At this point we know that we are at a non-greedy loop that will eat 3346 // At this point we know that we are at a non-greedy loop that will eat
3399 // any character one at a time. Any non-anchored regexp has such a 3347 // any character one at a time. Any non-anchored regexp has such a
3400 // loop prepended to it in order to find where it starts. We look for 3348 // loop prepended to it in order to find where it starts. We look for
3401 // a pattern of the form ...abc... where we can look 6 characters ahead 3349 // a pattern of the form ...abc... where we can look 6 characters ahead
3402 // and step forwards 3 if the character is not one of abc. Abc need 3350 // and step forwards 3 if the character is not one of abc. Abc need
3403 // not be atoms, they can be any reasonably limited character class or 3351 // not be atoms, they can be any reasonably limited character class or
3404 // small alternation. 3352 // small alternation.
3405 BoyerMooreLookahead* bm = bm_info(false); 3353 BoyerMooreLookahead* bm = bm_info(false);
3406 if (bm == NULL) { 3354 if (bm == NULL) {
3407 eats_at_least = Utils::Minimum(kMaxLookaheadForBoyerMoore, 3355 eats_at_least = Min(kMaxLookaheadForBoyerMoore,
3408 EatsAtLeast(kMaxLookaheadForBoyerMoore, 3356 EatsAtLeast(kMaxLookaheadForBoyerMoore,
3409 kRecursionBudget, 3357 kRecursionBudget,
3410 false)); 3358 false));
3411 if (eats_at_least >= 1) { 3359 if (eats_at_least >= 1) {
3412 bm = new(I) BoyerMooreLookahead(eats_at_least, compiler, I); 3360 bm = new(zone()) BoyerMooreLookahead(eats_at_least,
3413 GuardedAlternative alt0 = alternatives_->At(0); 3361 compiler,
3362 zone());
3363 GuardedAlternative alt0 = alternatives_->at(0);
3414 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); 3364 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false);
3415 } 3365 }
3416 } 3366 }
3417 if (bm != NULL) { 3367 if (bm != NULL) {
3418 bm->EmitSkipInstructions(macro_assembler); 3368 bm->EmitSkipInstructions(macro_assembler);
3419 } 3369 }
3420 return eats_at_least; 3370 return eats_at_least;
3421 } 3371 }
3422 3372
3423 3373
3424 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, 3374 void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
3425 AlternativeGenerationList* alt_gens, 3375 AlternativeGenerationList* alt_gens,
3426 intptr_t first_choice, 3376 int first_choice,
3427 Trace* trace, 3377 Trace* trace,
3428 PreloadState* preload) { 3378 PreloadState* preload) {
3429 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 3379 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3430 SetUpPreLoad(compiler, trace, preload); 3380 SetUpPreLoad(compiler, trace, preload);
3431 3381
3432 // For now we just call all choices one after the other. The idea ultimately 3382 // For now we just call all choices one after the other. The idea ultimately
3433 // is to use the Dispatch table to try only the relevant ones. 3383 // is to use the Dispatch table to try only the relevant ones.
3434 intptr_t choice_count = alternatives_->length(); 3384 int choice_count = alternatives_->length();
3435 3385
3436 intptr_t new_flush_budget = trace->flush_budget() / choice_count; 3386 int new_flush_budget = trace->flush_budget() / choice_count;
3437 3387
3438 for (intptr_t i = first_choice; i < choice_count; i++) { 3388 for (int i = first_choice; i < choice_count; i++) {
3439 bool is_last = i == choice_count - 1; 3389 bool is_last = i == choice_count - 1;
3440 bool fall_through_on_failure = !is_last; 3390 bool fall_through_on_failure = !is_last;
3441 GuardedAlternative alternative = alternatives_->At(i); 3391 GuardedAlternative alternative = alternatives_->at(i);
3442 AlternativeGeneration* alt_gen = alt_gens->at(i); 3392 AlternativeGeneration* alt_gen = alt_gens->at(i);
3443 alt_gen->quick_check_details.set_characters(preload->preload_characters_); 3393 alt_gen->quick_check_details.set_characters(preload->preload_characters_);
3444 ZoneGrowableArray<Guard*>* guards = alternative.guards(); 3394 ZoneList<Guard*>* guards = alternative.guards();
3445 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); 3395 int guard_count = (guards == NULL) ? 0 : guards->length();
3446 Trace new_trace(*trace); 3396 Trace new_trace(*trace);
3447 new_trace.set_characters_preloaded(preload->preload_is_current_ ? 3397 new_trace.set_characters_preloaded(preload->preload_is_current_ ?
3448 preload->preload_characters_ : 3398 preload->preload_characters_ :
3449 0); 3399 0);
3450 if (preload->preload_has_checked_bounds_) { 3400 if (preload->preload_has_checked_bounds_) {
3451 new_trace.set_bound_checked_up_to(preload->preload_characters_); 3401 new_trace.set_bound_checked_up_to(preload->preload_characters_);
3452 } 3402 }
3453 new_trace.quick_check_performed()->Clear(); 3403 new_trace.quick_check_performed()->Clear();
3454 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); 3404 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
3455 if (!is_last) { 3405 if (!is_last) {
3456 new_trace.set_backtrack(&alt_gen->after); 3406 new_trace.set_backtrack(&alt_gen->after);
3457 } 3407 }
3458 alt_gen->expects_preload = preload->preload_is_current_; 3408 alt_gen->expects_preload = preload->preload_is_current_;
3459 bool generate_full_check_inline = false; 3409 bool generate_full_check_inline = false;
3460 if (kRegexpOptimization && 3410 if (FLAG_regexp_optimization &&
3461 try_to_emit_quick_check_for_alternative(i == 0) && 3411 try_to_emit_quick_check_for_alternative(i == 0) &&
3462 alternative.node()->EmitQuickCheck(compiler, 3412 alternative.node()->EmitQuickCheck(compiler,
3463 trace, 3413 trace,
3464 &new_trace, 3414 &new_trace,
3465 preload->preload_has_checked_bounds_, 3415 preload->preload_has_checked_bounds_,
3466 &alt_gen->possible_success, 3416 &alt_gen->possible_success,
3467 &alt_gen->quick_check_details, 3417 &alt_gen->quick_check_details,
3468 fall_through_on_failure)) { 3418 fall_through_on_failure)) {
3469 // Quick check was generated for this choice. 3419 // Quick check was generated for this choice.
3470 preload->preload_is_current_ = true; 3420 preload->preload_is_current_ = true;
3471 preload->preload_has_checked_bounds_ = true; 3421 preload->preload_has_checked_bounds_ = true;
3472 // If we generated the quick check to fall through on possible success, 3422 // If we generated the quick check to fall through on possible success,
3473 // we now need to generate the full check inline. 3423 // we now need to generate the full check inline.
3474 if (!fall_through_on_failure) { 3424 if (!fall_through_on_failure) {
3475 macro_assembler->BindBlock(&alt_gen->possible_success); 3425 macro_assembler->Bind(&alt_gen->possible_success);
3476 new_trace.set_quick_check_performed(&alt_gen->quick_check_details); 3426 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3477 new_trace.set_characters_preloaded(preload->preload_characters_); 3427 new_trace.set_characters_preloaded(preload->preload_characters_);
3478 new_trace.set_bound_checked_up_to(preload->preload_characters_); 3428 new_trace.set_bound_checked_up_to(preload->preload_characters_);
3479 generate_full_check_inline = true; 3429 generate_full_check_inline = true;
3480 } 3430 }
3481 } else if (alt_gen->quick_check_details.cannot_match()) { 3431 } else if (alt_gen->quick_check_details.cannot_match()) {
3482 if (!fall_through_on_failure) { 3432 if (!fall_through_on_failure) {
3483 macro_assembler->GoTo(trace->backtrack()); 3433 macro_assembler->GoTo(trace->backtrack());
3484 } 3434 }
3485 continue; 3435 continue;
3486 } else { 3436 } else {
3487 // No quick check was generated. Put the full code here. 3437 // No quick check was generated. Put the full code here.
3488 // If this is not the first choice then there could be slow checks from 3438 // If this is not the first choice then there could be slow checks from
3489 // previous cases that go here when they fail. There's no reason to 3439 // previous cases that go here when they fail. There's no reason to
3490 // insist that they preload characters since the slow check we are about 3440 // insist that they preload characters since the slow check we are about
3491 // to generate probably can't use it. 3441 // to generate probably can't use it.
3492 if (i != first_choice) { 3442 if (i != first_choice) {
3493 alt_gen->expects_preload = false; 3443 alt_gen->expects_preload = false;
3494 new_trace.InvalidateCurrentCharacter(); 3444 new_trace.InvalidateCurrentCharacter();
3495 } 3445 }
3496 generate_full_check_inline = true; 3446 generate_full_check_inline = true;
3497 } 3447 }
3498 if (generate_full_check_inline) { 3448 if (generate_full_check_inline) {
3499 if (new_trace.actions() != NULL) { 3449 if (new_trace.actions() != NULL) {
3500 new_trace.set_flush_budget(new_flush_budget); 3450 new_trace.set_flush_budget(new_flush_budget);
3501 } 3451 }
3502 for (intptr_t j = 0; j < guard_count; j++) { 3452 for (int j = 0; j < guard_count; j++) {
3503 GenerateGuard(macro_assembler, guards->At(j), &new_trace); 3453 GenerateGuard(macro_assembler, guards->at(j), &new_trace);
3504 } 3454 }
3505 alternative.node()->Emit(compiler, &new_trace); 3455 alternative.node()->Emit(compiler, &new_trace);
3506 preload->preload_is_current_ = false; 3456 preload->preload_is_current_ = false;
3507 } 3457 }
3508 macro_assembler->BindBlock(&alt_gen->after); 3458 macro_assembler->Bind(&alt_gen->after);
3509 } 3459 }
3510 } 3460 }
3511 3461
3512 3462
3513 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, 3463 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
3514 Trace* trace, 3464 Trace* trace,
3515 GuardedAlternative alternative, 3465 GuardedAlternative alternative,
3516 AlternativeGeneration* alt_gen, 3466 AlternativeGeneration* alt_gen,
3517 intptr_t preload_characters, 3467 int preload_characters,
3518 bool next_expects_preload) { 3468 bool next_expects_preload) {
3519 if (!alt_gen->possible_success.IsLinked()) return; 3469 if (!alt_gen->possible_success.is_linked()) return;
3520 3470
3521 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 3471 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3522 macro_assembler->BindBlock(&alt_gen->possible_success); 3472 macro_assembler->Bind(&alt_gen->possible_success);
3523 Trace out_of_line_trace(*trace); 3473 Trace out_of_line_trace(*trace);
3524 out_of_line_trace.set_characters_preloaded(preload_characters); 3474 out_of_line_trace.set_characters_preloaded(preload_characters);
3525 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details); 3475 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3526 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE); 3476 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
3527 ZoneGrowableArray<Guard*>* guards = alternative.guards(); 3477 ZoneList<Guard*>* guards = alternative.guards();
3528 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); 3478 int guard_count = (guards == NULL) ? 0 : guards->length();
3529 if (next_expects_preload) { 3479 if (next_expects_preload) {
3530 BlockLabel reload_current_char; 3480 Label reload_current_char;
3531 out_of_line_trace.set_backtrack(&reload_current_char); 3481 out_of_line_trace.set_backtrack(&reload_current_char);
3532 for (intptr_t j = 0; j < guard_count; j++) { 3482 for (int j = 0; j < guard_count; j++) {
3533 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); 3483 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
3534 } 3484 }
3535 alternative.node()->Emit(compiler, &out_of_line_trace); 3485 alternative.node()->Emit(compiler, &out_of_line_trace);
3536 macro_assembler->BindBlock(&reload_current_char); 3486 macro_assembler->Bind(&reload_current_char);
3537 // Reload the current character, since the next quick check expects that. 3487 // Reload the current character, since the next quick check expects that.
3538 // We don't need to check bounds here because we only get into this 3488 // We don't need to check bounds here because we only get into this
3539 // code through a quick check which already did the checked load. 3489 // code through a quick check which already did the checked load.
3540 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), 3490 macro_assembler->LoadCurrentCharacter(trace->cp_offset(),
3541 NULL, 3491 NULL,
3542 false, 3492 false,
3543 preload_characters); 3493 preload_characters);
3544 macro_assembler->GoTo(&(alt_gen->after)); 3494 macro_assembler->GoTo(&(alt_gen->after));
3545 } else { 3495 } else {
3546 out_of_line_trace.set_backtrack(&(alt_gen->after)); 3496 out_of_line_trace.set_backtrack(&(alt_gen->after));
3547 for (intptr_t j = 0; j < guard_count; j++) { 3497 for (int j = 0; j < guard_count; j++) {
3548 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); 3498 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
3549 } 3499 }
3550 alternative.node()->Emit(compiler, &out_of_line_trace); 3500 alternative.node()->Emit(compiler, &out_of_line_trace);
3551 } 3501 }
3552 } 3502 }
3553 3503
3554 3504
3555 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3505 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3556 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3506 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3557 LimitResult limit_result = LimitVersions(compiler, trace); 3507 LimitResult limit_result = LimitVersions(compiler, trace);
3558 if (limit_result == DONE) return; 3508 if (limit_result == DONE) return;
3559 ASSERT(limit_result == CONTINUE); 3509 DCHECK(limit_result == CONTINUE);
3560 3510
3561 RecursionCheck rc(compiler); 3511 RecursionCheck rc(compiler);
3562 3512
3563 switch (action_type_) { 3513 switch (action_type_) {
3564 case STORE_POSITION: { 3514 case STORE_POSITION: {
3565 Trace::DeferredCapture 3515 Trace::DeferredCapture
3566 new_capture(data_.u_position_register.reg, 3516 new_capture(data_.u_position_register.reg,
3567 data_.u_position_register.is_capture, 3517 data_.u_position_register.is_capture,
3568 trace); 3518 trace);
3569 Trace new_trace = *trace; 3519 Trace new_trace = *trace;
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
3601 trace->Flush(compiler, this); 3551 trace->Flush(compiler, this);
3602 } else { 3552 } else {
3603 assembler->WriteCurrentPositionToRegister( 3553 assembler->WriteCurrentPositionToRegister(
3604 data_.u_submatch.current_position_register, 0); 3554 data_.u_submatch.current_position_register, 0);
3605 assembler->WriteStackPointerToRegister( 3555 assembler->WriteStackPointerToRegister(
3606 data_.u_submatch.stack_pointer_register); 3556 data_.u_submatch.stack_pointer_register);
3607 on_success()->Emit(compiler, trace); 3557 on_success()->Emit(compiler, trace);
3608 } 3558 }
3609 break; 3559 break;
3610 case EMPTY_MATCH_CHECK: { 3560 case EMPTY_MATCH_CHECK: {
3611 intptr_t start_pos_reg = data_.u_empty_match_check.start_register; 3561 int start_pos_reg = data_.u_empty_match_check.start_register;
3612 intptr_t stored_pos = 0; 3562 int stored_pos = 0;
3613 intptr_t rep_reg = data_.u_empty_match_check.repetition_register; 3563 int rep_reg = data_.u_empty_match_check.repetition_register;
3614 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister); 3564 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
3615 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos); 3565 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
3616 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) { 3566 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
3617 // If we know we haven't advanced and there is no minimum we 3567 // If we know we haven't advanced and there is no minimum we
3618 // can just backtrack immediately. 3568 // can just backtrack immediately.
3619 assembler->GoTo(trace->backtrack()); 3569 assembler->GoTo(trace->backtrack());
3620 } else if (know_dist && stored_pos < trace->cp_offset()) { 3570 } else if (know_dist && stored_pos < trace->cp_offset()) {
3621 // If we know we've advanced we can generate the continuation 3571 // If we know we've advanced we can generate the continuation
3622 // immediately. 3572 // immediately.
3623 on_success()->Emit(compiler, trace); 3573 on_success()->Emit(compiler, trace);
3624 } else if (!trace->is_trivial()) { 3574 } else if (!trace->is_trivial()) {
3625 trace->Flush(compiler, this); 3575 trace->Flush(compiler, this);
3626 } else { 3576 } else {
3627 BlockLabel skip_empty_check; 3577 Label skip_empty_check;
3628 // If we have a minimum number of repetitions we check the current 3578 // If we have a minimum number of repetitions we check the current
3629 // number first and skip the empty check if it's not enough. 3579 // number first and skip the empty check if it's not enough.
3630 if (has_minimum) { 3580 if (has_minimum) {
3631 intptr_t limit = data_.u_empty_match_check.repetition_limit; 3581 int limit = data_.u_empty_match_check.repetition_limit;
3632 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check); 3582 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
3633 } 3583 }
3634 // If the match is empty we bail out, otherwise we fall through 3584 // If the match is empty we bail out, otherwise we fall through
3635 // to the on-success continuation. 3585 // to the on-success continuation.
3636 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register, 3586 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
3637 trace->backtrack()); 3587 trace->backtrack());
3638 assembler->BindBlock(&skip_empty_check); 3588 assembler->Bind(&skip_empty_check);
3639 on_success()->Emit(compiler, trace); 3589 on_success()->Emit(compiler, trace);
3640 } 3590 }
3641 break; 3591 break;
3642 } 3592 }
3643 case POSITIVE_SUBMATCH_SUCCESS: { 3593 case POSITIVE_SUBMATCH_SUCCESS: {
3644 if (!trace->is_trivial()) { 3594 if (!trace->is_trivial()) {
3645 trace->Flush(compiler, this); 3595 trace->Flush(compiler, this);
3646 return; 3596 return;
3647 } 3597 }
3648 assembler->ReadCurrentPositionFromRegister( 3598 assembler->ReadCurrentPositionFromRegister(
3649 data_.u_submatch.current_position_register); 3599 data_.u_submatch.current_position_register);
3650 assembler->ReadStackPointerFromRegister( 3600 assembler->ReadStackPointerFromRegister(
3651 data_.u_submatch.stack_pointer_register); 3601 data_.u_submatch.stack_pointer_register);
3652 intptr_t clear_register_count = data_.u_submatch.clear_register_count; 3602 int clear_register_count = data_.u_submatch.clear_register_count;
3653 if (clear_register_count == 0) { 3603 if (clear_register_count == 0) {
3654 on_success()->Emit(compiler, trace); 3604 on_success()->Emit(compiler, trace);
3655 return; 3605 return;
3656 } 3606 }
3657 intptr_t clear_registers_from = data_.u_submatch.clear_register_from; 3607 int clear_registers_from = data_.u_submatch.clear_register_from;
3658 BlockLabel clear_registers_backtrack; 3608 Label clear_registers_backtrack;
3659 Trace new_trace = *trace; 3609 Trace new_trace = *trace;
3660 new_trace.set_backtrack(&clear_registers_backtrack); 3610 new_trace.set_backtrack(&clear_registers_backtrack);
3661 on_success()->Emit(compiler, &new_trace); 3611 on_success()->Emit(compiler, &new_trace);
3662 3612
3663 assembler->BindBlock(&clear_registers_backtrack); 3613 assembler->Bind(&clear_registers_backtrack);
3664 intptr_t clear_registers_to = 3614 int clear_registers_to = clear_registers_from + clear_register_count - 1;
3665 clear_registers_from + clear_register_count - 1;
3666 assembler->ClearRegisters(clear_registers_from, clear_registers_to); 3615 assembler->ClearRegisters(clear_registers_from, clear_registers_to);
3667 3616
3668 ASSERT(trace->backtrack() == NULL); 3617 DCHECK(trace->backtrack() == NULL);
3669 assembler->Backtrack(); 3618 assembler->Backtrack();
3670 return; 3619 return;
3671 } 3620 }
3672 default: 3621 default:
3673 UNREACHABLE(); 3622 UNREACHABLE();
3674 } 3623 }
3675 } 3624 }
3676 3625
3677 3626
3678 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3627 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3679 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3628 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3680 if (!trace->is_trivial()) { 3629 if (!trace->is_trivial()) {
3681 trace->Flush(compiler, this); 3630 trace->Flush(compiler, this);
3682 return; 3631 return;
3683 } 3632 }
3684 3633
3685 LimitResult limit_result = LimitVersions(compiler, trace); 3634 LimitResult limit_result = LimitVersions(compiler, trace);
3686 if (limit_result == DONE) return; 3635 if (limit_result == DONE) return;
3687 ASSERT(limit_result == CONTINUE); 3636 DCHECK(limit_result == CONTINUE);
3688 3637
3689 RecursionCheck rc(compiler); 3638 RecursionCheck rc(compiler);
3690 3639
3691 ASSERT(start_reg_ + 1 == end_reg_); 3640 DCHECK_EQ(start_reg_ + 1, end_reg_);
3692 if (compiler->ignore_case()) { 3641 if (compiler->ignore_case()) {
3693 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, 3642 assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
3694 trace->backtrack()); 3643 trace->backtrack());
3695 } else { 3644 } else {
3696 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); 3645 assembler->CheckNotBackReference(start_reg_, trace->backtrack());
3697 } 3646 }
3698 on_success()->Emit(compiler, trace); 3647 on_success()->Emit(compiler, trace);
3699 } 3648 }
3700 3649
3701 3650
3702 // ------------------------------------------------------------------- 3651 // -------------------------------------------------------------------
3703 // Dot/dotty output 3652 // Dot/dotty output
3704 3653
3705 3654
3706 #ifdef DEBUG 3655 #ifdef DEBUG
3707 3656
3708 3657
3709 class DotPrinter: public NodeVisitor { 3658 class DotPrinter: public NodeVisitor {
3710 public: 3659 public:
3711 explicit DotPrinter(bool ignore_case) 3660 DotPrinter(OStream& os, bool ignore_case) // NOLINT
3712 : ignore_case_(ignore_case) {} 3661 : os_(os),
3662 ignore_case_(ignore_case) {}
3713 void PrintNode(const char* label, RegExpNode* node); 3663 void PrintNode(const char* label, RegExpNode* node);
3714 void Visit(RegExpNode* node); 3664 void Visit(RegExpNode* node);
3715 void PrintAttributes(RegExpNode* from); 3665 void PrintAttributes(RegExpNode* from);
3716 void PrintOnFailure(RegExpNode* from, RegExpNode* to); 3666 void PrintOnFailure(RegExpNode* from, RegExpNode* to);
3717 #define DECLARE_VISIT(Type) \ 3667 #define DECLARE_VISIT(Type) \
3718 virtual void Visit##Type(Type##Node* that); 3668 virtual void Visit##Type(Type##Node* that);
3719 FOR_EACH_NODE_TYPE(DECLARE_VISIT) 3669 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
3720 #undef DECLARE_VISIT 3670 #undef DECLARE_VISIT
3721 private: 3671 private:
3672 OStream& os_;
3722 bool ignore_case_; 3673 bool ignore_case_;
3723 }; 3674 };
3724 3675
3725 3676
3726 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { 3677 void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
3727 OS::Print("digraph G {\n graph [label=\""); 3678 os_ << "digraph G {\n graph [label=\"";
3728 for (intptr_t i = 0; label[i]; i++) { 3679 for (int i = 0; label[i]; i++) {
3729 switch (label[i]) { 3680 switch (label[i]) {
3730 case '\\': 3681 case '\\':
3731 OS::Print("\\\\"); 3682 os_ << "\\\\";
3732 break; 3683 break;
3733 case '"': 3684 case '"':
3734 OS::Print("\""); 3685 os_ << "\"";
3735 break; 3686 break;
3736 default: 3687 default:
3737 OS::Print("%c", label[i]); 3688 os_ << label[i];
3738 break; 3689 break;
3739 } 3690 }
3740 } 3691 }
3741 OS::Print("\"];\n"); 3692 os_ << "\"];\n";
3742 Visit(node); 3693 Visit(node);
3743 OS::Print("}\n"); 3694 os_ << "}" << endl;
3744 } 3695 }
3745 3696
3746 3697
3747 void DotPrinter::Visit(RegExpNode* node) { 3698 void DotPrinter::Visit(RegExpNode* node) {
3748 if (node->info()->visited) return; 3699 if (node->info()->visited) return;
3749 node->info()->visited = true; 3700 node->info()->visited = true;
3750 node->Accept(this); 3701 node->Accept(this);
3751 } 3702 }
3752 3703
3753 3704
3754 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { 3705 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
3755 OS::Print(" n%p -> n%p [style=dotted];\n", from, on_failure); 3706 os_ << " n" << from << " -> n" << on_failure << " [style=dotted];\n";
3756 Visit(on_failure); 3707 Visit(on_failure);
3757 } 3708 }
3758 3709
3759 3710
3760 class AttributePrinter : public ValueObject { 3711 class TableEntryBodyPrinter {
3761 public: 3712 public:
3762 AttributePrinter() : first_(true) {} 3713 TableEntryBodyPrinter(OStream& os, ChoiceNode* choice) // NOLINT
3714 : os_(os),
3715 choice_(choice) {}
3716 void Call(uc16 from, DispatchTable::Entry entry) {
3717 OutSet* out_set = entry.out_set();
3718 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
3719 if (out_set->Get(i)) {
3720 os_ << " n" << choice() << ":s" << from << "o" << i << " -> n"
3721 << choice()->alternatives()->at(i).node() << ";\n";
3722 }
3723 }
3724 }
3725 private:
3726 ChoiceNode* choice() { return choice_; }
3727 OStream& os_;
3728 ChoiceNode* choice_;
3729 };
3730
3731
3732 class TableEntryHeaderPrinter {
3733 public:
3734 explicit TableEntryHeaderPrinter(OStream& os) // NOLINT
3735 : first_(true),
3736 os_(os) {}
3737 void Call(uc16 from, DispatchTable::Entry entry) {
3738 if (first_) {
3739 first_ = false;
3740 } else {
3741 os_ << "|";
3742 }
3743 os_ << "{\\" << AsUC16(from) << "-\\" << AsUC16(entry.to()) << "|{";
3744 OutSet* out_set = entry.out_set();
3745 int priority = 0;
3746 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
3747 if (out_set->Get(i)) {
3748 if (priority > 0) os_ << "|";
3749 os_ << "<s" << from << "o" << i << "> " << priority;
3750 priority++;
3751 }
3752 }
3753 os_ << "}}";
3754 }
3755
3756 private:
3757 bool first_;
3758 OStream& os_;
3759 };
3760
3761
3762 class AttributePrinter {
3763 public:
3764 explicit AttributePrinter(OStream& os) // NOLINT
3765 : os_(os),
3766 first_(true) {}
3763 void PrintSeparator() { 3767 void PrintSeparator() {
3764 if (first_) { 3768 if (first_) {
3765 first_ = false; 3769 first_ = false;
3766 } else { 3770 } else {
3767 OS::Print("|"); 3771 os_ << "|";
3768 } 3772 }
3769 } 3773 }
3770 void PrintBit(const char* name, bool value) { 3774 void PrintBit(const char* name, bool value) {
3771 if (!value) return; 3775 if (!value) return;
3772 PrintSeparator(); 3776 PrintSeparator();
3773 OS::Print("{%s}", name); 3777 os_ << "{" << name << "}";
3774 } 3778 }
3775 void PrintPositive(const char* name, intptr_t value) { 3779 void PrintPositive(const char* name, int value) {
3776 if (value < 0) return; 3780 if (value < 0) return;
3777 PrintSeparator(); 3781 PrintSeparator();
3778 OS::Print("{%s|%" Pd "}", name, value); 3782 os_ << "{" << name << "|" << value << "}";
3779 } 3783 }
3780 3784
3781 private: 3785 private:
3786 OStream& os_;
3782 bool first_; 3787 bool first_;
3783 }; 3788 };
3784 3789
3785 3790
3786 void DotPrinter::PrintAttributes(RegExpNode* that) { 3791 void DotPrinter::PrintAttributes(RegExpNode* that) {
3787 OS::Print(" a%p [shape=Mrecord, color=grey, fontcolor=grey, " 3792 os_ << " a" << that << " [shape=Mrecord, color=grey, fontcolor=grey, "
3788 "margin=0.1, fontsize=10, label=\"{", that); 3793 << "margin=0.1, fontsize=10, label=\"{";
3789 AttributePrinter printer; 3794 AttributePrinter printer(os_);
3790 NodeInfo* info = that->info(); 3795 NodeInfo* info = that->info();
3791 printer.PrintBit("NI", info->follows_newline_interest); 3796 printer.PrintBit("NI", info->follows_newline_interest);
3792 printer.PrintBit("WI", info->follows_word_interest); 3797 printer.PrintBit("WI", info->follows_word_interest);
3793 printer.PrintBit("SI", info->follows_start_interest); 3798 printer.PrintBit("SI", info->follows_start_interest);
3794 BlockLabel* label = that->label(); 3799 Label* label = that->label();
3795 if (label->IsBound()) 3800 if (label->is_bound())
3796 printer.PrintPositive("@", label->Position()); 3801 printer.PrintPositive("@", label->pos());
3797 OS::Print("}\"];\n" 3802 os_ << "}\"];\n"
3798 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", 3803 << " a" << that << " -> n" << that
3799 that, that); 3804 << " [style=dashed, color=grey, arrowhead=none];\n";
3800 } 3805 }
3801 3806
3802 3807
3808 static const bool kPrintDispatchTable = false;
3803 void DotPrinter::VisitChoice(ChoiceNode* that) { 3809 void DotPrinter::VisitChoice(ChoiceNode* that) {
3804 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that); 3810 if (kPrintDispatchTable) {
3805 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { 3811 os_ << " n" << that << " [shape=Mrecord, label=\"";
3806 GuardedAlternative alt = that->alternatives()->At(i); 3812 TableEntryHeaderPrinter header_printer(os_);
3807 OS::Print(" n%p -> n%p", that, alt.node()); 3813 that->GetTable(ignore_case_)->ForEach(&header_printer);
3808 } 3814 os_ << "\"]\n";
3809 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { 3815 PrintAttributes(that);
3810 GuardedAlternative alt = that->alternatives()->At(i); 3816 TableEntryBodyPrinter body_printer(os_, that);
3817 that->GetTable(ignore_case_)->ForEach(&body_printer);
3818 } else {
3819 os_ << " n" << that << " [shape=Mrecord, label=\"?\"];\n";
3820 for (int i = 0; i < that->alternatives()->length(); i++) {
3821 GuardedAlternative alt = that->alternatives()->at(i);
3822 os_ << " n" << that << " -> n" << alt.node();
3823 }
3824 }
3825 for (int i = 0; i < that->alternatives()->length(); i++) {
3826 GuardedAlternative alt = that->alternatives()->at(i);
3811 alt.node()->Accept(this); 3827 alt.node()->Accept(this);
3812 } 3828 }
3813 } 3829 }
3814 3830
3815 3831
3816 void DotPrinter::VisitText(TextNode* that) { 3832 void DotPrinter::VisitText(TextNode* that) {
3817 OS::Print(" n%p [label=\"", that); 3833 Zone* zone = that->zone();
3818 for (intptr_t i = 0; i < that->elements()->length(); i++) { 3834 os_ << " n" << that << " [label=\"";
3819 if (i > 0) OS::Print(" "); 3835 for (int i = 0; i < that->elements()->length(); i++) {
3820 TextElement elm = that->elements()->At(i); 3836 if (i > 0) os_ << " ";
3837 TextElement elm = that->elements()->at(i);
3821 switch (elm.text_type()) { 3838 switch (elm.text_type()) {
3822 case TextElement::ATOM: { 3839 case TextElement::ATOM: {
3823 ZoneGrowableArray<uint16_t>* data = elm.atom()->data(); 3840 Vector<const uc16> data = elm.atom()->data();
3824 for (intptr_t i = 0; i < data->length(); i++) { 3841 for (int i = 0; i < data.length(); i++) {
3825 OS::Print("%c", static_cast<char>(data->At(i))); 3842 os_ << static_cast<char>(data[i]);
3826 } 3843 }
3827 break; 3844 break;
3828 } 3845 }
3829 case TextElement::CHAR_CLASS: { 3846 case TextElement::CHAR_CLASS: {
3830 RegExpCharacterClass* node = elm.char_class(); 3847 RegExpCharacterClass* node = elm.char_class();
3831 OS::Print("["); 3848 os_ << "[";
3832 if (node->is_negated()) OS::Print("^"); 3849 if (node->is_negated()) os_ << "^";
3833 for (intptr_t j = 0; j < node->ranges()->length(); j++) { 3850 for (int j = 0; j < node->ranges(zone)->length(); j++) {
3834 CharacterRange range = node->ranges()->At(j); 3851 CharacterRange range = node->ranges(zone)->at(j);
3835 PrintUtf16(range.from()); 3852 os_ << AsUC16(range.from()) << "-" << AsUC16(range.to());
3836 OS::Print("-");
3837 PrintUtf16(range.to());
3838 } 3853 }
3839 OS::Print("]"); 3854 os_ << "]";
3840 break; 3855 break;
3841 } 3856 }
3842 default: 3857 default:
3843 UNREACHABLE(); 3858 UNREACHABLE();
3844 } 3859 }
3845 } 3860 }
3846 OS::Print("\", shape=box, peripheries=2];\n"); 3861 os_ << "\", shape=box, peripheries=2];\n";
3847 PrintAttributes(that); 3862 PrintAttributes(that);
3848 OS::Print(" n%p -> n%p;\n", that, that->on_success()); 3863 os_ << " n" << that << " -> n" << that->on_success() << ";\n";
3849 Visit(that->on_success()); 3864 Visit(that->on_success());
3850 } 3865 }
3851 3866
3852 3867
3853 void DotPrinter::VisitBackReference(BackReferenceNode* that) { 3868 void DotPrinter::VisitBackReference(BackReferenceNode* that) {
3854 OS::Print(" n%p [label=\"$%" Pd "..$%" Pd "\", shape=doubleoctagon];\n", 3869 os_ << " n" << that << " [label=\"$" << that->start_register() << "..$"
3855 that, that->start_register(), that->end_register()); 3870 << that->end_register() << "\", shape=doubleoctagon];\n";
3856 PrintAttributes(that); 3871 PrintAttributes(that);
3857 OS::Print(" n%p -> n%p;\n", that, that->on_success()); 3872 os_ << " n" << that << " -> n" << that->on_success() << ";\n";
3858 Visit(that->on_success()); 3873 Visit(that->on_success());
3859 } 3874 }
3860 3875
3861 3876
3862 void DotPrinter::VisitEnd(EndNode* that) { 3877 void DotPrinter::VisitEnd(EndNode* that) {
3863 OS::Print(" n%p [style=bold, shape=point];\n", that); 3878 os_ << " n" << that << " [style=bold, shape=point];\n";
3864 PrintAttributes(that); 3879 PrintAttributes(that);
3865 } 3880 }
3866 3881
3867 3882
3868 void DotPrinter::VisitAssertion(AssertionNode* that) { 3883 void DotPrinter::VisitAssertion(AssertionNode* that) {
3869 OS::Print(" n%p [", that); 3884 os_ << " n" << that << " [";
3870 switch (that->assertion_type()) { 3885 switch (that->assertion_type()) {
3871 case AssertionNode::AT_END: 3886 case AssertionNode::AT_END:
3872 OS::Print("label=\"$\", shape=septagon"); 3887 os_ << "label=\"$\", shape=septagon";
3873 break; 3888 break;
3874 case AssertionNode::AT_START: 3889 case AssertionNode::AT_START:
3875 OS::Print("label=\"^\", shape=septagon"); 3890 os_ << "label=\"^\", shape=septagon";
3876 break; 3891 break;
3877 case AssertionNode::AT_BOUNDARY: 3892 case AssertionNode::AT_BOUNDARY:
3878 OS::Print("label=\"\\b\", shape=septagon"); 3893 os_ << "label=\"\\b\", shape=septagon";
3879 break; 3894 break;
3880 case AssertionNode::AT_NON_BOUNDARY: 3895 case AssertionNode::AT_NON_BOUNDARY:
3881 OS::Print("label=\"\\B\", shape=septagon"); 3896 os_ << "label=\"\\B\", shape=septagon";
3882 break; 3897 break;
3883 case AssertionNode::AFTER_NEWLINE: 3898 case AssertionNode::AFTER_NEWLINE:
3884 OS::Print("label=\"(?<=\\n)\", shape=septagon"); 3899 os_ << "label=\"(?<=\\n)\", shape=septagon";
3885 break; 3900 break;
3886 } 3901 }
3887 OS::Print("];\n"); 3902 os_ << "];\n";
3888 PrintAttributes(that); 3903 PrintAttributes(that);
3889 RegExpNode* successor = that->on_success(); 3904 RegExpNode* successor = that->on_success();
3890 OS::Print(" n%p -> n%p;\n", that, successor); 3905 os_ << " n" << that << " -> n" << successor << ";\n";
3891 Visit(successor); 3906 Visit(successor);
3892 } 3907 }
3893 3908
3894 3909
3895 void DotPrinter::VisitAction(ActionNode* that) { 3910 void DotPrinter::VisitAction(ActionNode* that) {
3896 OS::Print(" n%p [", that); 3911 os_ << " n" << that << " [";
3897 switch (that->action_type_) { 3912 switch (that->action_type_) {
3898 case ActionNode::SET_REGISTER: 3913 case ActionNode::SET_REGISTER:
3899 OS::Print("label=\"$%" Pd ":=%" Pd "\", shape=octagon", 3914 os_ << "label=\"$" << that->data_.u_store_register.reg
3900 that->data_.u_store_register.reg, 3915 << ":=" << that->data_.u_store_register.value << "\", shape=octagon";
3901 that->data_.u_store_register.value);
3902 break; 3916 break;
3903 case ActionNode::INCREMENT_REGISTER: 3917 case ActionNode::INCREMENT_REGISTER:
3904 OS::Print("label=\"$%" Pd "++\", shape=octagon", 3918 os_ << "label=\"$" << that->data_.u_increment_register.reg
3905 that->data_.u_increment_register.reg); 3919 << "++\", shape=octagon";
3906 break; 3920 break;
3907 case ActionNode::STORE_POSITION: 3921 case ActionNode::STORE_POSITION:
3908 OS::Print("label=\"$%" Pd ":=$pos\", shape=octagon", 3922 os_ << "label=\"$" << that->data_.u_position_register.reg
3909 that->data_.u_position_register.reg); 3923 << ":=$pos\", shape=octagon";
3910 break; 3924 break;
3911 case ActionNode::BEGIN_SUBMATCH: 3925 case ActionNode::BEGIN_SUBMATCH:
3912 OS::Print("label=\"$%" Pd ":=$pos,begin\", shape=septagon", 3926 os_ << "label=\"$" << that->data_.u_submatch.current_position_register
3913 that->data_.u_submatch.current_position_register); 3927 << ":=$pos,begin\", shape=septagon";
3914 break; 3928 break;
3915 case ActionNode::POSITIVE_SUBMATCH_SUCCESS: 3929 case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
3916 OS::Print("label=\"escape\", shape=septagon"); 3930 os_ << "label=\"escape\", shape=septagon";
3917 break; 3931 break;
3918 case ActionNode::EMPTY_MATCH_CHECK: 3932 case ActionNode::EMPTY_MATCH_CHECK:
3919 OS::Print("label=\"$%" Pd "=$pos?,$%" Pd "<%" Pd "?\", shape=septagon", 3933 os_ << "label=\"$" << that->data_.u_empty_match_check.start_register
3920 that->data_.u_empty_match_check.start_register, 3934 << "=$pos?,$" << that->data_.u_empty_match_check.repetition_register
3921 that->data_.u_empty_match_check.repetition_register, 3935 << "<" << that->data_.u_empty_match_check.repetition_limit
3922 that->data_.u_empty_match_check.repetition_limit); 3936 << "?\", shape=septagon";
3923 break; 3937 break;
3924 case ActionNode::CLEAR_CAPTURES: { 3938 case ActionNode::CLEAR_CAPTURES: {
3925 OS::Print("label=\"clear $%" Pd " to $%" Pd "\", shape=septagon", 3939 os_ << "label=\"clear $" << that->data_.u_clear_captures.range_from
3926 that->data_.u_clear_captures.range_from, 3940 << " to $" << that->data_.u_clear_captures.range_to
3927 that->data_.u_clear_captures.range_to); 3941 << "\", shape=septagon";
3928 break; 3942 break;
3929 } 3943 }
3930 } 3944 }
3931 OS::Print("];\n"); 3945 os_ << "];\n";
3932 PrintAttributes(that); 3946 PrintAttributes(that);
3933 RegExpNode* successor = that->on_success(); 3947 RegExpNode* successor = that->on_success();
3934 OS::Print(" n%p -> n%p;\n", that, successor); 3948 os_ << " n" << that << " -> n" << successor << ";\n";
3935 Visit(successor); 3949 Visit(successor);
3936 } 3950 }
3937 3951
3938 3952
3953 class DispatchTableDumper {
3954 public:
3955 explicit DispatchTableDumper(OStream& os) : os_(os) {}
3956 void Call(uc16 key, DispatchTable::Entry entry);
3957 private:
3958 OStream& os_;
3959 };
3960
3961
3962 void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {
3963 os_ << "[" << AsUC16(key) << "-" << AsUC16(entry.to()) << "]: {";
3964 OutSet* set = entry.out_set();
3965 bool first = true;
3966 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
3967 if (set->Get(i)) {
3968 if (first) {
3969 first = false;
3970 } else {
3971 os_ << ", ";
3972 }
3973 os_ << i;
3974 }
3975 }
3976 os_ << "}\n";
3977 }
3978
3979
3980 void DispatchTable::Dump() {
3981 OFStream os(stderr);
3982 DispatchTableDumper dumper(os);
3983 tree()->ForEach(&dumper);
3984 }
3985
3986
3939 void RegExpEngine::DotPrint(const char* label, 3987 void RegExpEngine::DotPrint(const char* label,
3940 RegExpNode* node, 3988 RegExpNode* node,
3941 bool ignore_case) { 3989 bool ignore_case) {
3942 DotPrinter printer(ignore_case); 3990 OFStream os(stdout);
3991 DotPrinter printer(os, ignore_case);
3943 printer.PrintNode(label, node); 3992 printer.PrintNode(label, node);
3944 } 3993 }
3945 3994
3946 3995
3947 #endif // DEBUG 3996 #endif // DEBUG
3948 3997
3949 3998
3950 // ------------------------------------------------------------------- 3999 // -------------------------------------------------------------------
3951 // Tree to graph conversion 4000 // Tree to graph conversion
3952 4001
3953 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, 4002 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
3954 RegExpNode* on_success) { 4003 RegExpNode* on_success) {
3955 ZoneGrowableArray<TextElement>* elms = 4004 ZoneList<TextElement>* elms =
3956 new(CI) ZoneGrowableArray<TextElement>(1); 4005 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());
3957 elms->Add(TextElement::Atom(this)); 4006 elms->Add(TextElement::Atom(this), compiler->zone());
3958 return new(CI) TextNode(elms, on_success); 4007 return new(compiler->zone()) TextNode(elms, on_success);
3959 } 4008 }
3960 4009
3961 4010
3962 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, 4011 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
3963 RegExpNode* on_success) { 4012 RegExpNode* on_success) {
3964 ZoneGrowableArray<TextElement>* elms = 4013 return new(compiler->zone()) TextNode(elements(), on_success);
3965 new(CI) ZoneGrowableArray<TextElement>(1);
3966 for (intptr_t i = 0; i < elements()->length(); i++) {
3967 elms->Add(elements()->At(i));
3968 }
3969 return new(CI) TextNode(elms, on_success);
3970 } 4014 }
3971 4015
3972 4016
3973 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges, 4017 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
3974 const intptr_t* special_class, 4018 const int* special_class,
3975 intptr_t length) { 4019 int length) {
3976 length--; // Remove final 0x10000. 4020 length--; // Remove final 0x10000.
3977 ASSERT(special_class[length] == 0x10000); 4021 DCHECK(special_class[length] == 0x10000);
3978 ASSERT(ranges->length() != 0); 4022 DCHECK(ranges->length() != 0);
3979 ASSERT(length != 0); 4023 DCHECK(length != 0);
3980 ASSERT(special_class[0] != 0); 4024 DCHECK(special_class[0] != 0);
3981 if (ranges->length() != (length >> 1) + 1) { 4025 if (ranges->length() != (length >> 1) + 1) {
3982 return false; 4026 return false;
3983 } 4027 }
3984 CharacterRange range = ranges->At(0); 4028 CharacterRange range = ranges->at(0);
3985 if (range.from() != 0) { 4029 if (range.from() != 0) {
3986 return false; 4030 return false;
3987 } 4031 }
3988 for (intptr_t i = 0; i < length; i += 2) { 4032 for (int i = 0; i < length; i += 2) {
3989 if (special_class[i] != (range.to() + 1)) { 4033 if (special_class[i] != (range.to() + 1)) {
3990 return false; 4034 return false;
3991 } 4035 }
3992 range = ranges->At((i >> 1) + 1); 4036 range = ranges->at((i >> 1) + 1);
3993 if (special_class[i+1] != range.from()) { 4037 if (special_class[i+1] != range.from()) {
3994 return false; 4038 return false;
3995 } 4039 }
3996 } 4040 }
3997 if (range.to() != 0xffff) { 4041 if (range.to() != 0xffff) {
3998 return false; 4042 return false;
3999 } 4043 }
4000 return true; 4044 return true;
4001 } 4045 }
4002 4046
4003 4047
4004 static bool CompareRanges(ZoneGrowableArray<CharacterRange>* ranges, 4048 static bool CompareRanges(ZoneList<CharacterRange>* ranges,
4005 const intptr_t* special_class, 4049 const int* special_class,
4006 intptr_t length) { 4050 int length) {
4007 length--; // Remove final 0x10000. 4051 length--; // Remove final 0x10000.
4008 ASSERT(special_class[length] == 0x10000); 4052 DCHECK(special_class[length] == 0x10000);
4009 if (ranges->length() * 2 != length) { 4053 if (ranges->length() * 2 != length) {
4010 return false; 4054 return false;
4011 } 4055 }
4012 for (intptr_t i = 0; i < length; i += 2) { 4056 for (int i = 0; i < length; i += 2) {
4013 CharacterRange range = ranges->At(i >> 1); 4057 CharacterRange range = ranges->at(i >> 1);
4014 if (range.from() != special_class[i] || 4058 if (range.from() != special_class[i] ||
4015 range.to() != special_class[i + 1] - 1) { 4059 range.to() != special_class[i + 1] - 1) {
4016 return false; 4060 return false;
4017 } 4061 }
4018 } 4062 }
4019 return true; 4063 return true;
4020 } 4064 }
4021 4065
4022 4066
4023 bool RegExpCharacterClass::is_standard() { 4067 bool RegExpCharacterClass::is_standard(Zone* zone) {
4024 // TODO(lrn): Remove need for this function, by not throwing away information 4068 // TODO(lrn): Remove need for this function, by not throwing away information
4025 // along the way. 4069 // along the way.
4026 if (is_negated_) { 4070 if (is_negated_) {
4027 return false; 4071 return false;
4028 } 4072 }
4029 if (set_.is_standard()) { 4073 if (set_.is_standard()) {
4030 return true; 4074 return true;
4031 } 4075 }
4032 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { 4076 if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
4033 set_.set_standard_set_type('s'); 4077 set_.set_standard_set_type('s');
4034 return true; 4078 return true;
4035 } 4079 }
4036 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { 4080 if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
4037 set_.set_standard_set_type('S'); 4081 set_.set_standard_set_type('S');
4038 return true; 4082 return true;
4039 } 4083 }
4040 if (CompareInverseRanges(set_.ranges(), 4084 if (CompareInverseRanges(set_.ranges(zone),
4041 kLineTerminatorRanges, 4085 kLineTerminatorRanges,
4042 kLineTerminatorRangeCount)) { 4086 kLineTerminatorRangeCount)) {
4043 set_.set_standard_set_type('.'); 4087 set_.set_standard_set_type('.');
4044 return true; 4088 return true;
4045 } 4089 }
4046 if (CompareRanges(set_.ranges(), 4090 if (CompareRanges(set_.ranges(zone),
4047 kLineTerminatorRanges, 4091 kLineTerminatorRanges,
4048 kLineTerminatorRangeCount)) { 4092 kLineTerminatorRangeCount)) {
4049 set_.set_standard_set_type('n'); 4093 set_.set_standard_set_type('n');
4050 return true; 4094 return true;
4051 } 4095 }
4052 if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { 4096 if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4053 set_.set_standard_set_type('w'); 4097 set_.set_standard_set_type('w');
4054 return true; 4098 return true;
4055 } 4099 }
4056 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { 4100 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4057 set_.set_standard_set_type('W'); 4101 set_.set_standard_set_type('W');
4058 return true; 4102 return true;
4059 } 4103 }
4060 return false; 4104 return false;
4061 } 4105 }
4062 4106
4063 4107
4064 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 4108 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
4065 RegExpNode* on_success) { 4109 RegExpNode* on_success) {
4066 return new(CI) TextNode(this, on_success); 4110 return new(compiler->zone()) TextNode(this, on_success);
4067 } 4111 }
4068 4112
4069 4113
4070 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, 4114 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
4071 RegExpNode* on_success) { 4115 RegExpNode* on_success) {
4072 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); 4116 ZoneList<RegExpTree*>* alternatives = this->alternatives();
4073 intptr_t length = alternatives->length(); 4117 int length = alternatives->length();
4074 ChoiceNode* result = 4118 ChoiceNode* result =
4075 new(CI) ChoiceNode(length, CI); 4119 new(compiler->zone()) ChoiceNode(length, compiler->zone());
4076 for (intptr_t i = 0; i < length; i++) { 4120 for (int i = 0; i < length; i++) {
4077 GuardedAlternative alternative(alternatives->At(i)->ToNode(compiler, 4121 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,
4078 on_success)); 4122 on_success));
4079 result->AddAlternative(alternative); 4123 result->AddAlternative(alternative);
4080 } 4124 }
4081 return result; 4125 return result;
4082 } 4126 }
4083 4127
4084 4128
4085 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, 4129 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
4086 RegExpNode* on_success) { 4130 RegExpNode* on_success) {
4087 return ToNode(min(), 4131 return ToNode(min(),
4088 max(), 4132 max(),
4089 is_greedy(), 4133 is_greedy(),
4090 body(), 4134 body(),
4091 compiler, 4135 compiler,
4092 on_success); 4136 on_success);
4093 } 4137 }
4094 4138
4095 4139
4096 // Scoped object to keep track of how much we unroll quantifier loops in the 4140 // Scoped object to keep track of how much we unroll quantifier loops in the
4097 // regexp graph generator. 4141 // regexp graph generator.
4098 class RegExpExpansionLimiter : public ValueObject { 4142 class RegExpExpansionLimiter {
4099 public: 4143 public:
4100 static const intptr_t kMaxExpansionFactor = 6; 4144 static const int kMaxExpansionFactor = 6;
4101 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) 4145 RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
4102 : compiler_(compiler), 4146 : compiler_(compiler),
4103 saved_expansion_factor_(compiler->current_expansion_factor()), 4147 saved_expansion_factor_(compiler->current_expansion_factor()),
4104 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { 4148 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
4105 ASSERT(factor > 0); 4149 DCHECK(factor > 0);
4106 if (ok_to_expand_) { 4150 if (ok_to_expand_) {
4107 if (factor > kMaxExpansionFactor) { 4151 if (factor > kMaxExpansionFactor) {
4108 // Avoid integer overflow of the current expansion factor. 4152 // Avoid integer overflow of the current expansion factor.
4109 ok_to_expand_ = false; 4153 ok_to_expand_ = false;
4110 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1); 4154 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
4111 } else { 4155 } else {
4112 intptr_t new_factor = saved_expansion_factor_ * factor; 4156 int new_factor = saved_expansion_factor_ * factor;
4113 ok_to_expand_ = (new_factor <= kMaxExpansionFactor); 4157 ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
4114 compiler->set_current_expansion_factor(new_factor); 4158 compiler->set_current_expansion_factor(new_factor);
4115 } 4159 }
4116 } 4160 }
4117 } 4161 }
4118 4162
4119 ~RegExpExpansionLimiter() { 4163 ~RegExpExpansionLimiter() {
4120 compiler_->set_current_expansion_factor(saved_expansion_factor_); 4164 compiler_->set_current_expansion_factor(saved_expansion_factor_);
4121 } 4165 }
4122 4166
4123 bool ok_to_expand() { return ok_to_expand_; } 4167 bool ok_to_expand() { return ok_to_expand_; }
4124 4168
4125 private: 4169 private:
4126 RegExpCompiler* compiler_; 4170 RegExpCompiler* compiler_;
4127 intptr_t saved_expansion_factor_; 4171 int saved_expansion_factor_;
4128 bool ok_to_expand_; 4172 bool ok_to_expand_;
4129 4173
4130 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); 4174 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
4131 }; 4175 };
4132 4176
4133 4177
4134 RegExpNode* RegExpQuantifier::ToNode(intptr_t min, 4178 RegExpNode* RegExpQuantifier::ToNode(int min,
4135 intptr_t max, 4179 int max,
4136 bool is_greedy, 4180 bool is_greedy,
4137 RegExpTree* body, 4181 RegExpTree* body,
4138 RegExpCompiler* compiler, 4182 RegExpCompiler* compiler,
4139 RegExpNode* on_success, 4183 RegExpNode* on_success,
4140 bool not_at_start) { 4184 bool not_at_start) {
4141 // x{f, t} becomes this: 4185 // x{f, t} becomes this:
4142 // 4186 //
4143 // (r++)<-. 4187 // (r++)<-.
4144 // | ` 4188 // | `
4145 // | (x) 4189 // | (x)
4146 // v ^ 4190 // v ^
4147 // (r=0)-->(?)---/ [if r < t] 4191 // (r=0)-->(?)---/ [if r < t]
4148 // | 4192 // |
4149 // [if r >= f] \----> ... 4193 // [if r >= f] \----> ...
4150 // 4194 //
4151 4195
4152 // 15.10.2.5 RepeatMatcher algorithm. 4196 // 15.10.2.5 RepeatMatcher algorithm.
4153 // The parser has already eliminated the case where max is 0. In the case 4197 // The parser has already eliminated the case where max is 0. In the case
4154 // where max_match is zero the parser has removed the quantifier if min was 4198 // where max_match is zero the parser has removed the quantifier if min was
4155 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. 4199 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom.
4156 4200
4157 // If we know that we cannot match zero length then things are a little 4201 // If we know that we cannot match zero length then things are a little
4158 // simpler since we don't need to make the special zero length match check 4202 // simpler since we don't need to make the special zero length match check
4159 // from step 2.1. If the min and max are small we can unroll a little in 4203 // from step 2.1. If the min and max are small we can unroll a little in
4160 // this case. 4204 // this case.
4161 // Unroll (foo)+ and (foo){3,} 4205 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,}
4162 static const intptr_t kMaxUnrolledMinMatches = 3; 4206 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}
4163 // Unroll (foo)? and (foo){x,3}
4164 static const intptr_t kMaxUnrolledMaxMatches = 3;
4165 if (max == 0) return on_success; // This can happen due to recursion. 4207 if (max == 0) return on_success; // This can happen due to recursion.
4166 bool body_can_be_empty = (body->min_match() == 0); 4208 bool body_can_be_empty = (body->min_match() == 0);
4167 intptr_t body_start_reg = RegExpCompiler::kNoRegister; 4209 int body_start_reg = RegExpCompiler::kNoRegister;
4168 Interval capture_registers = body->CaptureRegisters(); 4210 Interval capture_registers = body->CaptureRegisters();
4169 bool needs_capture_clearing = !capture_registers.is_empty(); 4211 bool needs_capture_clearing = !capture_registers.is_empty();
4170 Isolate* isolate = compiler->isolate(); 4212 Zone* zone = compiler->zone();
4171 4213
4172 if (body_can_be_empty) { 4214 if (body_can_be_empty) {
4173 body_start_reg = compiler->AllocateRegister(); 4215 body_start_reg = compiler->AllocateRegister();
4174 } else if (kRegexpOptimization && !needs_capture_clearing) { 4216 } else if (FLAG_regexp_optimization && !needs_capture_clearing) {
4175 // Only unroll if there are no captures and the body can't be 4217 // Only unroll if there are no captures and the body can't be
4176 // empty. 4218 // empty.
4177 { 4219 {
4178 RegExpExpansionLimiter limiter( 4220 RegExpExpansionLimiter limiter(
4179 compiler, min + ((max != min) ? 1 : 0)); 4221 compiler, min + ((max != min) ? 1 : 0));
4180 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { 4222 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
4181 intptr_t new_max = (max == kInfinity) ? max : max - min; 4223 int new_max = (max == kInfinity) ? max : max - min;
4182 // Recurse once to get the loop or optional matches after the fixed 4224 // Recurse once to get the loop or optional matches after the fixed
4183 // ones. 4225 // ones.
4184 RegExpNode* answer = ToNode( 4226 RegExpNode* answer = ToNode(
4185 0, new_max, is_greedy, body, compiler, on_success, true); 4227 0, new_max, is_greedy, body, compiler, on_success, true);
4186 // Unroll the forced matches from 0 to min. This can cause chains of 4228 // Unroll the forced matches from 0 to min. This can cause chains of
4187 // TextNodes (which the parser does not generate). These should be 4229 // TextNodes (which the parser does not generate). These should be
4188 // combined if it turns out they hinder good code generation. 4230 // combined if it turns out they hinder good code generation.
4189 for (intptr_t i = 0; i < min; i++) { 4231 for (int i = 0; i < min; i++) {
4190 answer = body->ToNode(compiler, answer); 4232 answer = body->ToNode(compiler, answer);
4191 } 4233 }
4192 return answer; 4234 return answer;
4193 } 4235 }
4194 } 4236 }
4195 if (max <= kMaxUnrolledMaxMatches && min == 0) { 4237 if (max <= kMaxUnrolledMaxMatches && min == 0) {
4196 ASSERT(max > 0); // Due to the 'if' above. 4238 DCHECK(max > 0); // Due to the 'if' above.
4197 RegExpExpansionLimiter limiter(compiler, max); 4239 RegExpExpansionLimiter limiter(compiler, max);
4198 if (limiter.ok_to_expand()) { 4240 if (limiter.ok_to_expand()) {
4199 // Unroll the optional matches up to max. 4241 // Unroll the optional matches up to max.
4200 RegExpNode* answer = on_success; 4242 RegExpNode* answer = on_success;
4201 for (intptr_t i = 0; i < max; i++) { 4243 for (int i = 0; i < max; i++) {
4202 ChoiceNode* alternation = new(isolate) ChoiceNode(2, isolate); 4244 ChoiceNode* alternation = new(zone) ChoiceNode(2, zone);
4203 if (is_greedy) { 4245 if (is_greedy) {
4204 alternation->AddAlternative( 4246 alternation->AddAlternative(
4205 GuardedAlternative(body->ToNode(compiler, answer))); 4247 GuardedAlternative(body->ToNode(compiler, answer)));
4206 alternation->AddAlternative(GuardedAlternative(on_success)); 4248 alternation->AddAlternative(GuardedAlternative(on_success));
4207 } else { 4249 } else {
4208 alternation->AddAlternative(GuardedAlternative(on_success)); 4250 alternation->AddAlternative(GuardedAlternative(on_success));
4209 alternation->AddAlternative( 4251 alternation->AddAlternative(
4210 GuardedAlternative(body->ToNode(compiler, answer))); 4252 GuardedAlternative(body->ToNode(compiler, answer)));
4211 } 4253 }
4212 answer = alternation; 4254 answer = alternation;
4213 if (not_at_start) alternation->set_not_at_start(); 4255 if (not_at_start) alternation->set_not_at_start();
4214 } 4256 }
4215 return answer; 4257 return answer;
4216 } 4258 }
4217 } 4259 }
4218 } 4260 }
4219 bool has_min = min > 0; 4261 bool has_min = min > 0;
4220 bool has_max = max < RegExpTree::kInfinity; 4262 bool has_max = max < RegExpTree::kInfinity;
4221 bool needs_counter = has_min || has_max; 4263 bool needs_counter = has_min || has_max;
4222 intptr_t reg_ctr = needs_counter 4264 int reg_ctr = needs_counter
4223 ? compiler->AllocateRegister() 4265 ? compiler->AllocateRegister()
4224 : RegExpCompiler::kNoRegister; 4266 : RegExpCompiler::kNoRegister;
4225 LoopChoiceNode* center = new(isolate) LoopChoiceNode(body->min_match() == 0, 4267 LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0,
4226 isolate); 4268 zone);
4227 if (not_at_start) center->set_not_at_start(); 4269 if (not_at_start) center->set_not_at_start();
4228 RegExpNode* loop_return = needs_counter 4270 RegExpNode* loop_return = needs_counter
4229 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) 4271 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
4230 : static_cast<RegExpNode*>(center); 4272 : static_cast<RegExpNode*>(center);
4231 if (body_can_be_empty) { 4273 if (body_can_be_empty) {
4232 // If the body can be empty we need to check if it was and then 4274 // If the body can be empty we need to check if it was and then
4233 // backtrack. 4275 // backtrack.
4234 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, 4276 loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
4235 reg_ctr, 4277 reg_ctr,
4236 min, 4278 min,
4237 loop_return); 4279 loop_return);
4238 } 4280 }
4239 RegExpNode* body_node = body->ToNode(compiler, loop_return); 4281 RegExpNode* body_node = body->ToNode(compiler, loop_return);
4240 if (body_can_be_empty) { 4282 if (body_can_be_empty) {
4241 // If the body can be empty we need to store the start position 4283 // If the body can be empty we need to store the start position
4242 // so we can bail out if it was empty. 4284 // so we can bail out if it was empty.
4243 body_node = ActionNode::StorePosition(body_start_reg, false, body_node); 4285 body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
4244 } 4286 }
4245 if (needs_capture_clearing) { 4287 if (needs_capture_clearing) {
4246 // Before entering the body of this loop we need to clear captures. 4288 // Before entering the body of this loop we need to clear captures.
4247 body_node = ActionNode::ClearCaptures(capture_registers, body_node); 4289 body_node = ActionNode::ClearCaptures(capture_registers, body_node);
4248 } 4290 }
4249 GuardedAlternative body_alt(body_node); 4291 GuardedAlternative body_alt(body_node);
4250 if (has_max) { 4292 if (has_max) {
4251 Guard* body_guard = 4293 Guard* body_guard =
4252 new(isolate) Guard(reg_ctr, Guard::LT, max); 4294 new(zone) Guard(reg_ctr, Guard::LT, max);
4253 body_alt.AddGuard(body_guard, isolate); 4295 body_alt.AddGuard(body_guard, zone);
4254 } 4296 }
4255 GuardedAlternative rest_alt(on_success); 4297 GuardedAlternative rest_alt(on_success);
4256 if (has_min) { 4298 if (has_min) {
4257 Guard* rest_guard = new(isolate) Guard(reg_ctr, Guard::GEQ, min); 4299 Guard* rest_guard = new(compiler->zone()) Guard(reg_ctr, Guard::GEQ, min);
4258 rest_alt.AddGuard(rest_guard, isolate); 4300 rest_alt.AddGuard(rest_guard, zone);
4259 } 4301 }
4260 if (is_greedy) { 4302 if (is_greedy) {
4261 center->AddLoopAlternative(body_alt); 4303 center->AddLoopAlternative(body_alt);
4262 center->AddContinueAlternative(rest_alt); 4304 center->AddContinueAlternative(rest_alt);
4263 } else { 4305 } else {
4264 center->AddContinueAlternative(rest_alt); 4306 center->AddContinueAlternative(rest_alt);
4265 center->AddLoopAlternative(body_alt); 4307 center->AddLoopAlternative(body_alt);
4266 } 4308 }
4267 if (needs_counter) { 4309 if (needs_counter) {
4268 return ActionNode::SetRegister(reg_ctr, 0, center); 4310 return ActionNode::SetRegister(reg_ctr, 0, center);
4269 } else { 4311 } else {
4270 return center; 4312 return center;
4271 } 4313 }
4272 } 4314 }
4273 4315
4274 4316
4275 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, 4317 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
4276 RegExpNode* on_success) { 4318 RegExpNode* on_success) {
4319 NodeInfo info;
4320 Zone* zone = compiler->zone();
4321
4277 switch (assertion_type()) { 4322 switch (assertion_type()) {
4278 case START_OF_LINE: 4323 case START_OF_LINE:
4279 return AssertionNode::AfterNewline(on_success); 4324 return AssertionNode::AfterNewline(on_success);
4280 case START_OF_INPUT: 4325 case START_OF_INPUT:
4281 return AssertionNode::AtStart(on_success); 4326 return AssertionNode::AtStart(on_success);
4282 case BOUNDARY: 4327 case BOUNDARY:
4283 return AssertionNode::AtBoundary(on_success); 4328 return AssertionNode::AtBoundary(on_success);
4284 case NON_BOUNDARY: 4329 case NON_BOUNDARY:
4285 return AssertionNode::AtNonBoundary(on_success); 4330 return AssertionNode::AtNonBoundary(on_success);
4286 case END_OF_INPUT: 4331 case END_OF_INPUT:
4287 return AssertionNode::AtEnd(on_success); 4332 return AssertionNode::AtEnd(on_success);
4288 case END_OF_LINE: { 4333 case END_OF_LINE: {
4289 // Compile $ in multiline regexps as an alternation with a positive 4334 // Compile $ in multiline regexps as an alternation with a positive
4290 // lookahead in one side and an end-of-input on the other side. 4335 // lookahead in one side and an end-of-input on the other side.
4291 // We need two registers for the lookahead. 4336 // We need two registers for the lookahead.
4292 intptr_t stack_pointer_register = compiler->AllocateRegister(); 4337 int stack_pointer_register = compiler->AllocateRegister();
4293 intptr_t position_register = compiler->AllocateRegister(); 4338 int position_register = compiler->AllocateRegister();
4294 // The ChoiceNode to distinguish between a newline and end-of-input. 4339 // The ChoiceNode to distinguish between a newline and end-of-input.
4295 ChoiceNode* result = new ChoiceNode(2, on_success->isolate()); 4340 ChoiceNode* result = new(zone) ChoiceNode(2, zone);
4296 // Create a newline atom. 4341 // Create a newline atom.
4297 ZoneGrowableArray<CharacterRange>* newline_ranges = 4342 ZoneList<CharacterRange>* newline_ranges =
4298 new ZoneGrowableArray<CharacterRange>(3); 4343 new(zone) ZoneList<CharacterRange>(3, zone);
4299 CharacterRange::AddClassEscape('n', newline_ranges); 4344 CharacterRange::AddClassEscape('n', newline_ranges, zone);
4300 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n'); 4345 RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n');
4301 TextNode* newline_matcher = new TextNode( 4346 TextNode* newline_matcher = new(zone) TextNode(
4302 newline_atom, 4347 newline_atom,
4303 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, 4348 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
4304 position_register, 4349 position_register,
4305 0, // No captures inside. 4350 0, // No captures inside.
4306 -1, // Ignored if no captures. 4351 -1, // Ignored if no captures.
4307 on_success)); 4352 on_success));
4308 // Create an end-of-input matcher. 4353 // Create an end-of-input matcher.
4309 RegExpNode* end_of_line = ActionNode::BeginSubmatch( 4354 RegExpNode* end_of_line = ActionNode::BeginSubmatch(
4310 stack_pointer_register, 4355 stack_pointer_register,
4311 position_register, 4356 position_register,
4312 newline_matcher); 4357 newline_matcher);
4313 // Add the two alternatives to the ChoiceNode. 4358 // Add the two alternatives to the ChoiceNode.
4314 GuardedAlternative eol_alternative(end_of_line); 4359 GuardedAlternative eol_alternative(end_of_line);
4315 result->AddAlternative(eol_alternative); 4360 result->AddAlternative(eol_alternative);
4316 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); 4361 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
4317 result->AddAlternative(end_alternative); 4362 result->AddAlternative(end_alternative);
4318 return result; 4363 return result;
4319 } 4364 }
4320 default: 4365 default:
4321 UNREACHABLE(); 4366 UNREACHABLE();
4322 } 4367 }
4323 return on_success; 4368 return on_success;
4324 } 4369 }
4325 4370
4326 4371
4327 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, 4372 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
4328 RegExpNode* on_success) { 4373 RegExpNode* on_success) {
4329 return new(CI) 4374 return new(compiler->zone())
4330 BackReferenceNode(RegExpCapture::StartRegister(index()), 4375 BackReferenceNode(RegExpCapture::StartRegister(index()),
4331 RegExpCapture::EndRegister(index()), 4376 RegExpCapture::EndRegister(index()),
4332 on_success); 4377 on_success);
4333 } 4378 }
4334 4379
4335 4380
4336 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, 4381 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
4337 RegExpNode* on_success) { 4382 RegExpNode* on_success) {
4338 return on_success; 4383 return on_success;
4339 } 4384 }
4340 4385
4341 4386
4342 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, 4387 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
4343 RegExpNode* on_success) { 4388 RegExpNode* on_success) {
4344 intptr_t stack_pointer_register = compiler->AllocateRegister(); 4389 int stack_pointer_register = compiler->AllocateRegister();
4345 intptr_t position_register = compiler->AllocateRegister(); 4390 int position_register = compiler->AllocateRegister();
4346 4391
4347 const intptr_t registers_per_capture = 2; 4392 const int registers_per_capture = 2;
4348 const intptr_t register_of_first_capture = 2; 4393 const int register_of_first_capture = 2;
4349 intptr_t register_count = capture_count_ * registers_per_capture; 4394 int register_count = capture_count_ * registers_per_capture;
4350 intptr_t register_start = 4395 int register_start =
4351 register_of_first_capture + capture_from_ * registers_per_capture; 4396 register_of_first_capture + capture_from_ * registers_per_capture;
4352 4397
4353 RegExpNode* success; 4398 RegExpNode* success;
4354 if (is_positive()) { 4399 if (is_positive()) {
4355 RegExpNode* node = ActionNode::BeginSubmatch( 4400 RegExpNode* node = ActionNode::BeginSubmatch(
4356 stack_pointer_register, 4401 stack_pointer_register,
4357 position_register, 4402 position_register,
4358 body()->ToNode( 4403 body()->ToNode(
4359 compiler, 4404 compiler,
4360 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, 4405 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
4361 position_register, 4406 position_register,
4362 register_count, 4407 register_count,
4363 register_start, 4408 register_start,
4364 on_success))); 4409 on_success)));
4365 return node; 4410 return node;
4366 } else { 4411 } else {
4367 // We use a ChoiceNode for a negative lookahead because it has most of 4412 // We use a ChoiceNode for a negative lookahead because it has most of
4368 // the characteristics we need. It has the body of the lookahead as its 4413 // the characteristics we need. It has the body of the lookahead as its
4369 // first alternative and the expression after the lookahead of the second 4414 // first alternative and the expression after the lookahead of the second
4370 // alternative. If the first alternative succeeds then the 4415 // alternative. If the first alternative succeeds then the
4371 // NegativeSubmatchSuccess will unwind the stack including everything the 4416 // NegativeSubmatchSuccess will unwind the stack including everything the
4372 // choice node set up and backtrack. If the first alternative fails then 4417 // choice node set up and backtrack. If the first alternative fails then
4373 // the second alternative is tried, which is exactly the desired result 4418 // the second alternative is tried, which is exactly the desired result
4374 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special 4419 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
4375 // ChoiceNode that knows to ignore the first exit when calculating quick 4420 // ChoiceNode that knows to ignore the first exit when calculating quick
4376 // checks. 4421 // checks.
4422 Zone* zone = compiler->zone();
4377 4423
4378 GuardedAlternative body_alt( 4424 GuardedAlternative body_alt(
4379 body()->ToNode( 4425 body()->ToNode(
4380 compiler, 4426 compiler,
4381 success = new(CI) NegativeSubmatchSuccess(stack_pointer_register, 4427 success = new(zone) NegativeSubmatchSuccess(stack_pointer_register,
4382 position_register, 4428 position_register,
4383 register_count, 4429 register_count,
4384 register_start, 4430 register_start,
4385 CI))); 4431 zone)));
4386 ChoiceNode* choice_node = 4432 ChoiceNode* choice_node =
4387 new(CI) NegativeLookaheadChoiceNode(body_alt, 4433 new(zone) NegativeLookaheadChoiceNode(body_alt,
4388 GuardedAlternative(on_success), 4434 GuardedAlternative(on_success),
4389 CI); 4435 zone);
4390 return ActionNode::BeginSubmatch(stack_pointer_register, 4436 return ActionNode::BeginSubmatch(stack_pointer_register,
4391 position_register, 4437 position_register,
4392 choice_node); 4438 choice_node);
4393 } 4439 }
4394 } 4440 }
4395 4441
4396 4442
4397 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, 4443 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
4398 RegExpNode* on_success) { 4444 RegExpNode* on_success) {
4399 return ToNode(body(), index(), compiler, on_success); 4445 return ToNode(body(), index(), compiler, on_success);
4400 } 4446 }
4401 4447
4402 4448
4403 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, 4449 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
4404 intptr_t index, 4450 int index,
4405 RegExpCompiler* compiler, 4451 RegExpCompiler* compiler,
4406 RegExpNode* on_success) { 4452 RegExpNode* on_success) {
4407 intptr_t start_reg = RegExpCapture::StartRegister(index); 4453 int start_reg = RegExpCapture::StartRegister(index);
4408 intptr_t end_reg = RegExpCapture::EndRegister(index); 4454 int end_reg = RegExpCapture::EndRegister(index);
4409 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); 4455 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
4410 RegExpNode* body_node = body->ToNode(compiler, store_end); 4456 RegExpNode* body_node = body->ToNode(compiler, store_end);
4411 return ActionNode::StorePosition(start_reg, true, body_node); 4457 return ActionNode::StorePosition(start_reg, true, body_node);
4412 } 4458 }
4413 4459
4414 4460
4415 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, 4461 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
4416 RegExpNode* on_success) { 4462 RegExpNode* on_success) {
4417 ZoneGrowableArray<RegExpTree*>* children = nodes(); 4463 ZoneList<RegExpTree*>* children = nodes();
4418 RegExpNode* current = on_success; 4464 RegExpNode* current = on_success;
4419 for (intptr_t i = children->length() - 1; i >= 0; i--) { 4465 for (int i = children->length() - 1; i >= 0; i--) {
4420 current = children->At(i)->ToNode(compiler, current); 4466 current = children->at(i)->ToNode(compiler, current);
4421 } 4467 }
4422 return current; 4468 return current;
4423 } 4469 }
4424 4470
4425 4471
4426 static void AddClass(const intptr_t* elmv, 4472 static void AddClass(const int* elmv,
4427 intptr_t elmc, 4473 int elmc,
4428 ZoneGrowableArray<CharacterRange>* ranges) { 4474 ZoneList<CharacterRange>* ranges,
4475 Zone* zone) {
4429 elmc--; 4476 elmc--;
4430 ASSERT(elmv[elmc] == 0x10000); 4477 DCHECK(elmv[elmc] == 0x10000);
4431 for (intptr_t i = 0; i < elmc; i += 2) { 4478 for (int i = 0; i < elmc; i += 2) {
4432 ASSERT(elmv[i] < elmv[i + 1]); 4479 DCHECK(elmv[i] < elmv[i + 1]);
4433 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); 4480 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone);
4434 } 4481 }
4435 } 4482 }
4436 4483
4437 4484
4438 static void AddClassNegated(const intptr_t *elmv, 4485 static void AddClassNegated(const int *elmv,
4439 intptr_t elmc, 4486 int elmc,
4440 ZoneGrowableArray<CharacterRange>* ranges) { 4487 ZoneList<CharacterRange>* ranges,
4488 Zone* zone) {
4441 elmc--; 4489 elmc--;
4442 ASSERT(elmv[elmc] == 0x10000); 4490 DCHECK(elmv[elmc] == 0x10000);
4443 ASSERT(elmv[0] != 0x0000); 4491 DCHECK(elmv[0] != 0x0000);
4444 ASSERT(elmv[elmc-1] != Utf16::kMaxCodeUnit); 4492 DCHECK(elmv[elmc-1] != String::kMaxUtf16CodeUnit);
4445 uint16_t last = 0x0000; 4493 uc16 last = 0x0000;
4446 for (intptr_t i = 0; i < elmc; i += 2) { 4494 for (int i = 0; i < elmc; i += 2) {
4447 ASSERT(last <= elmv[i] - 1); 4495 DCHECK(last <= elmv[i] - 1);
4448 ASSERT(elmv[i] < elmv[i + 1]); 4496 DCHECK(elmv[i] < elmv[i + 1]);
4449 ranges->Add(CharacterRange(last, elmv[i] - 1)); 4497 ranges->Add(CharacterRange(last, elmv[i] - 1), zone);
4450 last = elmv[i + 1]; 4498 last = elmv[i + 1];
4451 } 4499 }
4452 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit)); 4500 ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit), zone);
4453 } 4501 }
4454 4502
4455 4503
4456 void CharacterRange::AddClassEscape(uint16_t type, 4504 void CharacterRange::AddClassEscape(uc16 type,
4457 ZoneGrowableArray<CharacterRange>* ranges) { 4505 ZoneList<CharacterRange>* ranges,
4506 Zone* zone) {
4458 switch (type) { 4507 switch (type) {
4459 case 's': 4508 case 's':
4460 AddClass(kSpaceRanges, kSpaceRangeCount, ranges); 4509 AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
4461 break; 4510 break;
4462 case 'S': 4511 case 'S':
4463 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges); 4512 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);
4464 break; 4513 break;
4465 case 'w': 4514 case 'w':
4466 AddClass(kWordRanges, kWordRangeCount, ranges); 4515 AddClass(kWordRanges, kWordRangeCount, ranges, zone);
4467 break; 4516 break;
4468 case 'W': 4517 case 'W':
4469 AddClassNegated(kWordRanges, kWordRangeCount, ranges); 4518 AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);
4470 break; 4519 break;
4471 case 'd': 4520 case 'd':
4472 AddClass(kDigitRanges, kDigitRangeCount, ranges); 4521 AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);
4473 break; 4522 break;
4474 case 'D': 4523 case 'D':
4475 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges); 4524 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);
4476 break; 4525 break;
4477 case '.': 4526 case '.':
4478 AddClassNegated(kLineTerminatorRanges, 4527 AddClassNegated(kLineTerminatorRanges,
4479 kLineTerminatorRangeCount, 4528 kLineTerminatorRangeCount,
4480 ranges); 4529 ranges,
4530 zone);
4481 break; 4531 break;
4482 // This is not a character range as defined by the spec but a 4532 // This is not a character range as defined by the spec but a
4483 // convenient shorthand for a character class that matches any 4533 // convenient shorthand for a character class that matches any
4484 // character. 4534 // character.
4485 case '*': 4535 case '*':
4486 ranges->Add(CharacterRange::Everything()); 4536 ranges->Add(CharacterRange::Everything(), zone);
4487 break; 4537 break;
4488 // This is the set of characters matched by the $ and ^ symbols 4538 // This is the set of characters matched by the $ and ^ symbols
4489 // in multiline mode. 4539 // in multiline mode.
4490 case 'n': 4540 case 'n':
4491 AddClass(kLineTerminatorRanges, 4541 AddClass(kLineTerminatorRanges,
4492 kLineTerminatorRangeCount, 4542 kLineTerminatorRangeCount,
4493 ranges); 4543 ranges,
4544 zone);
4494 break; 4545 break;
4495 default: 4546 default:
4496 UNREACHABLE(); 4547 UNREACHABLE();
4497 } 4548 }
4498 } 4549 }
4499 4550
4500 4551
4501 void CharacterRange::AddCaseEquivalents( 4552 Vector<const int> CharacterRange::GetWordBounds() {
4502 ZoneGrowableArray<CharacterRange>* ranges, 4553 return Vector<const int>(kWordRanges, kWordRangeCount - 1);
4503 bool is_one_byte, 4554 }
4504 Isolate* isolate) { 4555
4505 uint16_t bottom = from(); 4556
4506 uint16_t top = to(); 4557 class CharacterRangeSplitter {
4558 public:
4559 CharacterRangeSplitter(ZoneList<CharacterRange>** included,
4560 ZoneList<CharacterRange>** excluded,
4561 Zone* zone)
4562 : included_(included),
4563 excluded_(excluded),
4564 zone_(zone) { }
4565 void Call(uc16 from, DispatchTable::Entry entry);
4566
4567 static const int kInBase = 0;
4568 static const int kInOverlay = 1;
4569
4570 private:
4571 ZoneList<CharacterRange>** included_;
4572 ZoneList<CharacterRange>** excluded_;
4573 Zone* zone_;
4574 };
4575
4576
4577 void CharacterRangeSplitter::Call(uc16 from, DispatchTable::Entry entry) {
4578 if (!entry.out_set()->Get(kInBase)) return;
4579 ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)
4580 ? included_
4581 : excluded_;
4582 if (*target == NULL) *target = new(zone_) ZoneList<CharacterRange>(2, zone_);
4583 (*target)->Add(CharacterRange(entry.from(), entry.to()), zone_);
4584 }
4585
4586
4587 void CharacterRange::Split(ZoneList<CharacterRange>* base,
4588 Vector<const int> overlay,
4589 ZoneList<CharacterRange>** included,
4590 ZoneList<CharacterRange>** excluded,
4591 Zone* zone) {
4592 DCHECK_EQ(NULL, *included);
4593 DCHECK_EQ(NULL, *excluded);
4594 DispatchTable table(zone);
4595 for (int i = 0; i < base->length(); i++)
4596 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase, zone);
4597 for (int i = 0; i < overlay.length(); i += 2) {
4598 table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1),
4599 CharacterRangeSplitter::kInOverlay, zone);
4600 }
4601 CharacterRangeSplitter callback(included, excluded, zone);
4602 table.ForEach(&callback);
4603 }
4604
4605
4606 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
4607 bool is_one_byte, Zone* zone) {
4608 Isolate* isolate = zone->isolate();
4609 uc16 bottom = from();
4610 uc16 top = to();
4507 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { 4611 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
4508 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; 4612 if (bottom > String::kMaxOneByteCharCode) return;
4509 if (top > Symbols::kMaxOneCharCodeSymbol) { 4613 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
4510 top = Symbols::kMaxOneCharCodeSymbol;
4511 }
4512 } 4614 }
4513 4615 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4514 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;
4515 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange;
4516 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4517 if (top == bottom) { 4616 if (top == bottom) {
4518 // If this is a singleton we just expand the one character. 4617 // If this is a singleton we just expand the one character.
4519 intptr_t length = jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLINT 4618 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
4520 for (intptr_t i = 0; i < length; i++) { 4619 for (int i = 0; i < length; i++) {
4521 uint32_t chr = chars[i]; 4620 uc32 chr = chars[i];
4522 if (chr != bottom) { 4621 if (chr != bottom) {
4523 ranges->Add(CharacterRange::Singleton(chars[i])); 4622 ranges->Add(CharacterRange::Singleton(chars[i]), zone);
4524 } 4623 }
4525 } 4624 }
4526 } else { 4625 } else {
4527 // If this is a range we expand the characters block by block, 4626 // If this is a range we expand the characters block by block,
4528 // expanding contiguous subranges (blocks) one at a time. 4627 // expanding contiguous subranges (blocks) one at a time.
4529 // The approach is as follows. For a given start character we 4628 // The approach is as follows. For a given start character we
4530 // look up the remainder of the block that contains it (represented 4629 // look up the remainder of the block that contains it (represented
4531 // by the end point), for instance we find 'z' if the character 4630 // by the end point), for instance we find 'z' if the character
4532 // is 'c'. A block is characterized by the property 4631 // is 'c'. A block is characterized by the property
4533 // that all characters uncanonicalize in the same way, except that 4632 // that all characters uncanonicalize in the same way, except that
4534 // each entry in the result is incremented by the distance from the first 4633 // each entry in the result is incremented by the distance from the first
4535 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and 4634 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and
4536 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. 4635 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
4537 // Once we've found the end point we look up its uncanonicalization 4636 // Once we've found the end point we look up its uncanonicalization
4538 // and produce a range for each element. For instance for [c-f] 4637 // and produce a range for each element. For instance for [c-f]
4539 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only 4638 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
4540 // add a range if it is not already contained in the input, so [c-f] 4639 // add a range if it is not already contained in the input, so [c-f]
4541 // will be skipped but [C-F] will be added. If this range is not 4640 // will be skipped but [C-F] will be added. If this range is not
4542 // completely contained in a block we do this for all the blocks 4641 // completely contained in a block we do this for all the blocks
4543 // covered by the range (handling characters that are not in a block 4642 // covered by the range (handling characters that are not in a block
4544 // as a "singleton block"). 4643 // as a "singleton block").
4545 int32_t range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 4644 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4546 intptr_t pos = bottom; 4645 int pos = bottom;
4547 while (pos <= top) { 4646 while (pos <= top) {
4548 intptr_t length = jsregexp_canonrange.get(pos, '\0', range); 4647 int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);
4549 uint16_t block_end; 4648 uc16 block_end;
4550 if (length == 0) { 4649 if (length == 0) {
4551 block_end = pos; 4650 block_end = pos;
4552 } else { 4651 } else {
4553 ASSERT(length == 1); 4652 DCHECK_EQ(1, length);
4554 block_end = range[0]; 4653 block_end = range[0];
4555 } 4654 }
4556 intptr_t end = (block_end > top) ? top : block_end; 4655 int end = (block_end > top) ? top : block_end;
4557 length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT 4656 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);
4558 for (intptr_t i = 0; i < length; i++) { 4657 for (int i = 0; i < length; i++) {
4559 uint32_t c = range[i]; 4658 uc32 c = range[i];
4560 uint16_t range_from = c - (block_end - pos); 4659 uc16 range_from = c - (block_end - pos);
4561 uint16_t range_to = c - (block_end - end); 4660 uc16 range_to = c - (block_end - end);
4562 if (!(bottom <= range_from && range_to <= top)) { 4661 if (!(bottom <= range_from && range_to <= top)) {
4563 ranges->Add(CharacterRange(range_from, range_to)); 4662 ranges->Add(CharacterRange(range_from, range_to), zone);
4564 } 4663 }
4565 } 4664 }
4566 pos = end + 1; 4665 pos = end + 1;
4567 } 4666 }
4568 } 4667 }
4569 } 4668 }
4570 4669
4571 4670
4572 bool CharacterRange::IsCanonical(ZoneGrowableArray<CharacterRange>* ranges) { 4671 bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
4573 ASSERT(ranges != NULL); 4672 DCHECK_NOT_NULL(ranges);
4574 intptr_t n = ranges->length(); 4673 int n = ranges->length();
4575 if (n <= 1) return true; 4674 if (n <= 1) return true;
4576 intptr_t max = ranges->At(0).to(); 4675 int max = ranges->at(0).to();
4577 for (intptr_t i = 1; i < n; i++) { 4676 for (int i = 1; i < n; i++) {
4578 CharacterRange next_range = ranges->At(i); 4677 CharacterRange next_range = ranges->at(i);
4579 if (next_range.from() <= max + 1) return false; 4678 if (next_range.from() <= max + 1) return false;
4580 max = next_range.to(); 4679 max = next_range.to();
4581 } 4680 }
4582 return true; 4681 return true;
4583 } 4682 }
4584 4683
4585 4684
4586 ZoneGrowableArray<CharacterRange>* CharacterSet::ranges() { 4685 ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
4587 if (ranges_ == NULL) { 4686 if (ranges_ == NULL) {
4588 ranges_ = new ZoneGrowableArray<CharacterRange>(2); 4687 ranges_ = new(zone) ZoneList<CharacterRange>(2, zone);
4589 CharacterRange::AddClassEscape(standard_set_type_, ranges_); 4688 CharacterRange::AddClassEscape(standard_set_type_, ranges_, zone);
4590 } 4689 }
4591 return ranges_; 4690 return ranges_;
4592 } 4691 }
4593 4692
4594 4693
4595 // Move a number of elements in a zone array to another position 4694 // Move a number of elements in a zonelist to another position
4596 // in the same array. Handles overlapping source and target areas. 4695 // in the same list. Handles overlapping source and target areas.
4597 static void MoveRanges(ZoneGrowableArray<CharacterRange>* list, 4696 static void MoveRanges(ZoneList<CharacterRange>* list,
4598 intptr_t from, 4697 int from,
4599 intptr_t to, 4698 int to,
4600 intptr_t count) { 4699 int count) {
4601 // Ranges are potentially overlapping. 4700 // Ranges are potentially overlapping.
4602 if (from < to) { 4701 if (from < to) {
4603 for (intptr_t i = count - 1; i >= 0; i--) { 4702 for (int i = count - 1; i >= 0; i--) {
4604 (*list)[to + i] = list->At(from + i); 4703 list->at(to + i) = list->at(from + i);
4605 } 4704 }
4606 } else { 4705 } else {
4607 for (intptr_t i = 0; i < count; i++) { 4706 for (int i = 0; i < count; i++) {
4608 (*list)[to + i] = list->At(from + i); 4707 list->at(to + i) = list->at(from + i);
4609 } 4708 }
4610 } 4709 }
4611 } 4710 }
4612 4711
4613 4712
4614 static intptr_t InsertRangeInCanonicalList( 4713 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,
4615 ZoneGrowableArray<CharacterRange>* list, 4714 int count,
4616 intptr_t count, 4715 CharacterRange insert) {
4617 CharacterRange insert) {
4618 // Inserts a range into list[0..count[, which must be sorted 4716 // Inserts a range into list[0..count[, which must be sorted
4619 // by from value and non-overlapping and non-adjacent, using at most 4717 // by from value and non-overlapping and non-adjacent, using at most
4620 // list[0..count] for the result. Returns the number of resulting 4718 // list[0..count] for the result. Returns the number of resulting
4621 // canonicalized ranges. Inserting a range may collapse existing ranges into 4719 // canonicalized ranges. Inserting a range may collapse existing ranges into
4622 // fewer ranges, so the return value can be anything in the range 1..count+1. 4720 // fewer ranges, so the return value can be anything in the range 1..count+1.
4623 uint16_t from = insert.from(); 4721 uc16 from = insert.from();
4624 uint16_t to = insert.to(); 4722 uc16 to = insert.to();
4625 intptr_t start_pos = 0; 4723 int start_pos = 0;
4626 intptr_t end_pos = count; 4724 int end_pos = count;
4627 for (intptr_t i = count - 1; i >= 0; i--) { 4725 for (int i = count - 1; i >= 0; i--) {
4628 CharacterRange current = list->At(i); 4726 CharacterRange current = list->at(i);
4629 if (current.from() > to + 1) { 4727 if (current.from() > to + 1) {
4630 end_pos = i; 4728 end_pos = i;
4631 } else if (current.to() + 1 < from) { 4729 } else if (current.to() + 1 < from) {
4632 start_pos = i + 1; 4730 start_pos = i + 1;
4633 break; 4731 break;
4634 } 4732 }
4635 } 4733 }
4636 4734
4637 // Inserted range overlaps, or is adjacent to, ranges at positions 4735 // Inserted range overlaps, or is adjacent to, ranges at positions
4638 // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are 4736 // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
4639 // not affected by the insertion. 4737 // not affected by the insertion.
4640 // If start_pos == end_pos, the range must be inserted before start_pos. 4738 // If start_pos == end_pos, the range must be inserted before start_pos.
4641 // If start_pos < end_pos, the entire range from start_pos to end_pos 4739 // If start_pos < end_pos, the entire range from start_pos to end_pos
4642 // must be merged with the insert range. 4740 // must be merged with the insert range.
4643 4741
4644 if (start_pos == end_pos) { 4742 if (start_pos == end_pos) {
4645 // Insert between existing ranges at position start_pos. 4743 // Insert between existing ranges at position start_pos.
4646 if (start_pos < count) { 4744 if (start_pos < count) {
4647 MoveRanges(list, start_pos, start_pos + 1, count - start_pos); 4745 MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
4648 } 4746 }
4649 (*list)[start_pos] = insert; 4747 list->at(start_pos) = insert;
4650 return count + 1; 4748 return count + 1;
4651 } 4749 }
4652 if (start_pos + 1 == end_pos) { 4750 if (start_pos + 1 == end_pos) {
4653 // Replace single existing range at position start_pos. 4751 // Replace single existing range at position start_pos.
4654 CharacterRange to_replace = list->At(start_pos); 4752 CharacterRange to_replace = list->at(start_pos);
4655 intptr_t new_from = Utils::Minimum(to_replace.from(), from); 4753 int new_from = Min(to_replace.from(), from);
4656 intptr_t new_to = Utils::Maximum(to_replace.to(), to); 4754 int new_to = Max(to_replace.to(), to);
4657 (*list)[start_pos] = CharacterRange(new_from, new_to); 4755 list->at(start_pos) = CharacterRange(new_from, new_to);
4658 return count; 4756 return count;
4659 } 4757 }
4660 // Replace a number of existing ranges from start_pos to end_pos - 1. 4758 // Replace a number of existing ranges from start_pos to end_pos - 1.
4661 // Move the remaining ranges down. 4759 // Move the remaining ranges down.
4662 4760
4663 intptr_t new_from = Utils::Minimum(list->At(start_pos).from(), from); 4761 int new_from = Min(list->at(start_pos).from(), from);
4664 intptr_t new_to = Utils::Maximum(list->At(end_pos - 1).to(), to); 4762 int new_to = Max(list->at(end_pos - 1).to(), to);
4665 if (end_pos < count) { 4763 if (end_pos < count) {
4666 MoveRanges(list, end_pos, start_pos + 1, count - end_pos); 4764 MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
4667 } 4765 }
4668 (*list)[start_pos] = CharacterRange(new_from, new_to); 4766 list->at(start_pos) = CharacterRange(new_from, new_to);
4669 return count - (end_pos - start_pos) + 1; 4767 return count - (end_pos - start_pos) + 1;
4670 } 4768 }
4671 4769
4672 4770
4673 void CharacterSet::Canonicalize() { 4771 void CharacterSet::Canonicalize() {
4674 // Special/default classes are always considered canonical. The result 4772 // Special/default classes are always considered canonical. The result
4675 // of calling ranges() will be sorted. 4773 // of calling ranges() will be sorted.
4676 if (ranges_ == NULL) return; 4774 if (ranges_ == NULL) return;
4677 CharacterRange::Canonicalize(ranges_); 4775 CharacterRange::Canonicalize(ranges_);
4678 } 4776 }
4679 4777
4680 4778
4681 void CharacterRange::Canonicalize( 4779 void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
4682 ZoneGrowableArray<CharacterRange>* character_ranges) {
4683 if (character_ranges->length() <= 1) return; 4780 if (character_ranges->length() <= 1) return;
4684 // Check whether ranges are already canonical (increasing, non-overlapping, 4781 // Check whether ranges are already canonical (increasing, non-overlapping,
4685 // non-adjacent). 4782 // non-adjacent).
4686 intptr_t n = character_ranges->length(); 4783 int n = character_ranges->length();
4687 intptr_t max = character_ranges->At(0).to(); 4784 int max = character_ranges->at(0).to();
4688 intptr_t i = 1; 4785 int i = 1;
4689 while (i < n) { 4786 while (i < n) {
4690 CharacterRange current = character_ranges->At(i); 4787 CharacterRange current = character_ranges->at(i);
4691 if (current.from() <= max + 1) { 4788 if (current.from() <= max + 1) {
4692 break; 4789 break;
4693 } 4790 }
4694 max = current.to(); 4791 max = current.to();
4695 i++; 4792 i++;
4696 } 4793 }
4697 // Canonical until the i'th range. If that's all of them, we are done. 4794 // Canonical until the i'th range. If that's all of them, we are done.
4698 if (i == n) return; 4795 if (i == n) return;
4699 4796
4700 // The ranges at index i and forward are not canonicalized. Make them so by 4797 // The ranges at index i and forward are not canonicalized. Make them so by
4701 // doing the equivalent of insertion sort (inserting each into the previous 4798 // doing the equivalent of insertion sort (inserting each into the previous
4702 // list, in order). 4799 // list, in order).
4703 // Notice that inserting a range can reduce the number of ranges in the 4800 // Notice that inserting a range can reduce the number of ranges in the
4704 // result due to combining of adjacent and overlapping ranges. 4801 // result due to combining of adjacent and overlapping ranges.
4705 intptr_t read = i; // Range to insert. 4802 int read = i; // Range to insert.
4706 intptr_t num_canonical = i; // Length of canonicalized part of list. 4803 int num_canonical = i; // Length of canonicalized part of list.
4707 do { 4804 do {
4708 num_canonical = InsertRangeInCanonicalList(character_ranges, 4805 num_canonical = InsertRangeInCanonicalList(character_ranges,
4709 num_canonical, 4806 num_canonical,
4710 character_ranges->At(read)); 4807 character_ranges->at(read));
4711 read++; 4808 read++;
4712 } while (read < n); 4809 } while (read < n);
4713 character_ranges->TruncateTo(num_canonical); 4810 character_ranges->Rewind(num_canonical);
4714 4811
4715 ASSERT(CharacterRange::IsCanonical(character_ranges)); 4812 DCHECK(CharacterRange::IsCanonical(character_ranges));
4716 } 4813 }
4717 4814
4718 4815
4719 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges, 4816 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
4720 ZoneGrowableArray<CharacterRange>* negated_ranges) { 4817 ZoneList<CharacterRange>* negated_ranges,
4721 ASSERT(CharacterRange::IsCanonical(ranges)); 4818 Zone* zone) {
4722 ASSERT(negated_ranges->length() == 0); 4819 DCHECK(CharacterRange::IsCanonical(ranges));
4723 intptr_t range_count = ranges->length(); 4820 DCHECK_EQ(0, negated_ranges->length());
4724 uint16_t from = 0; 4821 int range_count = ranges->length();
4725 intptr_t i = 0; 4822 uc16 from = 0;
4726 if (range_count > 0 && ranges->At(0).from() == 0) { 4823 int i = 0;
4727 from = ranges->At(0).to(); 4824 if (range_count > 0 && ranges->at(0).from() == 0) {
4825 from = ranges->at(0).to();
4728 i = 1; 4826 i = 1;
4729 } 4827 }
4730 while (i < range_count) { 4828 while (i < range_count) {
4731 CharacterRange range = ranges->At(i); 4829 CharacterRange range = ranges->at(i);
4732 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1)); 4830 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone);
4733 from = range.to(); 4831 from = range.to();
4734 i++; 4832 i++;
4735 } 4833 }
4736 if (from < Utf16::kMaxCodeUnit) { 4834 if (from < String::kMaxUtf16CodeUnit) {
4737 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit)); 4835 negated_ranges->Add(CharacterRange(from + 1, String::kMaxUtf16CodeUnit),
4836 zone);
4738 } 4837 }
4739 } 4838 }
4740 4839
4741 4840
4742 // ------------------------------------------------------------------- 4841 // -------------------------------------------------------------------
4743 // Splay tree 4842 // Splay tree
4744 4843
4745 4844
4746 // Workaround for the fact that ZoneGrowableArray does not have contains(). 4845 OutSet* OutSet::Extend(unsigned value, Zone* zone) {
4747 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, 4846 if (Get(value))
4748 unsigned value) { 4847 return this;
4749 for (intptr_t i = 0; i < array->length(); i++) { 4848 if (successors(zone) != NULL) {
4750 if (array->At(i) == value) { 4849 for (int i = 0; i < successors(zone)->length(); i++) {
4751 return true; 4850 OutSet* successor = successors(zone)->at(i);
4851 if (successor->Get(value))
4852 return successor;
4752 } 4853 }
4854 } else {
4855 successors_ = new(zone) ZoneList<OutSet*>(2, zone);
4753 } 4856 }
4754 return false; 4857 OutSet* result = new(zone) OutSet(first_, remaining_);
4858 result->Set(value, zone);
4859 successors(zone)->Add(result, zone);
4860 return result;
4755 } 4861 }
4756 4862
4757 4863
4758 void OutSet::Set(unsigned value, Isolate* isolate) { 4864 void OutSet::Set(unsigned value, Zone *zone) {
4759 if (value < kFirstLimit) { 4865 if (value < kFirstLimit) {
4760 first_ |= (1 << value); 4866 first_ |= (1 << value);
4761 } else { 4867 } else {
4762 if (remaining_ == NULL) 4868 if (remaining_ == NULL)
4763 remaining_ = new(isolate) ZoneGrowableArray<unsigned>(1); 4869 remaining_ = new(zone) ZoneList<unsigned>(1, zone);
4764 4870 if (remaining_->is_empty() || !remaining_->Contains(value))
4765 bool remaining_contains_value = ArrayContains(remaining_, value); 4871 remaining_->Add(value, zone);
4766 if (remaining_->is_empty() || !remaining_contains_value) {
4767 remaining_->Add(value);
4768 }
4769 } 4872 }
4770 } 4873 }
4771 4874
4772 4875
4773 bool OutSet::Get(unsigned value) const { 4876 bool OutSet::Get(unsigned value) const {
4774 if (value < kFirstLimit) { 4877 if (value < kFirstLimit) {
4775 return (first_ & (1 << value)) != 0; 4878 return (first_ & (1 << value)) != 0;
4776 } else if (remaining_ == NULL) { 4879 } else if (remaining_ == NULL) {
4777 return false; 4880 return false;
4778 } else { 4881 } else {
4779 return ArrayContains(remaining_, value); 4882 return remaining_->Contains(value);
4780 } 4883 }
4781 } 4884 }
4782 4885
4783 4886
4887 const uc16 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;
4888
4889
4890 void DispatchTable::AddRange(CharacterRange full_range, int value,
4891 Zone* zone) {
4892 CharacterRange current = full_range;
4893 if (tree()->is_empty()) {
4894 // If this is the first range we just insert into the table.
4895 ZoneSplayTree<Config>::Locator loc;
4896 DCHECK_RESULT(tree()->Insert(current.from(), &loc));
4897 loc.set_value(Entry(current.from(), current.to(),
4898 empty()->Extend(value, zone)));
4899 return;
4900 }
4901 // First see if there is a range to the left of this one that
4902 // overlaps.
4903 ZoneSplayTree<Config>::Locator loc;
4904 if (tree()->FindGreatestLessThan(current.from(), &loc)) {
4905 Entry* entry = &loc.value();
4906 // If we've found a range that overlaps with this one, and it
4907 // starts strictly to the left of this one, we have to fix it
4908 // because the following code only handles ranges that start on
4909 // or after the start point of the range we're adding.
4910 if (entry->from() < current.from() && entry->to() >= current.from()) {
4911 // Snap the overlapping range in half around the start point of
4912 // the range we're adding.
4913 CharacterRange left(entry->from(), current.from() - 1);
4914 CharacterRange right(current.from(), entry->to());
4915 // The left part of the overlapping range doesn't overlap.
4916 // Truncate the whole entry to be just the left part.
4917 entry->set_to(left.to());
4918 // The right part is the one that overlaps. We add this part
4919 // to the map and let the next step deal with merging it with
4920 // the range we're adding.
4921 ZoneSplayTree<Config>::Locator loc;
4922 DCHECK_RESULT(tree()->Insert(right.from(), &loc));
4923 loc.set_value(Entry(right.from(),
4924 right.to(),
4925 entry->out_set()));
4926 }
4927 }
4928 while (current.is_valid()) {
4929 if (tree()->FindLeastGreaterThan(current.from(), &loc) &&
4930 (loc.value().from() <= current.to()) &&
4931 (loc.value().to() >= current.from())) {
4932 Entry* entry = &loc.value();
4933 // We have overlap. If there is space between the start point of
4934 // the range we're adding and where the overlapping range starts
4935 // then we have to add a range covering just that space.
4936 if (current.from() < entry->from()) {
4937 ZoneSplayTree<Config>::Locator ins;
4938 DCHECK_RESULT(tree()->Insert(current.from(), &ins));
4939 ins.set_value(Entry(current.from(),
4940 entry->from() - 1,
4941 empty()->Extend(value, zone)));
4942 current.set_from(entry->from());
4943 }
4944 DCHECK_EQ(current.from(), entry->from());
4945 // If the overlapping range extends beyond the one we want to add
4946 // we have to snap the right part off and add it separately.
4947 if (entry->to() > current.to()) {
4948 ZoneSplayTree<Config>::Locator ins;
4949 DCHECK_RESULT(tree()->Insert(current.to() + 1, &ins));
4950 ins.set_value(Entry(current.to() + 1,
4951 entry->to(),
4952 entry->out_set()));
4953 entry->set_to(current.to());
4954 }
4955 DCHECK(entry->to() <= current.to());
4956 // The overlapping range is now completely contained by the range
4957 // we're adding so we can just update it and move the start point
4958 // of the range we're adding just past it.
4959 entry->AddValue(value, zone);
4960 // Bail out if the last interval ended at 0xFFFF since otherwise
4961 // adding 1 will wrap around to 0.
4962 if (entry->to() == String::kMaxUtf16CodeUnit)
4963 break;
4964 DCHECK(entry->to() + 1 > current.from());
4965 current.set_from(entry->to() + 1);
4966 } else {
4967 // There is no overlap so we can just add the range
4968 ZoneSplayTree<Config>::Locator ins;
4969 DCHECK_RESULT(tree()->Insert(current.from(), &ins));
4970 ins.set_value(Entry(current.from(),
4971 current.to(),
4972 empty()->Extend(value, zone)));
4973 break;
4974 }
4975 }
4976 }
4977
4978
4979 OutSet* DispatchTable::Get(uc16 value) {
4980 ZoneSplayTree<Config>::Locator loc;
4981 if (!tree()->FindGreatestLessThan(value, &loc))
4982 return empty();
4983 Entry* entry = &loc.value();
4984 if (value <= entry->to())
4985 return entry->out_set();
4986 else
4987 return empty();
4988 }
4989
4990
4784 // ------------------------------------------------------------------- 4991 // -------------------------------------------------------------------
4785 // Analysis 4992 // Analysis
4786 4993
4787 4994
4788 void Analysis::EnsureAnalyzed(RegExpNode* that) { 4995 void Analysis::EnsureAnalyzed(RegExpNode* that) {
4996 StackLimitCheck check(that->zone()->isolate());
4997 if (check.HasOverflowed()) {
4998 fail("Stack overflow");
4999 return;
5000 }
4789 if (that->info()->been_analyzed || that->info()->being_analyzed) 5001 if (that->info()->been_analyzed || that->info()->being_analyzed)
4790 return; 5002 return;
4791 that->info()->being_analyzed = true; 5003 that->info()->being_analyzed = true;
4792 that->Accept(this); 5004 that->Accept(this);
4793 that->info()->being_analyzed = false; 5005 that->info()->being_analyzed = false;
4794 that->info()->been_analyzed = true; 5006 that->info()->been_analyzed = true;
4795 } 5007 }
4796 5008
4797 5009
4798 void Analysis::VisitEnd(EndNode* that) { 5010 void Analysis::VisitEnd(EndNode* that) {
4799 // nothing to do 5011 // nothing to do
4800 } 5012 }
4801 5013
4802 5014
4803 void TextNode::CalculateOffsets() { 5015 void TextNode::CalculateOffsets() {
4804 intptr_t element_count = elements()->length(); 5016 int element_count = elements()->length();
4805 // Set up the offsets of the elements relative to the start. This is a fixed 5017 // Set up the offsets of the elements relative to the start. This is a fixed
4806 // quantity since a TextNode can only contain fixed-width things. 5018 // quantity since a TextNode can only contain fixed-width things.
4807 intptr_t cp_offset = 0; 5019 int cp_offset = 0;
4808 for (intptr_t i = 0; i < element_count; i++) { 5020 for (int i = 0; i < element_count; i++) {
4809 TextElement& elm = (*elements())[i]; 5021 TextElement& elm = elements()->at(i);
4810 elm.set_cp_offset(cp_offset); 5022 elm.set_cp_offset(cp_offset);
4811 cp_offset += elm.length(); 5023 cp_offset += elm.length();
4812 } 5024 }
4813 } 5025 }
4814 5026
4815 5027
4816 void Analysis::VisitText(TextNode* that) { 5028 void Analysis::VisitText(TextNode* that) {
4817 if (ignore_case_) { 5029 if (ignore_case_) {
4818 that->MakeCaseIndependent(is_one_byte_); 5030 that->MakeCaseIndependent(is_one_byte_);
4819 } 5031 }
(...skipping 10 matching lines...) Expand all
4830 if (!has_failed()) { 5042 if (!has_failed()) {
4831 // If the next node is interested in what it follows then this node 5043 // If the next node is interested in what it follows then this node
4832 // has to be interested too so it can pass the information on. 5044 // has to be interested too so it can pass the information on.
4833 that->info()->AddFromFollowing(target->info()); 5045 that->info()->AddFromFollowing(target->info());
4834 } 5046 }
4835 } 5047 }
4836 5048
4837 5049
4838 void Analysis::VisitChoice(ChoiceNode* that) { 5050 void Analysis::VisitChoice(ChoiceNode* that) {
4839 NodeInfo* info = that->info(); 5051 NodeInfo* info = that->info();
4840 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { 5052 for (int i = 0; i < that->alternatives()->length(); i++) {
4841 RegExpNode* node = (*that->alternatives())[i].node(); 5053 RegExpNode* node = that->alternatives()->at(i).node();
4842 EnsureAnalyzed(node); 5054 EnsureAnalyzed(node);
4843 if (has_failed()) return; 5055 if (has_failed()) return;
4844 // Anything the following nodes need to know has to be known by 5056 // Anything the following nodes need to know has to be known by
4845 // this node also, so it can pass it on. 5057 // this node also, so it can pass it on.
4846 info->AddFromFollowing(node->info()); 5058 info->AddFromFollowing(node->info());
4847 } 5059 }
4848 } 5060 }
4849 5061
4850 5062
4851 void Analysis::VisitLoopChoice(LoopChoiceNode* that) { 5063 void Analysis::VisitLoopChoice(LoopChoiceNode* that) {
4852 NodeInfo* info = that->info(); 5064 NodeInfo* info = that->info();
4853 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { 5065 for (int i = 0; i < that->alternatives()->length(); i++) {
4854 RegExpNode* node = (*that->alternatives())[i].node(); 5066 RegExpNode* node = that->alternatives()->at(i).node();
4855 if (node != that->loop_node()) { 5067 if (node != that->loop_node()) {
4856 EnsureAnalyzed(node); 5068 EnsureAnalyzed(node);
4857 if (has_failed()) return; 5069 if (has_failed()) return;
4858 info->AddFromFollowing(node->info()); 5070 info->AddFromFollowing(node->info());
4859 } 5071 }
4860 } 5072 }
4861 // Check the loop last since it may need the value of this node 5073 // Check the loop last since it may need the value of this node
4862 // to get a correct result. 5074 // to get a correct result.
4863 EnsureAnalyzed(that->loop_node()); 5075 EnsureAnalyzed(that->loop_node());
4864 if (!has_failed()) { 5076 if (!has_failed()) {
4865 info->AddFromFollowing(that->loop_node()->info()); 5077 info->AddFromFollowing(that->loop_node()->info());
4866 } 5078 }
4867 } 5079 }
4868 5080
4869 5081
4870 void Analysis::VisitBackReference(BackReferenceNode* that) { 5082 void Analysis::VisitBackReference(BackReferenceNode* that) {
4871 EnsureAnalyzed(that->on_success()); 5083 EnsureAnalyzed(that->on_success());
4872 } 5084 }
4873 5085
4874 5086
4875 void Analysis::VisitAssertion(AssertionNode* that) { 5087 void Analysis::VisitAssertion(AssertionNode* that) {
4876 EnsureAnalyzed(that->on_success()); 5088 EnsureAnalyzed(that->on_success());
4877 } 5089 }
4878 5090
4879 5091
4880 void BackReferenceNode::FillInBMInfo(intptr_t offset, 5092 void BackReferenceNode::FillInBMInfo(int offset,
4881 intptr_t budget, 5093 int budget,
4882 BoyerMooreLookahead* bm, 5094 BoyerMooreLookahead* bm,
4883 bool not_at_start) { 5095 bool not_at_start) {
4884 // Working out the set of characters that a backreference can match is too 5096 // Working out the set of characters that a backreference can match is too
4885 // hard, so we just say that any character can match. 5097 // hard, so we just say that any character can match.
4886 bm->SetRest(offset); 5098 bm->SetRest(offset);
4887 SaveBMInfo(bm, not_at_start, offset); 5099 SaveBMInfo(bm, not_at_start, offset);
4888 } 5100 }
4889 5101
4890 5102
4891 COMPILE_ASSERT(BoyerMoorePositionInfo::kMapSize == 5103 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
4892 RegExpMacroAssembler::kTableSize); 5104 RegExpMacroAssembler::kTableSize);
4893 5105
4894 5106
4895 void ChoiceNode::FillInBMInfo(intptr_t offset, 5107 void ChoiceNode::FillInBMInfo(int offset,
4896 intptr_t budget, 5108 int budget,
4897 BoyerMooreLookahead* bm, 5109 BoyerMooreLookahead* bm,
4898 bool not_at_start) { 5110 bool not_at_start) {
4899 ZoneGrowableArray<GuardedAlternative>* alts = alternatives(); 5111 ZoneList<GuardedAlternative>* alts = alternatives();
4900 budget = (budget - 1) / alts->length(); 5112 budget = (budget - 1) / alts->length();
4901 for (intptr_t i = 0; i < alts->length(); i++) { 5113 for (int i = 0; i < alts->length(); i++) {
4902 GuardedAlternative& alt = (*alts)[i]; 5114 GuardedAlternative& alt = alts->at(i);
4903 if (alt.guards() != NULL && alt.guards()->length() != 0) { 5115 if (alt.guards() != NULL && alt.guards()->length() != 0) {
4904 bm->SetRest(offset); // Give up trying to fill in info. 5116 bm->SetRest(offset); // Give up trying to fill in info.
4905 SaveBMInfo(bm, not_at_start, offset); 5117 SaveBMInfo(bm, not_at_start, offset);
4906 return; 5118 return;
4907 } 5119 }
4908 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start); 5120 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start);
4909 } 5121 }
4910 SaveBMInfo(bm, not_at_start, offset); 5122 SaveBMInfo(bm, not_at_start, offset);
4911 } 5123 }
4912 5124
4913 5125
4914 void TextNode::FillInBMInfo(intptr_t initial_offset, 5126 void TextNode::FillInBMInfo(int initial_offset,
4915 intptr_t budget, 5127 int budget,
4916 BoyerMooreLookahead* bm, 5128 BoyerMooreLookahead* bm,
4917 bool not_at_start) { 5129 bool not_at_start) {
4918 if (initial_offset >= bm->length()) return; 5130 if (initial_offset >= bm->length()) return;
4919 intptr_t offset = initial_offset; 5131 int offset = initial_offset;
4920 intptr_t max_char = bm->max_char(); 5132 int max_char = bm->max_char();
4921 for (intptr_t i = 0; i < elements()->length(); i++) { 5133 for (int i = 0; i < elements()->length(); i++) {
4922 if (offset >= bm->length()) { 5134 if (offset >= bm->length()) {
4923 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5135 if (initial_offset == 0) set_bm_info(not_at_start, bm);
4924 return; 5136 return;
4925 } 5137 }
4926 TextElement text = elements()->At(i); 5138 TextElement text = elements()->at(i);
4927 if (text.text_type() == TextElement::ATOM) { 5139 if (text.text_type() == TextElement::ATOM) {
4928 RegExpAtom* atom = text.atom(); 5140 RegExpAtom* atom = text.atom();
4929 for (intptr_t j = 0; j < atom->length(); j++, offset++) { 5141 for (int j = 0; j < atom->length(); j++, offset++) {
4930 if (offset >= bm->length()) { 5142 if (offset >= bm->length()) {
4931 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5143 if (initial_offset == 0) set_bm_info(not_at_start, bm);
4932 return; 5144 return;
4933 } 5145 }
4934 uint16_t character = atom->data()->At(j); 5146 uc16 character = atom->data()[j];
4935 if (bm->compiler()->ignore_case()) { 5147 if (bm->compiler()->ignore_case()) {
4936 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 5148 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4937 intptr_t length = GetCaseIndependentLetters( 5149 int length = GetCaseIndependentLetters(
5150 Isolate::Current(),
4938 character, 5151 character,
4939 bm->max_char() == Symbols::kMaxOneCharCodeSymbol, 5152 bm->max_char() == String::kMaxOneByteCharCode,
4940 chars); 5153 chars);
4941 for (intptr_t j = 0; j < length; j++) { 5154 for (int j = 0; j < length; j++) {
4942 bm->Set(offset, chars[j]); 5155 bm->Set(offset, chars[j]);
4943 } 5156 }
4944 } else { 5157 } else {
4945 if (character <= max_char) bm->Set(offset, character); 5158 if (character <= max_char) bm->Set(offset, character);
4946 } 5159 }
4947 } 5160 }
4948 } else { 5161 } else {
4949 ASSERT(text.text_type() == TextElement::CHAR_CLASS); 5162 DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type());
4950 RegExpCharacterClass* char_class = text.char_class(); 5163 RegExpCharacterClass* char_class = text.char_class();
4951 ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges(); 5164 ZoneList<CharacterRange>* ranges = char_class->ranges(zone());
4952 if (char_class->is_negated()) { 5165 if (char_class->is_negated()) {
4953 bm->SetAll(offset); 5166 bm->SetAll(offset);
4954 } else { 5167 } else {
4955 for (intptr_t k = 0; k < ranges->length(); k++) { 5168 for (int k = 0; k < ranges->length(); k++) {
4956 CharacterRange& range = (*ranges)[k]; 5169 CharacterRange& range = ranges->at(k);
4957 if (range.from() > max_char) continue; 5170 if (range.from() > max_char) continue;
4958 intptr_t to = Utils::Minimum(max_char, 5171 int to = Min(max_char, static_cast<int>(range.to()));
4959 static_cast<intptr_t>(range.to()));
4960 bm->SetInterval(offset, Interval(range.from(), to)); 5172 bm->SetInterval(offset, Interval(range.from(), to));
4961 } 5173 }
4962 } 5174 }
4963 offset++; 5175 offset++;
4964 } 5176 }
4965 } 5177 }
4966 if (offset >= bm->length()) { 5178 if (offset >= bm->length()) {
4967 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5179 if (initial_offset == 0) set_bm_info(not_at_start, bm);
4968 return; 5180 return;
4969 } 5181 }
4970 on_success()->FillInBMInfo(offset, 5182 on_success()->FillInBMInfo(offset,
4971 budget - 1, 5183 budget - 1,
4972 bm, 5184 bm,
4973 true); // Not at start after a text node. 5185 true); // Not at start after a text node.
4974 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5186 if (initial_offset == 0) set_bm_info(not_at_start, bm);
4975 } 5187 }
4976 5188
4977 5189
5190 // -------------------------------------------------------------------
5191 // Dispatch table construction
5192
5193
5194 void DispatchTableConstructor::VisitEnd(EndNode* that) {
5195 AddRange(CharacterRange::Everything());
5196 }
5197
5198
5199 void DispatchTableConstructor::BuildTable(ChoiceNode* node) {
5200 node->set_being_calculated(true);
5201 ZoneList<GuardedAlternative>* alternatives = node->alternatives();
5202 for (int i = 0; i < alternatives->length(); i++) {
5203 set_choice_index(i);
5204 alternatives->at(i).node()->Accept(this);
5205 }
5206 node->set_being_calculated(false);
5207 }
5208
5209
5210 class AddDispatchRange {
5211 public:
5212 explicit AddDispatchRange(DispatchTableConstructor* constructor)
5213 : constructor_(constructor) { }
5214 void Call(uc32 from, DispatchTable::Entry entry);
5215 private:
5216 DispatchTableConstructor* constructor_;
5217 };
5218
5219
5220 void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) {
5221 CharacterRange range(from, entry.to());
5222 constructor_->AddRange(range);
5223 }
5224
5225
5226 void DispatchTableConstructor::VisitChoice(ChoiceNode* node) {
5227 if (node->being_calculated())
5228 return;
5229 DispatchTable* table = node->GetTable(ignore_case_);
5230 AddDispatchRange adder(this);
5231 table->ForEach(&adder);
5232 }
5233
5234
5235 void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {
5236 // TODO(160): Find the node that we refer back to and propagate its start
5237 // set back to here. For now we just accept anything.
5238 AddRange(CharacterRange::Everything());
5239 }
5240
5241
5242 void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {
5243 RegExpNode* target = that->on_success();
5244 target->Accept(this);
5245 }
5246
5247
5248 static int CompareRangeByFrom(const CharacterRange* a,
5249 const CharacterRange* b) {
5250 return Compare<uc16>(a->from(), b->from());
5251 }
5252
5253
5254 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
5255 ranges->Sort(CompareRangeByFrom);
5256 uc16 last = 0;
5257 for (int i = 0; i < ranges->length(); i++) {
5258 CharacterRange range = ranges->at(i);
5259 if (last < range.from())
5260 AddRange(CharacterRange(last, range.from() - 1));
5261 if (range.to() >= last) {
5262 if (range.to() == String::kMaxUtf16CodeUnit) {
5263 return;
5264 } else {
5265 last = range.to() + 1;
5266 }
5267 }
5268 }
5269 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit));
5270 }
5271
5272
5273 void DispatchTableConstructor::VisitText(TextNode* that) {
5274 TextElement elm = that->elements()->at(0);
5275 switch (elm.text_type()) {
5276 case TextElement::ATOM: {
5277 uc16 c = elm.atom()->data()[0];
5278 AddRange(CharacterRange(c, c));
5279 break;
5280 }
5281 case TextElement::CHAR_CLASS: {
5282 RegExpCharacterClass* tree = elm.char_class();
5283 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone());
5284 if (tree->is_negated()) {
5285 AddInverse(ranges);
5286 } else {
5287 for (int i = 0; i < ranges->length(); i++)
5288 AddRange(ranges->at(i));
5289 }
5290 break;
5291 }
5292 default: {
5293 UNIMPLEMENTED();
5294 }
5295 }
5296 }
5297
5298
5299 void DispatchTableConstructor::VisitAction(ActionNode* that) {
5300 RegExpNode* target = that->on_success();
5301 target->Accept(this);
5302 }
5303
5304
4978 RegExpEngine::CompilationResult RegExpEngine::Compile( 5305 RegExpEngine::CompilationResult RegExpEngine::Compile(
4979 RegExpCompileData* data, 5306 RegExpCompileData* data, bool ignore_case, bool is_global,
4980 const ParsedFunction* parsed_function, 5307 bool is_multiline, bool is_sticky, Handle<String> pattern,
4981 const ZoneGrowableArray<const ICData*>& ic_data_array) { 5308 Handle<String> sample_subject, bool is_one_byte, Zone* zone) {
4982 Isolate* isolate = Isolate::Current(); 5309 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
5310 return IrregexpRegExpTooBig(zone->isolate());
5311 }
5312 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone);
4983 5313
4984 const Function& function = parsed_function->function(); 5314 // Sample some characters from the middle of the string.
4985 const intptr_t specialization_cid = function.regexp_cid(); 5315 static const int kSampleSize = 128;
4986 const bool is_one_byte = (specialization_cid == kOneByteStringCid ||
4987 specialization_cid == kExternalOneByteStringCid);
4988 JSRegExp& regexp = JSRegExp::Handle(isolate, function.regexp());
4989 const String& pattern = String::Handle(isolate, regexp.pattern());
4990 5316
4991 ASSERT(!regexp.IsNull()); 5317 sample_subject = String::Flatten(sample_subject);
4992 ASSERT(!pattern.IsNull()); 5318 int chars_sampled = 0;
4993 5319 int half_way = (sample_subject->length() - kSampleSize) / 2;
4994 const bool ignore_case = regexp.is_ignore_case(); 5320 for (int i = Max(0, half_way);
4995 const bool is_global = regexp.is_global(); 5321 i < sample_subject->length() && chars_sampled < kSampleSize;
4996 5322 i++, chars_sampled++) {
4997 RegExpCompiler compiler(data->capture_count, ignore_case, specialization_cid); 5323 compiler.frequency_collator()->CountCharacter(sample_subject->Get(i));
4998 5324 }
4999 // TODO(zerny): Frequency sampling is currently disabled because of several
5000 // issues. We do not want to store subject strings in the regexp object since
5001 // they might be long and we should not prevent their garbage collection.
5002 // Passing them to this function explicitly does not help, since we must
5003 // generate exactly the same IR for both the unoptimizing and optimizing
5004 // pipelines (otherwise it gets confused when i.e. deopt id's differ).
5005 // An option would be to store sampling results in the regexp object, but
5006 // I'm not sure the performance gains are relevant enough.
5007 5325
5008 // Wrap the body of the regexp in capture #0. 5326 // Wrap the body of the regexp in capture #0.
5009 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, 5327 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
5010 0, 5328 0,
5011 &compiler, 5329 &compiler,
5012 compiler.accept()); 5330 compiler.accept());
5013
5014 RegExpNode* node = captured_body; 5331 RegExpNode* node = captured_body;
5015 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); 5332 bool is_end_anchored = data->tree->IsAnchoredAtEnd();
5016 bool is_start_anchored = data->tree->IsAnchoredAtStart(); 5333 bool is_start_anchored = data->tree->IsAnchoredAtStart();
5017 intptr_t max_length = data->tree->max_match(); 5334 int max_length = data->tree->max_match();
5018 if (!is_start_anchored) { 5335 if (!is_start_anchored && !is_sticky) {
5019 // Add a .*? at the beginning, outside the body capture, unless 5336 // Add a .*? at the beginning, outside the body capture, unless
5020 // this expression is anchored at the beginning. 5337 // this expression is anchored at the beginning or sticky.
5021 RegExpNode* loop_node = 5338 RegExpNode* loop_node =
5022 RegExpQuantifier::ToNode(0, 5339 RegExpQuantifier::ToNode(0,
5023 RegExpTree::kInfinity, 5340 RegExpTree::kInfinity,
5024 false, 5341 false,
5025 new(isolate) RegExpCharacterClass('*'), 5342 new(zone) RegExpCharacterClass('*'),
5026 &compiler, 5343 &compiler,
5027 captured_body, 5344 captured_body,
5028 data->contains_anchor); 5345 data->contains_anchor);
5029 5346
5030 if (data->contains_anchor) { 5347 if (data->contains_anchor) {
5031 // Unroll loop once, to take care of the case that might start 5348 // Unroll loop once, to take care of the case that might start
5032 // at the start of input. 5349 // at the start of input.
5033 ChoiceNode* first_step_node = new(isolate) ChoiceNode(2, isolate); 5350 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
5034 first_step_node->AddAlternative(GuardedAlternative(captured_body)); 5351 first_step_node->AddAlternative(GuardedAlternative(captured_body));
5035 first_step_node->AddAlternative(GuardedAlternative( 5352 first_step_node->AddAlternative(GuardedAlternative(
5036 new(isolate) TextNode( 5353 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node)));
5037 new(isolate) RegExpCharacterClass('*'), loop_node)));
5038 node = first_step_node; 5354 node = first_step_node;
5039 } else { 5355 } else {
5040 node = loop_node; 5356 node = loop_node;
5041 } 5357 }
5042 } 5358 }
5043 if (is_one_byte) { 5359 if (is_one_byte) {
5044 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); 5360 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
5045 // Do it again to propagate the new nodes to places where they were not 5361 // Do it again to propagate the new nodes to places where they were not
5046 // put because they had not been calculated yet. 5362 // put because they had not been calculated yet.
5047 if (node != NULL) { 5363 if (node != NULL) {
5048 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); 5364 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
5049 } 5365 }
5050 } 5366 }
5051 5367
5052 if (node == NULL) node = new(isolate) EndNode(EndNode::BACKTRACK, isolate); 5368 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
5053 data->node = node; 5369 data->node = node;
5054 Analysis analysis(ignore_case, is_one_byte); 5370 Analysis analysis(ignore_case, is_one_byte);
5055 analysis.EnsureAnalyzed(node); 5371 analysis.EnsureAnalyzed(node);
5056 if (analysis.has_failed()) { 5372 if (analysis.has_failed()) {
5057 const char* error_message = analysis.error_message(); 5373 const char* error_message = analysis.error_message();
5058 return CompilationResult(error_message); 5374 return CompilationResult(zone->isolate(), error_message);
5059 } 5375 }
5060 5376
5377 // Create the correct assembler for the architecture.
5378 #ifndef V8_INTERPRETED_REGEXP
5061 // Native regexp implementation. 5379 // Native regexp implementation.
5062 5380
5063 IRRegExpMacroAssembler* macro_assembler = 5381 NativeRegExpMacroAssembler::Mode mode =
5064 new(isolate) IRRegExpMacroAssembler(specialization_cid, 5382 is_one_byte ? NativeRegExpMacroAssembler::LATIN1
5065 data->capture_count, 5383 : NativeRegExpMacroAssembler::UC16;
5066 parsed_function, 5384
5067 ic_data_array, 5385 #if V8_TARGET_ARCH_IA32
5068 isolate); 5386 RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2,
5387 zone);
5388 #elif V8_TARGET_ARCH_X64
5389 RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2,
5390 zone);
5391 #elif V8_TARGET_ARCH_ARM
5392 RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2,
5393 zone);
5394 #elif V8_TARGET_ARCH_ARM64
5395 RegExpMacroAssemblerARM64 macro_assembler(mode, (data->capture_count + 1) * 2,
5396 zone);
5397 #elif V8_TARGET_ARCH_MIPS
5398 RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2,
5399 zone);
5400 #elif V8_TARGET_ARCH_MIPS64
5401 RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2,
5402 zone);
5403 #elif V8_TARGET_ARCH_X87
5404 RegExpMacroAssemblerX87 macro_assembler(mode, (data->capture_count + 1) * 2,
5405 zone);
5406 #else
5407 #error "Unsupported architecture"
5408 #endif
5409
5410 #else // V8_INTERPRETED_REGEXP
5411 // Interpreted regexp implementation.
5412 EmbeddedVector<byte, 1024> codes;
5413 RegExpMacroAssemblerIrregexp macro_assembler(codes, zone);
5414 #endif // V8_INTERPRETED_REGEXP
5069 5415
5070 // Inserted here, instead of in Assembler, because it depends on information 5416 // Inserted here, instead of in Assembler, because it depends on information
5071 // in the AST that isn't replicated in the Node structure. 5417 // in the AST that isn't replicated in the Node structure.
5072 static const intptr_t kMaxBacksearchLimit = 1024; 5418 static const int kMaxBacksearchLimit = 1024;
5073 if (is_end_anchored && 5419 if (is_end_anchored &&
5074 !is_start_anchored && 5420 !is_start_anchored &&
5075 max_length < kMaxBacksearchLimit) { 5421 max_length < kMaxBacksearchLimit) {
5076 macro_assembler->SetCurrentPositionFromEnd(max_length); 5422 macro_assembler.SetCurrentPositionFromEnd(max_length);
5077 } 5423 }
5078 5424
5079 if (is_global) { 5425 if (is_global) {
5080 macro_assembler->set_global_mode( 5426 macro_assembler.set_global_mode(
5081 (data->tree->min_match() > 0) 5427 (data->tree->min_match() > 0)
5082 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK 5428 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
5083 : RegExpMacroAssembler::GLOBAL); 5429 : RegExpMacroAssembler::GLOBAL);
5084 } 5430 }
5085 5431
5086 RegExpEngine::CompilationResult result = 5432 return compiler.Assemble(&macro_assembler,
5087 compiler.Assemble(macro_assembler, 5433 node,
5088 node, 5434 data->capture_count,
5089 data->capture_count, 5435 pattern);
5090 pattern);
5091
5092 if (FLAG_trace_irregexp) {
5093 macro_assembler->PrintBlocks();
5094 }
5095
5096 return result;
5097 } 5436 }
5098 5437
5099
5100 static void CreateSpecializedFunction(Isolate* isolate,
5101 const JSRegExp& regexp,
5102 intptr_t specialization_cid,
5103 const Object& owner) {
5104 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount;
5105
5106 Function& fn = Function::Handle(isolate, Function::New(
5107 Symbols::IrregExp(),
5108 RawFunction::kIrregexpFunction,
5109 true, // Static.
5110 false, // Not const.
5111 false, // Not abstract.
5112 false, // Not external.
5113 false, // Not native.
5114 owner,
5115 0)); // No token position.
5116
5117 // TODO(zerny): Share these arrays between all irregexp functions.
5118 fn.set_num_fixed_parameters(kParamCount);
5119 fn.set_parameter_types(Array::Handle(isolate, Array::New(kParamCount,
5120 Heap::kOld)));
5121 fn.set_parameter_names(Array::Handle(isolate, Array::New(kParamCount,
5122 Heap::kOld)));
5123 fn.SetParameterTypeAt(0, Type::Handle(isolate, Type::DynamicType()));
5124 fn.SetParameterNameAt(0, Symbols::string_param());
5125 fn.SetParameterTypeAt(1, Type::Handle(isolate, Type::DynamicType()));
5126 fn.SetParameterNameAt(1, Symbols::start_index_param());
5127 fn.set_result_type(Type::Handle(isolate, Type::ArrayType()));
5128
5129 // Cache the result.
5130 regexp.set_function(specialization_cid, fn);
5131
5132 fn.set_regexp(regexp);
5133 fn.set_regexp_cid(specialization_cid);
5134
5135 // The function is compiled lazily during the first call.
5136 }
5137
5138
5139 RawJSRegExp* RegExpEngine::CreateJSRegExp(Isolate* isolate,
5140 const String& pattern,
5141 bool multi_line,
5142 bool ignore_case) {
5143 const JSRegExp& regexp = JSRegExp::Handle(JSRegExp::New(0));
5144
5145 regexp.set_pattern(pattern);
5146
5147 if (multi_line) {
5148 regexp.set_is_multi_line();
5149 }
5150 if (ignore_case) {
5151 regexp.set_is_ignore_case();
5152 }
5153
5154 // TODO(zerny): We might want to use normal string searching algorithms
5155 // for simple patterns.
5156 regexp.set_is_complex();
5157 regexp.set_is_global(); // All dart regexps are global.
5158
5159 const Library& lib = Library::Handle(isolate, Library::CoreLibrary());
5160 const Class& owner = Class::Handle(
5161 isolate, lib.LookupClass(Symbols::RegExp()));
5162
5163 CreateSpecializedFunction(isolate, regexp, kOneByteStringCid, owner);
5164 CreateSpecializedFunction(isolate, regexp, kTwoByteStringCid, owner);
5165 CreateSpecializedFunction(isolate, regexp, kExternalOneByteStringCid, owner);
5166 CreateSpecializedFunction(isolate, regexp, kExternalTwoByteStringCid, owner);
5167
5168 return regexp.raw();
5169 }
5170
5171
5172 } // namespace dart 5438 } // namespace dart
OLDNEW
« no previous file with comments | « runtime/vm/regexp.h ('k') | runtime/vm/regexp_assembler.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698