OLD | NEW |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/regexp.h" | 5 #include "vm/regexp.h" |
6 | 6 |
7 #include "vm/dart_entry.h" | 7 // SNIP |
8 #include "vm/regexp_assembler.h" | |
9 #include "vm/regexp_ast.h" | |
10 #include "vm/unibrow-inl.h" | |
11 #include "vm/unicode.h" | |
12 #include "vm/symbols.h" | |
13 | |
14 #define I (isolate()) | |
15 #define CI (compiler->isolate()) | |
16 | 8 |
17 namespace dart { | 9 namespace dart { |
18 | 10 |
19 DECLARE_FLAG(bool, trace_irregexp); | 11 // SNIP |
20 | |
21 // Default to generating optimized regexp code. | |
22 static const bool kRegexpOptimization = true; | |
23 | |
24 // More makes code generation slower, less makes V8 benchmark score lower. | |
25 static const intptr_t kMaxLookaheadForBoyerMoore = 8; | |
26 | |
27 ContainedInLattice AddRange(ContainedInLattice containment, | |
28 const intptr_t* ranges, | |
29 intptr_t ranges_length, | |
30 Interval new_range) { | |
31 ASSERT((ranges_length & 1) == 1); | |
32 ASSERT(ranges[ranges_length - 1] == Utf16::kMaxCodeUnit + 1); | |
33 if (containment == kLatticeUnknown) return containment; | |
34 bool inside = false; | |
35 intptr_t last = 0; | |
36 for (intptr_t i = 0; i < ranges_length; | |
37 inside = !inside, last = ranges[i], i++) { | |
38 // Consider the range from last to ranges[i]. | |
39 // We haven't got to the new range yet. | |
40 if (ranges[i] <= new_range.from()) continue; | |
41 // New range is wholly inside last-ranges[i]. Note that new_range.to() is | |
42 // inclusive, but the values in ranges are not. | |
43 if (last <= new_range.from() && new_range.to() < ranges[i]) { | |
44 return Combine(containment, inside ? kLatticeIn : kLatticeOut); | |
45 } | |
46 return kLatticeUnknown; | |
47 } | |
48 return containment; | |
49 } | |
50 | 12 |
51 // ------------------------------------------------------------------- | 13 // ------------------------------------------------------------------- |
52 // Implementation of the Irregexp regular expression engine. | 14 // Implementation of the Irregexp regular expression engine. |
53 // | 15 // |
54 // The Irregexp regular expression engine is intended to be a complete | 16 // The Irregexp regular expression engine is intended to be a complete |
55 // implementation of ECMAScript regular expressions. It generates | 17 // implementation of ECMAScript regular expressions. It generates either |
56 // IR code that is subsequently compiled to native code. | 18 // bytecodes or native code. |
57 | 19 |
58 // The Irregexp regexp engine is structured in three steps. | 20 // The Irregexp regexp engine is structured in three steps. |
59 // 1) The parser generates an abstract syntax tree. See regexp_ast.cc. | 21 // 1) The parser generates an abstract syntax tree. See ast.cc. |
60 // 2) From the AST a node network is created. The nodes are all | 22 // 2) From the AST a node network is created. The nodes are all |
61 // subclasses of RegExpNode. The nodes represent states when | 23 // subclasses of RegExpNode. The nodes represent states when |
62 // executing a regular expression. Several optimizations are | 24 // executing a regular expression. Several optimizations are |
63 // performed on the node network. | 25 // performed on the node network. |
64 // 3) From the nodes we generate IR instructions that can actually | 26 // 3) From the nodes we generate either byte codes or native code |
65 // execute the regular expression (perform the search). The | 27 // that can actually execute the regular expression (perform |
66 // code generation step is described in more detail below. | 28 // the search). The code generation step is described in more |
| 29 // detail below. |
67 | 30 |
68 // Code generation. | 31 // Code generation. |
69 // | 32 // |
70 // The nodes are divided into four main categories. | 33 // The nodes are divided into four main categories. |
71 // * Choice nodes | 34 // * Choice nodes |
72 // These represent places where the regular expression can | 35 // These represent places where the regular expression can |
73 // match in more than one way. For example on entry to an | 36 // match in more than one way. For example on entry to an |
74 // alternation (foo|bar) or a repetition (*, +, ? or {}). | 37 // alternation (foo|bar) or a repetition (*, +, ? or {}). |
75 // * Action nodes | 38 // * Action nodes |
76 // These represent places where some action should be | 39 // These represent places where some action should be |
77 // performed. Examples include recording the current position | 40 // performed. Examples include recording the current position |
78 // in the input string to a register (in order to implement | 41 // in the input string to a register (in order to implement |
79 // captures) or other actions on registers, for example in order | 42 // captures) or other actions on registers, for example in order |
80 // to implement the counters needed for {} repetitions. | 43 // to implement the counters needed for {} repetitions. |
81 // * Matching nodes | 44 // * Matching nodes |
82 // These attempt to match some element part of the input string. | 45 // These attempt to match some element part of the input string. |
83 // Examples of elements include character classes, plain strings | 46 // Examples of elements include character classes, plain strings |
84 // or back references. | 47 // or back references. |
85 // * End nodes | 48 // * End nodes |
86 // These are used to implement the actions required on finding | 49 // These are used to implement the actions required on finding |
87 // a successful match or failing to find a match. | 50 // a successful match or failing to find a match. |
88 // | 51 // |
89 // The code generated maintains some state as it runs. This consists of the | 52 // The code generated (whether as byte codes or native code) maintains |
90 // following elements: | 53 // some state as it runs. This consists of the following elements: |
91 // | 54 // |
92 // * The capture registers. Used for string captures. | 55 // * The capture registers. Used for string captures. |
93 // * Other registers. Used for counters etc. | 56 // * Other registers. Used for counters etc. |
94 // * The current position. | 57 // * The current position. |
95 // * The stack of backtracking information. Used when a matching node | 58 // * The stack of backtracking information. Used when a matching node |
96 // fails to find a match and needs to try an alternative. | 59 // fails to find a match and needs to try an alternative. |
97 // | 60 // |
98 // Conceptual regular expression execution model: | 61 // Conceptual regular expression execution model: |
99 // | 62 // |
100 // There is a simple conceptual model of regular expression execution | 63 // There is a simple conceptual model of regular expression execution |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
190 // to that trace. The code generator therefore has the ability to generate | 153 // to that trace. The code generator therefore has the ability to generate |
191 // code for each node several times. In order to limit the size of the | 154 // code for each node several times. In order to limit the size of the |
192 // generated code there is an arbitrary limit on how many specialized sets of | 155 // generated code there is an arbitrary limit on how many specialized sets of |
193 // code may be generated for a given node. If the limit is reached, the | 156 // code may be generated for a given node. If the limit is reached, the |
194 // trace is flushed and a generic version of the code for a node is emitted. | 157 // trace is flushed and a generic version of the code for a node is emitted. |
195 // This is subsequently used for that node. The code emitted for non-generic | 158 // This is subsequently used for that node. The code emitted for non-generic |
196 // trace is not recorded in the node and so it cannot currently be reused in | 159 // trace is not recorded in the node and so it cannot currently be reused in |
197 // the event that code generation is requested for an identical trace. | 160 // the event that code generation is requested for an identical trace. |
198 | 161 |
199 | 162 |
200 void RegExpTree::AppendToText(RegExpText* text) { | 163 void RegExpTree::AppendToText(RegExpText* text, Zone* zone) { |
201 UNREACHABLE(); | 164 UNREACHABLE(); |
202 } | 165 } |
203 | 166 |
204 | 167 |
205 void RegExpAtom::AppendToText(RegExpText* text) { | 168 void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) { |
206 text->AddElement(TextElement::Atom(this)); | 169 text->AddElement(TextElement::Atom(this), zone); |
207 } | 170 } |
208 | 171 |
209 | 172 |
210 void RegExpCharacterClass::AppendToText(RegExpText* text) { | 173 void RegExpCharacterClass::AppendToText(RegExpText* text, Zone* zone) { |
211 text->AddElement(TextElement::CharClass(this)); | 174 text->AddElement(TextElement::CharClass(this), zone); |
212 } | 175 } |
213 | 176 |
214 | 177 |
215 void RegExpText::AppendToText(RegExpText* text) { | 178 void RegExpText::AppendToText(RegExpText* text, Zone* zone) { |
216 for (intptr_t i = 0; i < elements()->length(); i++) | 179 for (int i = 0; i < elements()->length(); i++) |
217 text->AddElement((*elements())[i]); | 180 text->AddElement(elements()->at(i), zone); |
218 } | 181 } |
219 | 182 |
220 | 183 |
221 TextElement TextElement::Atom(RegExpAtom* atom) { | 184 TextElement TextElement::Atom(RegExpAtom* atom) { |
222 return TextElement(ATOM, atom); | 185 return TextElement(ATOM, atom); |
223 } | 186 } |
224 | 187 |
225 | 188 |
226 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) { | 189 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) { |
227 return TextElement(CHAR_CLASS, char_class); | 190 return TextElement(CHAR_CLASS, char_class); |
228 } | 191 } |
229 | 192 |
230 | 193 |
231 intptr_t TextElement::length() const { | 194 int TextElement::length() const { |
232 switch (text_type()) { | 195 switch (text_type()) { |
233 case ATOM: | 196 case ATOM: |
234 return atom()->length(); | 197 return atom()->length(); |
235 | 198 |
236 case CHAR_CLASS: | 199 case CHAR_CLASS: |
237 return 1; | 200 return 1; |
238 } | 201 } |
239 UNREACHABLE(); | 202 UNREACHABLE(); |
240 return 0; | 203 return 0; |
241 } | 204 } |
242 | 205 |
243 | 206 |
244 class FrequencyCollator : public ValueObject { | 207 DispatchTable* ChoiceNode::GetTable(bool ignore_case) { |
| 208 if (table_ == NULL) { |
| 209 table_ = new(zone()) DispatchTable(zone()); |
| 210 DispatchTableConstructor cons(table_, ignore_case, zone()); |
| 211 cons.BuildTable(this); |
| 212 } |
| 213 return table_; |
| 214 } |
| 215 |
| 216 |
| 217 class FrequencyCollator { |
245 public: | 218 public: |
246 FrequencyCollator() : total_samples_(0) { | 219 FrequencyCollator() : total_samples_(0) { |
247 for (intptr_t i = 0; i < RegExpMacroAssembler::kTableSize; i++) { | 220 for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) { |
248 frequencies_[i] = CharacterFrequency(i); | 221 frequencies_[i] = CharacterFrequency(i); |
249 } | 222 } |
250 } | 223 } |
251 | 224 |
252 void CountCharacter(intptr_t character) { | 225 void CountCharacter(int character) { |
253 intptr_t index = (character & RegExpMacroAssembler::kTableMask); | 226 int index = (character & RegExpMacroAssembler::kTableMask); |
254 frequencies_[index].Increment(); | 227 frequencies_[index].Increment(); |
255 total_samples_++; | 228 total_samples_++; |
256 } | 229 } |
257 | 230 |
258 // Does not measure in percent, but rather per-128 (the table size from the | 231 // Does not measure in percent, but rather per-128 (the table size from the |
259 // regexp macro assembler). | 232 // regexp macro assembler). |
260 intptr_t Frequency(intptr_t in_character) { | 233 int Frequency(int in_character) { |
261 ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character); | 234 DCHECK((in_character & RegExpMacroAssembler::kTableMask) == in_character); |
262 if (total_samples_ < 1) return 1; // Division by zero. | 235 if (total_samples_ < 1) return 1; // Division by zero. |
263 intptr_t freq_in_per128 = | 236 int freq_in_per128 = |
264 (frequencies_[in_character].counter() * 128) / total_samples_; | 237 (frequencies_[in_character].counter() * 128) / total_samples_; |
265 return freq_in_per128; | 238 return freq_in_per128; |
266 } | 239 } |
267 | 240 |
268 private: | 241 private: |
269 class CharacterFrequency { | 242 class CharacterFrequency { |
270 public: | 243 public: |
271 CharacterFrequency() : counter_(0), character_(-1) { } | 244 CharacterFrequency() : counter_(0), character_(-1) { } |
272 explicit CharacterFrequency(intptr_t character) | 245 explicit CharacterFrequency(int character) |
273 : counter_(0), character_(character) { } | 246 : counter_(0), character_(character) { } |
274 | 247 |
275 void Increment() { counter_++; } | 248 void Increment() { counter_++; } |
276 intptr_t counter() { return counter_; } | 249 int counter() { return counter_; } |
277 intptr_t character() { return character_; } | 250 int character() { return character_; } |
278 | 251 |
279 private: | 252 private: |
280 intptr_t counter_; | 253 int counter_; |
281 intptr_t character_; | 254 int character_; |
282 | |
283 DISALLOW_ALLOCATION(); | |
284 }; | 255 }; |
285 | 256 |
286 | 257 |
287 private: | 258 private: |
288 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; | 259 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; |
289 intptr_t total_samples_; | 260 int total_samples_; |
290 }; | 261 }; |
291 | 262 |
292 | 263 |
293 class RegExpCompiler : public ValueObject { | 264 class RegExpCompiler { |
294 public: | 265 public: |
295 RegExpCompiler(intptr_t capture_count, | 266 RegExpCompiler(int capture_count, bool ignore_case, bool is_one_byte, |
296 bool ignore_case, | 267 Zone* zone); |
297 intptr_t specialization_cid); | |
298 | 268 |
299 intptr_t AllocateRegister() { | 269 int AllocateRegister() { |
| 270 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { |
| 271 reg_exp_too_big_ = true; |
| 272 return next_register_; |
| 273 } |
300 return next_register_++; | 274 return next_register_++; |
301 } | 275 } |
302 | 276 |
303 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler, | 277 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, |
304 RegExpNode* start, | 278 RegExpNode* start, |
305 intptr_t capture_count, | 279 int capture_count, |
306 const String& pattern); | 280 Handle<String> pattern); |
307 | 281 |
308 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 282 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
309 | 283 |
310 static const intptr_t kImplementationOffset = 0; | 284 static const int kImplementationOffset = 0; |
311 static const intptr_t kNumberOfRegistersOffset = 0; | 285 static const int kNumberOfRegistersOffset = 0; |
312 static const intptr_t kCodeOffset = 1; | 286 static const int kCodeOffset = 1; |
313 | 287 |
314 IRRegExpMacroAssembler* macro_assembler() { return macro_assembler_; } | 288 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
315 EndNode* accept() { return accept_; } | 289 EndNode* accept() { return accept_; } |
316 | 290 |
317 static const intptr_t kMaxRecursion = 100; | 291 static const int kMaxRecursion = 100; |
318 inline intptr_t recursion_depth() { return recursion_depth_; } | 292 inline int recursion_depth() { return recursion_depth_; } |
319 inline void IncrementRecursionDepth() { recursion_depth_++; } | 293 inline void IncrementRecursionDepth() { recursion_depth_++; } |
320 inline void DecrementRecursionDepth() { recursion_depth_--; } | 294 inline void DecrementRecursionDepth() { recursion_depth_--; } |
321 | 295 |
322 void SetRegExpTooBig() { reg_exp_too_big_ = true; } | 296 void SetRegExpTooBig() { reg_exp_too_big_ = true; } |
323 | 297 |
324 inline bool ignore_case() { return ignore_case_; } | 298 inline bool ignore_case() { return ignore_case_; } |
325 inline bool one_byte() const { | 299 inline bool one_byte() { return one_byte_; } |
326 return (specialization_cid_ == kOneByteStringCid || | |
327 specialization_cid_ == kExternalOneByteStringCid); | |
328 } | |
329 inline intptr_t specialization_cid() { return specialization_cid_; } | |
330 FrequencyCollator* frequency_collator() { return &frequency_collator_; } | 300 FrequencyCollator* frequency_collator() { return &frequency_collator_; } |
331 | 301 |
332 intptr_t current_expansion_factor() { return current_expansion_factor_; } | 302 int current_expansion_factor() { return current_expansion_factor_; } |
333 void set_current_expansion_factor(intptr_t value) { | 303 void set_current_expansion_factor(int value) { |
334 current_expansion_factor_ = value; | 304 current_expansion_factor_ = value; |
335 } | 305 } |
336 | 306 |
337 Isolate* isolate() const { return isolate_; } | 307 Zone* zone() const { return zone_; } |
338 | 308 |
339 static const intptr_t kNoRegister = -1; | 309 static const int kNoRegister = -1; |
340 | 310 |
341 private: | 311 private: |
342 EndNode* accept_; | 312 EndNode* accept_; |
343 intptr_t next_register_; | 313 int next_register_; |
344 ZoneGrowableArray<RegExpNode*>* work_list_; | 314 List<RegExpNode*>* work_list_; |
345 intptr_t recursion_depth_; | 315 int recursion_depth_; |
346 IRRegExpMacroAssembler* macro_assembler_; | 316 RegExpMacroAssembler* macro_assembler_; |
347 bool ignore_case_; | 317 bool ignore_case_; |
348 intptr_t specialization_cid_; | 318 bool one_byte_; |
349 bool reg_exp_too_big_; | 319 bool reg_exp_too_big_; |
350 intptr_t current_expansion_factor_; | 320 int current_expansion_factor_; |
351 FrequencyCollator frequency_collator_; | 321 FrequencyCollator frequency_collator_; |
352 Isolate* isolate_; | 322 Zone* zone_; |
353 }; | 323 }; |
354 | 324 |
355 | 325 |
356 class RecursionCheck : public ValueObject { | 326 class RecursionCheck { |
357 public: | 327 public: |
358 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 328 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
359 compiler->IncrementRecursionDepth(); | 329 compiler->IncrementRecursionDepth(); |
360 } | 330 } |
361 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 331 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
362 private: | 332 private: |
363 RegExpCompiler* compiler_; | 333 RegExpCompiler* compiler_; |
364 }; | 334 }; |
365 | 335 |
366 | 336 |
367 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { | 337 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) { |
368 return RegExpEngine::CompilationResult("RegExp too big"); | 338 return RegExpEngine::CompilationResult(isolate, "RegExp too big"); |
369 } | 339 } |
370 | 340 |
371 | 341 |
372 // Attempts to compile the regexp using an Irregexp code generator. Returns | 342 // Attempts to compile the regexp using an Irregexp code generator. Returns |
373 // a fixed array or a null handle depending on whether it succeeded. | 343 // a fixed array or a null handle depending on whether it succeeded. |
374 RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool ignore_case, | 344 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, |
375 intptr_t specialization_cid) | 345 bool one_byte, Zone* zone) |
376 : next_register_(2 * (capture_count + 1)), | 346 : next_register_(2 * (capture_count + 1)), |
377 work_list_(NULL), | 347 work_list_(NULL), |
378 recursion_depth_(0), | 348 recursion_depth_(0), |
379 ignore_case_(ignore_case), | 349 ignore_case_(ignore_case), |
380 specialization_cid_(specialization_cid), | 350 one_byte_(one_byte), |
381 reg_exp_too_big_(false), | 351 reg_exp_too_big_(false), |
382 current_expansion_factor_(1), | 352 current_expansion_factor_(1), |
383 isolate_(Isolate::Current()) { | 353 frequency_collator_(), |
384 accept_ = new(I) EndNode(EndNode::ACCEPT, I); | 354 zone_(zone) { |
| 355 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone); |
| 356 DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); |
385 } | 357 } |
386 | 358 |
387 | 359 |
388 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 360 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
389 IRRegExpMacroAssembler* macro_assembler, | 361 RegExpMacroAssembler* macro_assembler, |
390 RegExpNode* start, | 362 RegExpNode* start, |
391 intptr_t capture_count, | 363 int capture_count, |
392 const String& pattern) { | 364 Handle<String> pattern) { |
393 static const bool use_slow_safe_regexp_compiler = false; | 365 Heap* heap = pattern->GetHeap(); |
| 366 |
| 367 bool use_slow_safe_regexp_compiler = false; |
| 368 if (heap->total_regexp_code_generated() > |
| 369 RegExpImpl::kRegWxpCompiledLimit && |
| 370 heap->isolate()->memory_allocator()->SizeExecutable() > |
| 371 RegExpImpl::kRegExpExecutableMemoryLimit) { |
| 372 use_slow_safe_regexp_compiler = true; |
| 373 } |
394 | 374 |
395 macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler); | 375 macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler); |
396 macro_assembler_ = macro_assembler; | |
397 | 376 |
398 ZoneGrowableArray<RegExpNode*> work_list(0); | 377 #ifdef DEBUG |
| 378 if (FLAG_trace_regexp_assembler) |
| 379 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); |
| 380 else |
| 381 #endif |
| 382 macro_assembler_ = macro_assembler; |
| 383 |
| 384 List <RegExpNode*> work_list(0); |
399 work_list_ = &work_list; | 385 work_list_ = &work_list; |
400 BlockLabel fail; | 386 Label fail; |
401 macro_assembler_->PushBacktrack(&fail); | 387 macro_assembler_->PushBacktrack(&fail); |
402 Trace new_trace; | 388 Trace new_trace; |
403 start->Emit(this, &new_trace); | 389 start->Emit(this, &new_trace); |
404 macro_assembler_->BindBlock(&fail); | 390 macro_assembler_->Bind(&fail); |
405 macro_assembler_->Fail(); | 391 macro_assembler_->Fail(); |
406 while (!work_list.is_empty()) { | 392 while (!work_list.is_empty()) { |
407 work_list.RemoveLast()->Emit(this, &new_trace); | 393 work_list.RemoveLast()->Emit(this, &new_trace); |
408 } | 394 } |
409 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); | 395 if (reg_exp_too_big_) return IrregexpRegExpTooBig(zone_->isolate()); |
410 | 396 |
411 macro_assembler->GenerateBacktrackBlock(); | 397 Handle<HeapObject> code = macro_assembler_->GetCode(pattern); |
412 | 398 heap->IncreaseTotalRegexpCodeGenerated(code->Size()); |
413 return RegExpEngine::CompilationResult(macro_assembler->backtrack_goto(), | 399 work_list_ = NULL; |
414 macro_assembler->graph_entry(), | 400 #ifdef DEBUG |
415 macro_assembler->num_blocks(), | 401 if (FLAG_print_code) { |
416 macro_assembler->num_stack_locals()); | 402 CodeTracer::Scope trace_scope(heap->isolate()->GetCodeTracer()); |
| 403 OFStream os(trace_scope.file()); |
| 404 Handle<Code>::cast(code)->Disassemble(pattern->ToCString().get(), os); |
| 405 } |
| 406 if (FLAG_trace_regexp_assembler) { |
| 407 delete macro_assembler_; |
| 408 } |
| 409 #endif |
| 410 return RegExpEngine::CompilationResult(*code, next_register_); |
417 } | 411 } |
418 | 412 |
419 | 413 |
420 bool Trace::DeferredAction::Mentions(intptr_t that) { | 414 bool Trace::DeferredAction::Mentions(int that) { |
421 if (action_type() == ActionNode::CLEAR_CAPTURES) { | 415 if (action_type() == ActionNode::CLEAR_CAPTURES) { |
422 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); | 416 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); |
423 return range.Contains(that); | 417 return range.Contains(that); |
424 } else { | 418 } else { |
425 return reg() == that; | 419 return reg() == that; |
426 } | 420 } |
427 } | 421 } |
428 | 422 |
429 | 423 |
430 bool Trace::mentions_reg(intptr_t reg) { | 424 bool Trace::mentions_reg(int reg) { |
431 for (DeferredAction* action = actions_; | 425 for (DeferredAction* action = actions_; |
432 action != NULL; | 426 action != NULL; |
433 action = action->next()) { | 427 action = action->next()) { |
434 if (action->Mentions(reg)) | 428 if (action->Mentions(reg)) |
435 return true; | 429 return true; |
436 } | 430 } |
437 return false; | 431 return false; |
438 } | 432 } |
439 | 433 |
440 | 434 |
441 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) { | 435 bool Trace::GetStoredPosition(int reg, int* cp_offset) { |
442 ASSERT(*cp_offset == 0); | 436 DCHECK_EQ(0, *cp_offset); |
443 for (DeferredAction* action = actions_; | 437 for (DeferredAction* action = actions_; |
444 action != NULL; | 438 action != NULL; |
445 action = action->next()) { | 439 action = action->next()) { |
446 if (action->Mentions(reg)) { | 440 if (action->Mentions(reg)) { |
447 if (action->action_type() == ActionNode::STORE_POSITION) { | 441 if (action->action_type() == ActionNode::STORE_POSITION) { |
448 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); | 442 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); |
449 return true; | 443 return true; |
450 } else { | 444 } else { |
451 return false; | 445 return false; |
452 } | 446 } |
453 } | 447 } |
454 } | 448 } |
455 return false; | 449 return false; |
456 } | 450 } |
457 | 451 |
458 | 452 |
459 // This is called as we come into a loop choice node and some other tricky | 453 int Trace::FindAffectedRegisters(OutSet* affected_registers, |
460 // nodes. It normalizes the state of the code generator to ensure we can | 454 Zone* zone) { |
461 // generate generic code. | 455 int max_register = RegExpCompiler::kNoRegister; |
462 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, | |
463 Isolate* isolate) { | |
464 intptr_t max_register = RegExpCompiler::kNoRegister; | |
465 for (DeferredAction* action = actions_; | 456 for (DeferredAction* action = actions_; |
466 action != NULL; | 457 action != NULL; |
467 action = action->next()) { | 458 action = action->next()) { |
468 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { | 459 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { |
469 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); | 460 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); |
470 for (intptr_t i = range.from(); i <= range.to(); i++) | 461 for (int i = range.from(); i <= range.to(); i++) |
471 affected_registers->Set(i, isolate); | 462 affected_registers->Set(i, zone); |
472 if (range.to() > max_register) max_register = range.to(); | 463 if (range.to() > max_register) max_register = range.to(); |
473 } else { | 464 } else { |
474 affected_registers->Set(action->reg(), isolate); | 465 affected_registers->Set(action->reg(), zone); |
475 if (action->reg() > max_register) max_register = action->reg(); | 466 if (action->reg() > max_register) max_register = action->reg(); |
476 } | 467 } |
477 } | 468 } |
478 return max_register; | 469 return max_register; |
479 } | 470 } |
480 | 471 |
481 | 472 |
482 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, | 473 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, |
483 intptr_t max_register, | 474 int max_register, |
484 const OutSet& registers_to_pop, | 475 const OutSet& registers_to_pop, |
485 const OutSet& registers_to_clear) { | 476 const OutSet& registers_to_clear) { |
486 for (intptr_t reg = max_register; reg >= 0; reg--) { | 477 for (int reg = max_register; reg >= 0; reg--) { |
487 if (registers_to_pop.Get(reg)) { | 478 if (registers_to_pop.Get(reg)) { |
488 assembler->PopRegister(reg); | 479 assembler->PopRegister(reg); |
489 } else if (registers_to_clear.Get(reg)) { | 480 } else if (registers_to_clear.Get(reg)) { |
490 intptr_t clear_to = reg; | 481 int clear_to = reg; |
491 while (reg > 0 && registers_to_clear.Get(reg - 1)) { | 482 while (reg > 0 && registers_to_clear.Get(reg - 1)) { |
492 reg--; | 483 reg--; |
493 } | 484 } |
494 assembler->ClearRegisters(reg, clear_to); | 485 assembler->ClearRegisters(reg, clear_to); |
495 } | 486 } |
496 } | 487 } |
497 } | 488 } |
498 | 489 |
499 | 490 |
500 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, | 491 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, |
501 intptr_t max_register, | 492 int max_register, |
502 const OutSet& affected_registers, | 493 const OutSet& affected_registers, |
503 OutSet* registers_to_pop, | 494 OutSet* registers_to_pop, |
504 OutSet* registers_to_clear, | 495 OutSet* registers_to_clear, |
505 Isolate* isolate) { | 496 Zone* zone) { |
506 for (intptr_t reg = 0; reg <= max_register; reg++) { | 497 // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1. |
| 498 const int push_limit = (assembler->stack_limit_slack() + 1) / 2; |
| 499 |
| 500 // Count pushes performed to force a stack limit check occasionally. |
| 501 int pushes = 0; |
| 502 |
| 503 for (int reg = 0; reg <= max_register; reg++) { |
507 if (!affected_registers.Get(reg)) { | 504 if (!affected_registers.Get(reg)) { |
508 continue; | 505 continue; |
509 } | 506 } |
510 | 507 |
511 // The chronologically first deferred action in the trace | 508 // The chronologically first deferred action in the trace |
512 // is used to infer the action needed to restore a register | 509 // is used to infer the action needed to restore a register |
513 // to its previous state (or not, if it's safe to ignore it). | 510 // to its previous state (or not, if it's safe to ignore it). |
514 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; | 511 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; |
515 DeferredActionUndoType undo_action = IGNORE; | 512 DeferredActionUndoType undo_action = IGNORE; |
516 | 513 |
517 intptr_t value = 0; | 514 int value = 0; |
518 bool absolute = false; | 515 bool absolute = false; |
519 bool clear = false; | 516 bool clear = false; |
520 intptr_t store_position = -1; | 517 int store_position = -1; |
521 // This is a little tricky because we are scanning the actions in reverse | 518 // This is a little tricky because we are scanning the actions in reverse |
522 // historical order (newest first). | 519 // historical order (newest first). |
523 for (DeferredAction* action = actions_; | 520 for (DeferredAction* action = actions_; |
524 action != NULL; | 521 action != NULL; |
525 action = action->next()) { | 522 action = action->next()) { |
526 if (action->Mentions(reg)) { | 523 if (action->Mentions(reg)) { |
527 switch (action->action_type()) { | 524 switch (action->action_type()) { |
528 case ActionNode::SET_REGISTER: { | 525 case ActionNode::SET_REGISTER: { |
529 Trace::DeferredSetRegister* psr = | 526 Trace::DeferredSetRegister* psr = |
530 static_cast<Trace::DeferredSetRegister*>(action); | 527 static_cast<Trace::DeferredSetRegister*>(action); |
531 if (!absolute) { | 528 if (!absolute) { |
532 value += psr->value(); | 529 value += psr->value(); |
533 absolute = true; | 530 absolute = true; |
534 } | 531 } |
535 // SET_REGISTER is currently only used for newly introduced loop | 532 // SET_REGISTER is currently only used for newly introduced loop |
536 // counters. They can have a significant previous value if they | 533 // counters. They can have a significant previous value if they |
537 // occur in a loop. TODO(lrn): Propagate this information, so | 534 // occur in a loop. TODO(lrn): Propagate this information, so |
538 // we can set undo_action to IGNORE if we know there is no value to | 535 // we can set undo_action to IGNORE if we know there is no value to |
539 // restore. | 536 // restore. |
540 undo_action = RESTORE; | 537 undo_action = RESTORE; |
541 ASSERT(store_position == -1); | 538 DCHECK_EQ(store_position, -1); |
542 ASSERT(!clear); | 539 DCHECK(!clear); |
543 break; | 540 break; |
544 } | 541 } |
545 case ActionNode::INCREMENT_REGISTER: | 542 case ActionNode::INCREMENT_REGISTER: |
546 if (!absolute) { | 543 if (!absolute) { |
547 value++; | 544 value++; |
548 } | 545 } |
549 ASSERT(store_position == -1); | 546 DCHECK_EQ(store_position, -1); |
550 ASSERT(!clear); | 547 DCHECK(!clear); |
551 undo_action = RESTORE; | 548 undo_action = RESTORE; |
552 break; | 549 break; |
553 case ActionNode::STORE_POSITION: { | 550 case ActionNode::STORE_POSITION: { |
554 Trace::DeferredCapture* pc = | 551 Trace::DeferredCapture* pc = |
555 static_cast<Trace::DeferredCapture*>(action); | 552 static_cast<Trace::DeferredCapture*>(action); |
556 if (!clear && store_position == -1) { | 553 if (!clear && store_position == -1) { |
557 store_position = pc->cp_offset(); | 554 store_position = pc->cp_offset(); |
558 } | 555 } |
559 | 556 |
560 // For captures we know that stores and clears alternate. | 557 // For captures we know that stores and clears alternate. |
561 // Other register, are never cleared, and if the occur | 558 // Other register, are never cleared, and if the occur |
562 // inside a loop, they might be assigned more than once. | 559 // inside a loop, they might be assigned more than once. |
563 if (reg <= 1) { | 560 if (reg <= 1) { |
564 // Registers zero and one, aka "capture zero", is | 561 // Registers zero and one, aka "capture zero", is |
565 // always set correctly if we succeed. There is no | 562 // always set correctly if we succeed. There is no |
566 // need to undo a setting on backtrack, because we | 563 // need to undo a setting on backtrack, because we |
567 // will set it again or fail. | 564 // will set it again or fail. |
568 undo_action = IGNORE; | 565 undo_action = IGNORE; |
569 } else { | 566 } else { |
570 undo_action = pc->is_capture() ? CLEAR : RESTORE; | 567 undo_action = pc->is_capture() ? CLEAR : RESTORE; |
571 } | 568 } |
572 ASSERT(!absolute); | 569 DCHECK(!absolute); |
573 ASSERT(value == 0); | 570 DCHECK_EQ(value, 0); |
574 break; | 571 break; |
575 } | 572 } |
576 case ActionNode::CLEAR_CAPTURES: { | 573 case ActionNode::CLEAR_CAPTURES: { |
577 // Since we're scanning in reverse order, if we've already | 574 // Since we're scanning in reverse order, if we've already |
578 // set the position we have to ignore historically earlier | 575 // set the position we have to ignore historically earlier |
579 // clearing operations. | 576 // clearing operations. |
580 if (store_position == -1) { | 577 if (store_position == -1) { |
581 clear = true; | 578 clear = true; |
582 } | 579 } |
583 undo_action = RESTORE; | 580 undo_action = RESTORE; |
584 ASSERT(!absolute); | 581 DCHECK(!absolute); |
585 ASSERT(value == 0); | 582 DCHECK_EQ(value, 0); |
586 break; | 583 break; |
587 } | 584 } |
588 default: | 585 default: |
589 UNREACHABLE(); | 586 UNREACHABLE(); |
590 break; | 587 break; |
591 } | 588 } |
592 } | 589 } |
593 } | 590 } |
594 // Prepare for the undo-action (e.g., push if it's going to be popped). | 591 // Prepare for the undo-action (e.g., push if it's going to be popped). |
595 if (undo_action == RESTORE) { | 592 if (undo_action == RESTORE) { |
596 assembler->PushRegister(reg); | 593 pushes++; |
597 registers_to_pop->Set(reg, isolate); | 594 RegExpMacroAssembler::StackCheckFlag stack_check = |
| 595 RegExpMacroAssembler::kNoStackLimitCheck; |
| 596 if (pushes == push_limit) { |
| 597 stack_check = RegExpMacroAssembler::kCheckStackLimit; |
| 598 pushes = 0; |
| 599 } |
| 600 |
| 601 assembler->PushRegister(reg, stack_check); |
| 602 registers_to_pop->Set(reg, zone); |
598 } else if (undo_action == CLEAR) { | 603 } else if (undo_action == CLEAR) { |
599 registers_to_clear->Set(reg, isolate); | 604 registers_to_clear->Set(reg, zone); |
600 } | 605 } |
601 // Perform the chronologically last action (or accumulated increment) | 606 // Perform the chronologically last action (or accumulated increment) |
602 // for the register. | 607 // for the register. |
603 if (store_position != -1) { | 608 if (store_position != -1) { |
604 assembler->WriteCurrentPositionToRegister(reg, store_position); | 609 assembler->WriteCurrentPositionToRegister(reg, store_position); |
605 } else if (clear) { | 610 } else if (clear) { |
606 assembler->ClearRegisters(reg, reg); | 611 assembler->ClearRegisters(reg, reg); |
607 } else if (absolute) { | 612 } else if (absolute) { |
608 assembler->SetRegister(reg, value); | 613 assembler->SetRegister(reg, value); |
609 } else if (value != 0) { | 614 } else if (value != 0) { |
610 assembler->AdvanceRegister(reg, value); | 615 assembler->AdvanceRegister(reg, value); |
611 } | 616 } |
612 } | 617 } |
613 } | 618 } |
614 | 619 |
615 | 620 |
616 // This is called as we come into a loop choice node and some other tricky | 621 // This is called as we come into a loop choice node and some other tricky |
617 // nodes. It normalizes the state of the code generator to ensure we can | 622 // nodes. It normalizes the state of the code generator to ensure we can |
618 // generate generic code. | 623 // generate generic code. |
619 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { | 624 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { |
620 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 625 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
621 | 626 |
622 ASSERT(!is_trivial()); | 627 DCHECK(!is_trivial()); |
623 | 628 |
624 if (actions_ == NULL && backtrack() == NULL) { | 629 if (actions_ == NULL && backtrack() == NULL) { |
625 // Here we just have some deferred cp advances to fix and we are back to | 630 // Here we just have some deferred cp advances to fix and we are back to |
626 // a normal situation. We may also have to forget some information gained | 631 // a normal situation. We may also have to forget some information gained |
627 // through a quick check that was already performed. | 632 // through a quick check that was already performed. |
628 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); | 633 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); |
629 // Create a new trivial state and generate the node with that. | 634 // Create a new trivial state and generate the node with that. |
630 Trace new_state; | 635 Trace new_state; |
631 successor->Emit(compiler, &new_state); | 636 successor->Emit(compiler, &new_state); |
632 return; | 637 return; |
633 } | 638 } |
634 | 639 |
635 // Generate deferred actions here along with code to undo them again. | 640 // Generate deferred actions here along with code to undo them again. |
636 OutSet affected_registers; | 641 OutSet affected_registers; |
637 | 642 |
638 if (backtrack() != NULL) { | 643 if (backtrack() != NULL) { |
639 // Here we have a concrete backtrack location. These are set up by choice | 644 // Here we have a concrete backtrack location. These are set up by choice |
640 // nodes and so they indicate that we have a deferred save of the current | 645 // nodes and so they indicate that we have a deferred save of the current |
641 // position which we may need to emit here. | 646 // position which we may need to emit here. |
642 assembler->PushCurrentPosition(); | 647 assembler->PushCurrentPosition(); |
643 } | 648 } |
644 | 649 |
645 intptr_t max_register = FindAffectedRegisters(&affected_registers, CI); | 650 int max_register = FindAffectedRegisters(&affected_registers, |
| 651 compiler->zone()); |
646 OutSet registers_to_pop; | 652 OutSet registers_to_pop; |
647 OutSet registers_to_clear; | 653 OutSet registers_to_clear; |
648 PerformDeferredActions(assembler, | 654 PerformDeferredActions(assembler, |
649 max_register, | 655 max_register, |
650 affected_registers, | 656 affected_registers, |
651 ®isters_to_pop, | 657 ®isters_to_pop, |
652 ®isters_to_clear, | 658 ®isters_to_clear, |
653 CI); | 659 compiler->zone()); |
654 if (cp_offset_ != 0) { | 660 if (cp_offset_ != 0) { |
655 assembler->AdvanceCurrentPosition(cp_offset_); | 661 assembler->AdvanceCurrentPosition(cp_offset_); |
656 } | 662 } |
657 | 663 |
658 // Create a new trivial state and generate the node with that. | 664 // Create a new trivial state and generate the node with that. |
659 BlockLabel undo; | 665 Label undo; |
660 assembler->PushBacktrack(&undo); | 666 assembler->PushBacktrack(&undo); |
661 Trace new_state; | 667 Trace new_state; |
662 successor->Emit(compiler, &new_state); | 668 successor->Emit(compiler, &new_state); |
663 | 669 |
664 // On backtrack we need to restore state. | 670 // On backtrack we need to restore state. |
665 assembler->BindBlock(&undo); | 671 assembler->Bind(&undo); |
666 RestoreAffectedRegisters(assembler, | 672 RestoreAffectedRegisters(assembler, |
667 max_register, | 673 max_register, |
668 registers_to_pop, | 674 registers_to_pop, |
669 registers_to_clear); | 675 registers_to_clear); |
670 if (backtrack() == NULL) { | 676 if (backtrack() == NULL) { |
671 assembler->Backtrack(); | 677 assembler->Backtrack(); |
672 } else { | 678 } else { |
673 assembler->PopCurrentPosition(); | 679 assembler->PopCurrentPosition(); |
674 assembler->GoTo(backtrack()); | 680 assembler->GoTo(backtrack()); |
675 } | 681 } |
676 } | 682 } |
677 | 683 |
678 | 684 |
679 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { | 685 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { |
680 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 686 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
681 | 687 |
682 // Omit flushing the trace. We discard the entire stack frame anyway. | 688 // Omit flushing the trace. We discard the entire stack frame anyway. |
683 | 689 |
684 if (!label()->IsBound()) { | 690 if (!label()->is_bound()) { |
685 // We are completely independent of the trace, since we ignore it, | 691 // We are completely independent of the trace, since we ignore it, |
686 // so this code can be used as the generic version. | 692 // so this code can be used as the generic version. |
687 assembler->BindBlock(label()); | 693 assembler->Bind(label()); |
688 } | 694 } |
689 | 695 |
690 // Throw away everything on the backtrack stack since the start | 696 // Throw away everything on the backtrack stack since the start |
691 // of the negative submatch and restore the character position. | 697 // of the negative submatch and restore the character position. |
692 assembler->ReadCurrentPositionFromRegister(current_position_register_); | 698 assembler->ReadCurrentPositionFromRegister(current_position_register_); |
693 assembler->ReadStackPointerFromRegister(stack_pointer_register_); | 699 assembler->ReadStackPointerFromRegister(stack_pointer_register_); |
694 if (clear_capture_count_ > 0) { | 700 if (clear_capture_count_ > 0) { |
695 // Clear any captures that might have been performed during the success | 701 // Clear any captures that might have been performed during the success |
696 // of the body of the negative look-ahead. | 702 // of the body of the negative look-ahead. |
697 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1; | 703 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1; |
698 assembler->ClearRegisters(clear_capture_start_, clear_capture_end); | 704 assembler->ClearRegisters(clear_capture_start_, clear_capture_end); |
699 } | 705 } |
700 // Now that we have unwound the stack we find at the top of the stack the | 706 // Now that we have unwound the stack we find at the top of the stack the |
701 // backtrack that the BeginSubmatch node got. | 707 // backtrack that the BeginSubmatch node got. |
702 assembler->Backtrack(); | 708 assembler->Backtrack(); |
703 } | 709 } |
704 | 710 |
705 | 711 |
706 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 712 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
707 if (!trace->is_trivial()) { | 713 if (!trace->is_trivial()) { |
708 trace->Flush(compiler, this); | 714 trace->Flush(compiler, this); |
709 return; | 715 return; |
710 } | 716 } |
711 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 717 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
712 if (!label()->IsBound()) { | 718 if (!label()->is_bound()) { |
713 assembler->BindBlock(label()); | 719 assembler->Bind(label()); |
714 } | 720 } |
715 switch (action_) { | 721 switch (action_) { |
716 case ACCEPT: | 722 case ACCEPT: |
717 assembler->Succeed(); | 723 assembler->Succeed(); |
718 return; | 724 return; |
719 case BACKTRACK: | 725 case BACKTRACK: |
720 assembler->GoTo(trace->backtrack()); | 726 assembler->GoTo(trace->backtrack()); |
721 return; | 727 return; |
722 case NEGATIVE_SUBMATCH_SUCCESS: | 728 case NEGATIVE_SUBMATCH_SUCCESS: |
723 // This case is handled in a different virtual method. | 729 // This case is handled in a different virtual method. |
724 UNREACHABLE(); | 730 UNREACHABLE(); |
725 } | 731 } |
726 UNIMPLEMENTED(); | 732 UNIMPLEMENTED(); |
727 } | 733 } |
728 | 734 |
729 | 735 |
730 void GuardedAlternative::AddGuard(Guard* guard, Isolate* isolate) { | 736 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { |
731 if (guards_ == NULL) | 737 if (guards_ == NULL) |
732 guards_ = new(isolate) ZoneGrowableArray<Guard*>(1); | 738 guards_ = new(zone) ZoneList<Guard*>(1, zone); |
733 guards_->Add(guard); | 739 guards_->Add(guard, zone); |
734 } | 740 } |
735 | 741 |
736 | 742 |
737 ActionNode* ActionNode::SetRegister(intptr_t reg, | 743 ActionNode* ActionNode::SetRegister(int reg, |
738 intptr_t val, | 744 int val, |
739 RegExpNode* on_success) { | 745 RegExpNode* on_success) { |
740 ActionNode* result = | 746 ActionNode* result = |
741 new(on_success->isolate()) ActionNode(SET_REGISTER, on_success); | 747 new(on_success->zone()) ActionNode(SET_REGISTER, on_success); |
742 result->data_.u_store_register.reg = reg; | 748 result->data_.u_store_register.reg = reg; |
743 result->data_.u_store_register.value = val; | 749 result->data_.u_store_register.value = val; |
744 return result; | 750 return result; |
745 } | 751 } |
746 | 752 |
747 | 753 |
748 ActionNode* ActionNode::IncrementRegister(intptr_t reg, | 754 ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) { |
749 RegExpNode* on_success) { | |
750 ActionNode* result = | 755 ActionNode* result = |
751 new(on_success->isolate()) ActionNode(INCREMENT_REGISTER, on_success); | 756 new(on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success); |
752 result->data_.u_increment_register.reg = reg; | 757 result->data_.u_increment_register.reg = reg; |
753 return result; | 758 return result; |
754 } | 759 } |
755 | 760 |
756 | 761 |
757 ActionNode* ActionNode::StorePosition(intptr_t reg, | 762 ActionNode* ActionNode::StorePosition(int reg, |
758 bool is_capture, | 763 bool is_capture, |
759 RegExpNode* on_success) { | 764 RegExpNode* on_success) { |
760 ActionNode* result = | 765 ActionNode* result = |
761 new(on_success->isolate()) ActionNode(STORE_POSITION, on_success); | 766 new(on_success->zone()) ActionNode(STORE_POSITION, on_success); |
762 result->data_.u_position_register.reg = reg; | 767 result->data_.u_position_register.reg = reg; |
763 result->data_.u_position_register.is_capture = is_capture; | 768 result->data_.u_position_register.is_capture = is_capture; |
764 return result; | 769 return result; |
765 } | 770 } |
766 | 771 |
767 | 772 |
768 ActionNode* ActionNode::ClearCaptures(Interval range, | 773 ActionNode* ActionNode::ClearCaptures(Interval range, |
769 RegExpNode* on_success) { | 774 RegExpNode* on_success) { |
770 ActionNode* result = | 775 ActionNode* result = |
771 new(on_success->isolate()) ActionNode(CLEAR_CAPTURES, on_success); | 776 new(on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success); |
772 result->data_.u_clear_captures.range_from = range.from(); | 777 result->data_.u_clear_captures.range_from = range.from(); |
773 result->data_.u_clear_captures.range_to = range.to(); | 778 result->data_.u_clear_captures.range_to = range.to(); |
774 return result; | 779 return result; |
775 } | 780 } |
776 | 781 |
777 | 782 |
778 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg, | 783 ActionNode* ActionNode::BeginSubmatch(int stack_reg, |
779 intptr_t position_reg, | 784 int position_reg, |
780 RegExpNode* on_success) { | 785 RegExpNode* on_success) { |
781 ActionNode* result = | 786 ActionNode* result = |
782 new(on_success->isolate()) ActionNode(BEGIN_SUBMATCH, on_success); | 787 new(on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success); |
783 result->data_.u_submatch.stack_pointer_register = stack_reg; | 788 result->data_.u_submatch.stack_pointer_register = stack_reg; |
784 result->data_.u_submatch.current_position_register = position_reg; | 789 result->data_.u_submatch.current_position_register = position_reg; |
785 return result; | 790 return result; |
786 } | 791 } |
787 | 792 |
788 | 793 |
789 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg, | 794 ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, |
790 intptr_t position_reg, | 795 int position_reg, |
791 intptr_t clear_register_count, | 796 int clear_register_count, |
792 intptr_t clear_register_from, | 797 int clear_register_from, |
793 RegExpNode* on_success) { | 798 RegExpNode* on_success) { |
794 ActionNode* result = | 799 ActionNode* result = |
795 new(on_success->isolate()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, | 800 new(on_success->zone()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); |
796 on_success); | |
797 result->data_.u_submatch.stack_pointer_register = stack_reg; | 801 result->data_.u_submatch.stack_pointer_register = stack_reg; |
798 result->data_.u_submatch.current_position_register = position_reg; | 802 result->data_.u_submatch.current_position_register = position_reg; |
799 result->data_.u_submatch.clear_register_count = clear_register_count; | 803 result->data_.u_submatch.clear_register_count = clear_register_count; |
800 result->data_.u_submatch.clear_register_from = clear_register_from; | 804 result->data_.u_submatch.clear_register_from = clear_register_from; |
801 return result; | 805 return result; |
802 } | 806 } |
803 | 807 |
804 | 808 |
805 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register, | 809 ActionNode* ActionNode::EmptyMatchCheck(int start_register, |
806 intptr_t repetition_register, | 810 int repetition_register, |
807 intptr_t repetition_limit, | 811 int repetition_limit, |
808 RegExpNode* on_success) { | 812 RegExpNode* on_success) { |
809 ActionNode* result = | 813 ActionNode* result = |
810 new(on_success->isolate()) ActionNode(EMPTY_MATCH_CHECK, on_success); | 814 new(on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success); |
811 result->data_.u_empty_match_check.start_register = start_register; | 815 result->data_.u_empty_match_check.start_register = start_register; |
812 result->data_.u_empty_match_check.repetition_register = repetition_register; | 816 result->data_.u_empty_match_check.repetition_register = repetition_register; |
813 result->data_.u_empty_match_check.repetition_limit = repetition_limit; | 817 result->data_.u_empty_match_check.repetition_limit = repetition_limit; |
814 return result; | 818 return result; |
815 } | 819 } |
816 | 820 |
817 | 821 |
818 #define DEFINE_ACCEPT(Type) \ | 822 #define DEFINE_ACCEPT(Type) \ |
819 void Type##Node::Accept(NodeVisitor* visitor) { \ | 823 void Type##Node::Accept(NodeVisitor* visitor) { \ |
820 visitor->Visit##Type(this); \ | 824 visitor->Visit##Type(this); \ |
821 } | 825 } |
822 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) | 826 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) |
823 #undef DEFINE_ACCEPT | 827 #undef DEFINE_ACCEPT |
824 | 828 |
825 | 829 |
826 void LoopChoiceNode::Accept(NodeVisitor* visitor) { | 830 void LoopChoiceNode::Accept(NodeVisitor* visitor) { |
827 visitor->VisitLoopChoice(this); | 831 visitor->VisitLoopChoice(this); |
828 } | 832 } |
829 | 833 |
830 | 834 |
831 // ------------------------------------------------------------------- | 835 // ------------------------------------------------------------------- |
832 // Emit code. | 836 // Emit code. |
833 | 837 |
834 | 838 |
835 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, | 839 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, |
836 Guard* guard, | 840 Guard* guard, |
837 Trace* trace) { | 841 Trace* trace) { |
838 switch (guard->op()) { | 842 switch (guard->op()) { |
839 case Guard::LT: | 843 case Guard::LT: |
840 ASSERT(!trace->mentions_reg(guard->reg())); | 844 DCHECK(!trace->mentions_reg(guard->reg())); |
841 macro_assembler->IfRegisterGE(guard->reg(), | 845 macro_assembler->IfRegisterGE(guard->reg(), |
842 guard->value(), | 846 guard->value(), |
843 trace->backtrack()); | 847 trace->backtrack()); |
844 break; | 848 break; |
845 case Guard::GEQ: | 849 case Guard::GEQ: |
846 ASSERT(!trace->mentions_reg(guard->reg())); | 850 DCHECK(!trace->mentions_reg(guard->reg())); |
847 macro_assembler->IfRegisterLT(guard->reg(), | 851 macro_assembler->IfRegisterLT(guard->reg(), |
848 guard->value(), | 852 guard->value(), |
849 trace->backtrack()); | 853 trace->backtrack()); |
850 break; | 854 break; |
851 } | 855 } |
852 } | 856 } |
853 | 857 |
854 | 858 |
855 // Returns the number of characters in the equivalence class, omitting those | 859 // Returns the number of characters in the equivalence class, omitting those |
856 // that cannot occur in the source string because it is ASCII. | 860 // that cannot occur in the source string because it is ASCII. |
857 static intptr_t GetCaseIndependentLetters(uint16_t character, | 861 static int GetCaseIndependentLetters(Isolate* isolate, uc16 character, |
858 bool one_byte_subject, | 862 bool one_byte_subject, |
859 int32_t* letters) { | 863 unibrow::uchar* letters) { |
860 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; | 864 int length = |
861 intptr_t length = jsregexp_uncanonicalize.get(character, '\0', letters); | 865 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); |
862 // Unibrow returns 0 or 1 for characters where case independence is | 866 // Unibrow returns 0 or 1 for characters where case independence is |
863 // trivial. | 867 // trivial. |
864 if (length == 0) { | 868 if (length == 0) { |
865 letters[0] = character; | 869 letters[0] = character; |
866 length = 1; | 870 length = 1; |
867 } | 871 } |
868 if (!one_byte_subject || character <= Symbols::kMaxOneCharCodeSymbol) { | 872 if (!one_byte_subject || character <= String::kMaxOneByteCharCode) { |
869 return length; | 873 return length; |
870 } | 874 } |
871 | 875 |
872 // The standard requires that non-ASCII characters cannot have ASCII | 876 // The standard requires that non-ASCII characters cannot have ASCII |
873 // character codes in their equivalence class. | 877 // character codes in their equivalence class. |
874 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore, | 878 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore, |
875 // is it? For example, \u00C5 is equivalent to \u212B. | 879 // is it? For example, \u00C5 is equivalent to \u212B. |
876 return 0; | 880 return 0; |
877 } | 881 } |
878 | 882 |
879 | 883 |
880 static inline bool EmitSimpleCharacter(Isolate* isolate, | 884 static inline bool EmitSimpleCharacter(Isolate* isolate, |
881 RegExpCompiler* compiler, | 885 RegExpCompiler* compiler, |
882 uint16_t c, | 886 uc16 c, |
883 BlockLabel* on_failure, | 887 Label* on_failure, |
884 intptr_t cp_offset, | 888 int cp_offset, |
885 bool check, | 889 bool check, |
886 bool preloaded) { | 890 bool preloaded) { |
887 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 891 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
888 bool bound_checked = false; | 892 bool bound_checked = false; |
889 if (!preloaded) { | 893 if (!preloaded) { |
890 assembler->LoadCurrentCharacter( | 894 assembler->LoadCurrentCharacter( |
891 cp_offset, | 895 cp_offset, |
892 on_failure, | 896 on_failure, |
893 check); | 897 check); |
894 bound_checked = true; | 898 bound_checked = true; |
895 } | 899 } |
896 assembler->CheckNotCharacter(c, on_failure); | 900 assembler->CheckNotCharacter(c, on_failure); |
897 return bound_checked; | 901 return bound_checked; |
898 } | 902 } |
899 | 903 |
900 | 904 |
901 // Only emits non-letters (things that don't have case). Only used for case | 905 // Only emits non-letters (things that don't have case). Only used for case |
902 // independent matches. | 906 // independent matches. |
903 static inline bool EmitAtomNonLetter(Isolate* isolate, | 907 static inline bool EmitAtomNonLetter(Isolate* isolate, |
904 RegExpCompiler* compiler, | 908 RegExpCompiler* compiler, |
905 uint16_t c, | 909 uc16 c, |
906 BlockLabel* on_failure, | 910 Label* on_failure, |
907 intptr_t cp_offset, | 911 int cp_offset, |
908 bool check, | 912 bool check, |
909 bool preloaded) { | 913 bool preloaded) { |
910 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 914 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
911 bool one_byte = compiler->one_byte(); | 915 bool one_byte = compiler->one_byte(); |
912 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 916 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
913 intptr_t length = GetCaseIndependentLetters(c, one_byte, chars); | 917 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); |
914 if (length < 1) { | 918 if (length < 1) { |
915 // This can't match. Must be an one-byte subject and a non-one-byte | 919 // This can't match. Must be an one-byte subject and a non-one-byte |
916 // character. We do not need to do anything since the one-byte pass | 920 // character. We do not need to do anything since the one-byte pass |
917 // already handled this. | 921 // already handled this. |
918 return false; // Bounds not checked. | 922 return false; // Bounds not checked. |
919 } | 923 } |
920 bool checked = false; | 924 bool checked = false; |
921 // We handle the length > 1 case in a later pass. | 925 // We handle the length > 1 case in a later pass. |
922 if (length == 1) { | 926 if (length == 1) { |
923 if (one_byte && c > Symbols::kMaxOneCharCodeSymbol) { | 927 if (one_byte && c > String::kMaxOneByteCharCodeU) { |
924 // Can't match - see above. | 928 // Can't match - see above. |
925 return false; // Bounds not checked. | 929 return false; // Bounds not checked. |
926 } | 930 } |
927 if (!preloaded) { | 931 if (!preloaded) { |
928 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 932 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
929 checked = check; | 933 checked = check; |
930 } | 934 } |
931 macro_assembler->CheckNotCharacter(c, on_failure); | 935 macro_assembler->CheckNotCharacter(c, on_failure); |
932 } | 936 } |
933 return checked; | 937 return checked; |
934 } | 938 } |
935 | 939 |
936 | 940 |
937 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, | 941 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
938 bool one_byte, | 942 bool one_byte, uc16 c1, uc16 c2, |
939 uint16_t c1, | 943 Label* on_failure) { |
940 uint16_t c2, | 944 uc16 char_mask; |
941 BlockLabel* on_failure) { | |
942 uint16_t char_mask; | |
943 if (one_byte) { | 945 if (one_byte) { |
944 char_mask = Symbols::kMaxOneCharCodeSymbol; | 946 char_mask = String::kMaxOneByteCharCode; |
945 } else { | 947 } else { |
946 char_mask = Utf16::kMaxCodeUnit; | 948 char_mask = String::kMaxUtf16CodeUnit; |
947 } | 949 } |
948 uint16_t exor = c1 ^ c2; | 950 uc16 exor = c1 ^ c2; |
949 // Check whether exor has only one bit set. | 951 // Check whether exor has only one bit set. |
950 if (((exor - 1) & exor) == 0) { | 952 if (((exor - 1) & exor) == 0) { |
951 // If c1 and c2 differ only by one bit. | 953 // If c1 and c2 differ only by one bit. |
952 // Ecma262UnCanonicalize always gives the highest number last. | 954 // Ecma262UnCanonicalize always gives the highest number last. |
953 ASSERT(c2 > c1); | 955 DCHECK(c2 > c1); |
954 uint16_t mask = char_mask ^ exor; | 956 uc16 mask = char_mask ^ exor; |
955 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure); | 957 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure); |
956 return true; | 958 return true; |
957 } | 959 } |
958 ASSERT(c2 > c1); | 960 DCHECK(c2 > c1); |
959 uint16_t diff = c2 - c1; | 961 uc16 diff = c2 - c1; |
960 if (((diff - 1) & diff) == 0 && c1 >= diff) { | 962 if (((diff - 1) & diff) == 0 && c1 >= diff) { |
961 // If the characters differ by 2^n but don't differ by one bit then | 963 // If the characters differ by 2^n but don't differ by one bit then |
962 // subtract the difference from the found character, then do the or | 964 // subtract the difference from the found character, then do the or |
963 // trick. We avoid the theoretical case where negative numbers are | 965 // trick. We avoid the theoretical case where negative numbers are |
964 // involved in order to simplify code generation. | 966 // involved in order to simplify code generation. |
965 uint16_t mask = char_mask ^ diff; | 967 uc16 mask = char_mask ^ diff; |
966 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, | 968 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, |
967 diff, | 969 diff, |
968 mask, | 970 mask, |
969 on_failure); | 971 on_failure); |
970 return true; | 972 return true; |
971 } | 973 } |
972 return false; | 974 return false; |
973 } | 975 } |
974 | 976 |
975 | 977 |
976 typedef bool EmitCharacterFunction(Isolate* isolate, | 978 typedef bool EmitCharacterFunction(Isolate* isolate, |
977 RegExpCompiler* compiler, | 979 RegExpCompiler* compiler, |
978 uint16_t c, | 980 uc16 c, |
979 BlockLabel* on_failure, | 981 Label* on_failure, |
980 intptr_t cp_offset, | 982 int cp_offset, |
981 bool check, | 983 bool check, |
982 bool preloaded); | 984 bool preloaded); |
983 | 985 |
984 // Only emits letters (things that have case). Only used for case independent | 986 // Only emits letters (things that have case). Only used for case independent |
985 // matches. | 987 // matches. |
986 static inline bool EmitAtomLetter(Isolate* isolate, | 988 static inline bool EmitAtomLetter(Isolate* isolate, |
987 RegExpCompiler* compiler, | 989 RegExpCompiler* compiler, |
988 uint16_t c, | 990 uc16 c, |
989 BlockLabel* on_failure, | 991 Label* on_failure, |
990 intptr_t cp_offset, | 992 int cp_offset, |
991 bool check, | 993 bool check, |
992 bool preloaded) { | 994 bool preloaded) { |
993 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 995 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
994 bool one_byte = compiler->one_byte(); | 996 bool one_byte = compiler->one_byte(); |
995 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 997 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
996 intptr_t length = GetCaseIndependentLetters(c, one_byte, chars); | 998 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); |
997 if (length <= 1) return false; | 999 if (length <= 1) return false; |
998 // We may not need to check against the end of the input string | 1000 // We may not need to check against the end of the input string |
999 // if this character lies before a character that matched. | 1001 // if this character lies before a character that matched. |
1000 if (!preloaded) { | 1002 if (!preloaded) { |
1001 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1003 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
1002 } | 1004 } |
1003 BlockLabel ok; | 1005 Label ok; |
1004 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1006 DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
1005 switch (length) { | 1007 switch (length) { |
1006 case 2: { | 1008 case 2: { |
1007 if (ShortCutEmitCharacterPair(macro_assembler, | 1009 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0], |
1008 one_byte, | 1010 chars[1], on_failure)) { |
1009 chars[0], | |
1010 chars[1], | |
1011 on_failure)) { | |
1012 } else { | 1011 } else { |
1013 macro_assembler->CheckCharacter(chars[0], &ok); | 1012 macro_assembler->CheckCharacter(chars[0], &ok); |
1014 macro_assembler->CheckNotCharacter(chars[1], on_failure); | 1013 macro_assembler->CheckNotCharacter(chars[1], on_failure); |
1015 macro_assembler->BindBlock(&ok); | 1014 macro_assembler->Bind(&ok); |
1016 } | 1015 } |
1017 break; | 1016 break; |
1018 } | 1017 } |
1019 case 4: | 1018 case 4: |
1020 macro_assembler->CheckCharacter(chars[3], &ok); | 1019 macro_assembler->CheckCharacter(chars[3], &ok); |
1021 // Fall through! | 1020 // Fall through! |
1022 case 3: | 1021 case 3: |
1023 macro_assembler->CheckCharacter(chars[0], &ok); | 1022 macro_assembler->CheckCharacter(chars[0], &ok); |
1024 macro_assembler->CheckCharacter(chars[1], &ok); | 1023 macro_assembler->CheckCharacter(chars[1], &ok); |
1025 macro_assembler->CheckNotCharacter(chars[2], on_failure); | 1024 macro_assembler->CheckNotCharacter(chars[2], on_failure); |
1026 macro_assembler->BindBlock(&ok); | 1025 macro_assembler->Bind(&ok); |
1027 break; | 1026 break; |
1028 default: | 1027 default: |
1029 UNREACHABLE(); | 1028 UNREACHABLE(); |
1030 break; | 1029 break; |
1031 } | 1030 } |
1032 return true; | 1031 return true; |
1033 } | 1032 } |
1034 | 1033 |
1035 | 1034 |
1036 static void EmitBoundaryTest(RegExpMacroAssembler* masm, | 1035 static void EmitBoundaryTest(RegExpMacroAssembler* masm, |
1037 intptr_t border, | 1036 int border, |
1038 BlockLabel* fall_through, | 1037 Label* fall_through, |
1039 BlockLabel* above_or_equal, | 1038 Label* above_or_equal, |
1040 BlockLabel* below) { | 1039 Label* below) { |
1041 if (below != fall_through) { | 1040 if (below != fall_through) { |
1042 masm->CheckCharacterLT(border, below); | 1041 masm->CheckCharacterLT(border, below); |
1043 if (above_or_equal != fall_through) masm->GoTo(above_or_equal); | 1042 if (above_or_equal != fall_through) masm->GoTo(above_or_equal); |
1044 } else { | 1043 } else { |
1045 masm->CheckCharacterGT(border - 1, above_or_equal); | 1044 masm->CheckCharacterGT(border - 1, above_or_equal); |
1046 } | 1045 } |
1047 } | 1046 } |
1048 | 1047 |
1049 | 1048 |
1050 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, | 1049 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, |
1051 intptr_t first, | 1050 int first, |
1052 intptr_t last, | 1051 int last, |
1053 BlockLabel* fall_through, | 1052 Label* fall_through, |
1054 BlockLabel* in_range, | 1053 Label* in_range, |
1055 BlockLabel* out_of_range) { | 1054 Label* out_of_range) { |
1056 if (in_range == fall_through) { | 1055 if (in_range == fall_through) { |
1057 if (first == last) { | 1056 if (first == last) { |
1058 masm->CheckNotCharacter(first, out_of_range); | 1057 masm->CheckNotCharacter(first, out_of_range); |
1059 } else { | 1058 } else { |
1060 masm->CheckCharacterNotInRange(first, last, out_of_range); | 1059 masm->CheckCharacterNotInRange(first, last, out_of_range); |
1061 } | 1060 } |
1062 } else { | 1061 } else { |
1063 if (first == last) { | 1062 if (first == last) { |
1064 masm->CheckCharacter(first, in_range); | 1063 masm->CheckCharacter(first, in_range); |
1065 } else { | 1064 } else { |
1066 masm->CheckCharacterInRange(first, last, in_range); | 1065 masm->CheckCharacterInRange(first, last, in_range); |
1067 } | 1066 } |
1068 if (out_of_range != fall_through) masm->GoTo(out_of_range); | 1067 if (out_of_range != fall_through) masm->GoTo(out_of_range); |
1069 } | 1068 } |
1070 } | 1069 } |
1071 | 1070 |
1072 | 1071 |
1073 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. | 1072 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. |
1074 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. | 1073 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. |
1075 static void EmitUseLookupTable( | 1074 static void EmitUseLookupTable( |
1076 RegExpMacroAssembler* masm, | 1075 RegExpMacroAssembler* masm, |
1077 ZoneGrowableArray<int>* ranges, | 1076 ZoneList<int>* ranges, |
1078 intptr_t start_index, | 1077 int start_index, |
1079 intptr_t end_index, | 1078 int end_index, |
1080 intptr_t min_char, | 1079 int min_char, |
1081 BlockLabel* fall_through, | 1080 Label* fall_through, |
1082 BlockLabel* even_label, | 1081 Label* even_label, |
1083 BlockLabel* odd_label) { | 1082 Label* odd_label) { |
1084 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; | 1083 static const int kSize = RegExpMacroAssembler::kTableSize; |
1085 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; | 1084 static const int kMask = RegExpMacroAssembler::kTableMask; |
1086 | 1085 |
1087 intptr_t base = (min_char & ~kMask); | 1086 int base = (min_char & ~kMask); |
| 1087 USE(base); |
1088 | 1088 |
1089 // Assert that everything is on one kTableSize page. | 1089 // Assert that everything is on one kTableSize page. |
1090 for (intptr_t i = start_index; i <= end_index; i++) { | 1090 for (int i = start_index; i <= end_index; i++) { |
1091 ASSERT((ranges->At(i) & ~kMask) == base); | 1091 DCHECK_EQ(ranges->at(i) & ~kMask, base); |
1092 } | 1092 } |
1093 ASSERT(start_index == 0 || (ranges->At(start_index - 1) & ~kMask) <= base); | 1093 DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base); |
1094 | 1094 |
1095 char templ[kSize]; | 1095 char templ[kSize]; |
1096 BlockLabel* on_bit_set; | 1096 Label* on_bit_set; |
1097 BlockLabel* on_bit_clear; | 1097 Label* on_bit_clear; |
1098 intptr_t bit; | 1098 int bit; |
1099 if (even_label == fall_through) { | 1099 if (even_label == fall_through) { |
1100 on_bit_set = odd_label; | 1100 on_bit_set = odd_label; |
1101 on_bit_clear = even_label; | 1101 on_bit_clear = even_label; |
1102 bit = 1; | 1102 bit = 1; |
1103 } else { | 1103 } else { |
1104 on_bit_set = even_label; | 1104 on_bit_set = even_label; |
1105 on_bit_clear = odd_label; | 1105 on_bit_clear = odd_label; |
1106 bit = 0; | 1106 bit = 0; |
1107 } | 1107 } |
1108 for (intptr_t i = 0; i < (ranges->At(start_index) & kMask) && i < kSize; | 1108 for (int i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; i++) { |
1109 i++) { | |
1110 templ[i] = bit; | 1109 templ[i] = bit; |
1111 } | 1110 } |
1112 intptr_t j = 0; | 1111 int j = 0; |
1113 bit ^= 1; | 1112 bit ^= 1; |
1114 for (intptr_t i = start_index; i < end_index; i++) { | 1113 for (int i = start_index; i < end_index; i++) { |
1115 for (j = (ranges->At(i) & kMask); j < (ranges->At(i + 1) & kMask); j++) { | 1114 for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) { |
1116 templ[j] = bit; | 1115 templ[j] = bit; |
1117 } | 1116 } |
1118 bit ^= 1; | 1117 bit ^= 1; |
1119 } | 1118 } |
1120 for (intptr_t i = j; i < kSize; i++) { | 1119 for (int i = j; i < kSize; i++) { |
1121 templ[i] = bit; | 1120 templ[i] = bit; |
1122 } | 1121 } |
| 1122 Factory* factory = masm->zone()->isolate()->factory(); |
1123 // TODO(erikcorry): Cache these. | 1123 // TODO(erikcorry): Cache these. |
1124 const TypedData& ba = TypedData::ZoneHandle( | 1124 Handle<ByteArray> ba = factory->NewByteArray(kSize, TENURED); |
1125 masm->isolate(), | 1125 for (int i = 0; i < kSize; i++) { |
1126 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); | 1126 ba->set(i, templ[i]); |
1127 for (intptr_t i = 0; i < kSize; i++) { | |
1128 ba.SetUint8(i, templ[i]); | |
1129 } | 1127 } |
1130 masm->CheckBitInTable(ba, on_bit_set); | 1128 masm->CheckBitInTable(ba, on_bit_set); |
1131 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); | 1129 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); |
1132 } | 1130 } |
1133 | 1131 |
1134 | 1132 |
1135 static void CutOutRange(RegExpMacroAssembler* masm, | 1133 static void CutOutRange(RegExpMacroAssembler* masm, |
1136 ZoneGrowableArray<int>* ranges, | 1134 ZoneList<int>* ranges, |
1137 intptr_t start_index, | 1135 int start_index, |
1138 intptr_t end_index, | 1136 int end_index, |
1139 intptr_t cut_index, | 1137 int cut_index, |
1140 BlockLabel* even_label, | 1138 Label* even_label, |
1141 BlockLabel* odd_label) { | 1139 Label* odd_label) { |
1142 bool odd = (((cut_index - start_index) & 1) == 1); | 1140 bool odd = (((cut_index - start_index) & 1) == 1); |
1143 BlockLabel* in_range_label = odd ? odd_label : even_label; | 1141 Label* in_range_label = odd ? odd_label : even_label; |
1144 BlockLabel dummy; | 1142 Label dummy; |
1145 EmitDoubleBoundaryTest(masm, | 1143 EmitDoubleBoundaryTest(masm, |
1146 ranges->At(cut_index), | 1144 ranges->at(cut_index), |
1147 ranges->At(cut_index + 1) - 1, | 1145 ranges->at(cut_index + 1) - 1, |
1148 &dummy, | 1146 &dummy, |
1149 in_range_label, | 1147 in_range_label, |
1150 &dummy); | 1148 &dummy); |
1151 ASSERT(!dummy.IsLinked()); | 1149 DCHECK(!dummy.is_linked()); |
1152 // Cut out the single range by rewriting the array. This creates a new | 1150 // Cut out the single range by rewriting the array. This creates a new |
1153 // range that is a merger of the two ranges on either side of the one we | 1151 // range that is a merger of the two ranges on either side of the one we |
1154 // are cutting out. The oddity of the labels is preserved. | 1152 // are cutting out. The oddity of the labels is preserved. |
1155 for (intptr_t j = cut_index; j > start_index; j--) { | 1153 for (int j = cut_index; j > start_index; j--) { |
1156 (*ranges)[j] = ranges->At(j - 1); | 1154 ranges->at(j) = ranges->at(j - 1); |
1157 } | 1155 } |
1158 for (intptr_t j = cut_index + 1; j < end_index; j++) { | 1156 for (int j = cut_index + 1; j < end_index; j++) { |
1159 (*ranges)[j] = ranges->At(j + 1); | 1157 ranges->at(j) = ranges->at(j + 1); |
1160 } | 1158 } |
1161 } | 1159 } |
1162 | 1160 |
1163 | 1161 |
1164 // Unicode case. Split the search space into kSize spaces that are handled | 1162 // Unicode case. Split the search space into kSize spaces that are handled |
1165 // with recursion. | 1163 // with recursion. |
1166 static void SplitSearchSpace(ZoneGrowableArray<int>* ranges, | 1164 static void SplitSearchSpace(ZoneList<int>* ranges, |
1167 intptr_t start_index, | 1165 int start_index, |
1168 intptr_t end_index, | 1166 int end_index, |
1169 intptr_t* new_start_index, | 1167 int* new_start_index, |
1170 intptr_t* new_end_index, | 1168 int* new_end_index, |
1171 intptr_t* border) { | 1169 int* border) { |
1172 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; | 1170 static const int kSize = RegExpMacroAssembler::kTableSize; |
1173 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; | 1171 static const int kMask = RegExpMacroAssembler::kTableMask; |
1174 | 1172 |
1175 intptr_t first = ranges->At(start_index); | 1173 int first = ranges->at(start_index); |
1176 intptr_t last = ranges->At(end_index) - 1; | 1174 int last = ranges->at(end_index) - 1; |
1177 | 1175 |
1178 *new_start_index = start_index; | 1176 *new_start_index = start_index; |
1179 *border = (ranges->At(start_index) & ~kMask) + kSize; | 1177 *border = (ranges->at(start_index) & ~kMask) + kSize; |
1180 while (*new_start_index < end_index) { | 1178 while (*new_start_index < end_index) { |
1181 if (ranges->At(*new_start_index) > *border) break; | 1179 if (ranges->at(*new_start_index) > *border) break; |
1182 (*new_start_index)++; | 1180 (*new_start_index)++; |
1183 } | 1181 } |
1184 // new_start_index is the index of the first edge that is beyond the | 1182 // new_start_index is the index of the first edge that is beyond the |
1185 // current kSize space. | 1183 // current kSize space. |
1186 | 1184 |
1187 // For very large search spaces we do a binary chop search of the non-Latin1 | 1185 // For very large search spaces we do a binary chop search of the non-Latin1 |
1188 // space instead of just going to the end of the current kSize space. The | 1186 // space instead of just going to the end of the current kSize space. The |
1189 // heuristics are complicated a little by the fact that any 128-character | 1187 // heuristics are complicated a little by the fact that any 128-character |
1190 // encoding space can be quickly tested with a table lookup, so we don't | 1188 // encoding space can be quickly tested with a table lookup, so we don't |
1191 // wish to do binary chop search at a smaller granularity than that. A | 1189 // wish to do binary chop search at a smaller granularity than that. A |
1192 // 128-character space can take up a lot of space in the ranges array if, | 1190 // 128-character space can take up a lot of space in the ranges array if, |
1193 // for example, we only want to match every second character (eg. the lower | 1191 // for example, we only want to match every second character (eg. the lower |
1194 // case characters on some Unicode pages). | 1192 // case characters on some Unicode pages). |
1195 intptr_t binary_chop_index = (end_index + start_index) / 2; | 1193 int binary_chop_index = (end_index + start_index) / 2; |
1196 // The first test ensures that we get to the code that handles the Latin1 | 1194 // The first test ensures that we get to the code that handles the Latin1 |
1197 // range with a single not-taken branch, speeding up this important | 1195 // range with a single not-taken branch, speeding up this important |
1198 // character range (even non-Latin1 charset-based text has spaces and | 1196 // character range (even non-Latin1 charset-based text has spaces and |
1199 // punctuation). | 1197 // punctuation). |
1200 if (*border - 1 > Symbols::kMaxOneCharCodeSymbol && // Latin1 case. | 1198 if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case. |
1201 end_index - start_index > (*new_start_index - start_index) * 2 && | 1199 end_index - start_index > (*new_start_index - start_index) * 2 && |
1202 last - first > kSize * 2 && | 1200 last - first > kSize * 2 && binary_chop_index > *new_start_index && |
1203 binary_chop_index > *new_start_index && | 1201 ranges->at(binary_chop_index) >= first + 2 * kSize) { |
1204 ranges->At(binary_chop_index) >= first + 2 * kSize) { | 1202 int scan_forward_for_section_border = binary_chop_index;; |
1205 intptr_t scan_forward_for_section_border = binary_chop_index;; | 1203 int new_border = (ranges->at(binary_chop_index) | kMask) + 1; |
1206 intptr_t new_border = (ranges->At(binary_chop_index) | kMask) + 1; | |
1207 | 1204 |
1208 while (scan_forward_for_section_border < end_index) { | 1205 while (scan_forward_for_section_border < end_index) { |
1209 if (ranges->At(scan_forward_for_section_border) > new_border) { | 1206 if (ranges->at(scan_forward_for_section_border) > new_border) { |
1210 *new_start_index = scan_forward_for_section_border; | 1207 *new_start_index = scan_forward_for_section_border; |
1211 *border = new_border; | 1208 *border = new_border; |
1212 break; | 1209 break; |
1213 } | 1210 } |
1214 scan_forward_for_section_border++; | 1211 scan_forward_for_section_border++; |
1215 } | 1212 } |
1216 } | 1213 } |
1217 | 1214 |
1218 ASSERT(*new_start_index > start_index); | 1215 DCHECK(*new_start_index > start_index); |
1219 *new_end_index = *new_start_index - 1; | 1216 *new_end_index = *new_start_index - 1; |
1220 if (ranges->At(*new_end_index) == *border) { | 1217 if (ranges->at(*new_end_index) == *border) { |
1221 (*new_end_index)--; | 1218 (*new_end_index)--; |
1222 } | 1219 } |
1223 if (*border >= ranges->At(end_index)) { | 1220 if (*border >= ranges->at(end_index)) { |
1224 *border = ranges->At(end_index); | 1221 *border = ranges->at(end_index); |
1225 *new_start_index = end_index; // Won't be used. | 1222 *new_start_index = end_index; // Won't be used. |
1226 *new_end_index = end_index - 1; | 1223 *new_end_index = end_index - 1; |
1227 } | 1224 } |
1228 } | 1225 } |
1229 | 1226 |
1230 | 1227 |
1231 // Gets a series of segment boundaries representing a character class. If the | 1228 // Gets a series of segment boundaries representing a character class. If the |
1232 // character is in the range between an even and an odd boundary (counting from | 1229 // character is in the range between an even and an odd boundary (counting from |
1233 // start_index) then go to even_label, otherwise go to odd_label. We already | 1230 // start_index) then go to even_label, otherwise go to odd_label. We already |
1234 // know that the character is in the range of min_char to max_char inclusive. | 1231 // know that the character is in the range of min_char to max_char inclusive. |
1235 // Either label can be NULL indicating backtracking. Either label can also be | 1232 // Either label can be NULL indicating backtracking. Either label can also be |
1236 // equal to the fall_through label. | 1233 // equal to the fall_through label. |
1237 static void GenerateBranches(RegExpMacroAssembler* masm, | 1234 static void GenerateBranches(RegExpMacroAssembler* masm, |
1238 ZoneGrowableArray<int>* ranges, | 1235 ZoneList<int>* ranges, |
1239 intptr_t start_index, | 1236 int start_index, |
1240 intptr_t end_index, | 1237 int end_index, |
1241 uint16_t min_char, | 1238 uc16 min_char, |
1242 uint16_t max_char, | 1239 uc16 max_char, |
1243 BlockLabel* fall_through, | 1240 Label* fall_through, |
1244 BlockLabel* even_label, | 1241 Label* even_label, |
1245 BlockLabel* odd_label) { | 1242 Label* odd_label) { |
1246 intptr_t first = ranges->At(start_index); | 1243 int first = ranges->at(start_index); |
1247 intptr_t last = ranges->At(end_index) - 1; | 1244 int last = ranges->at(end_index) - 1; |
1248 | 1245 |
1249 ASSERT(min_char < first); | 1246 DCHECK_LT(min_char, first); |
1250 | 1247 |
1251 // Just need to test if the character is before or on-or-after | 1248 // Just need to test if the character is before or on-or-after |
1252 // a particular character. | 1249 // a particular character. |
1253 if (start_index == end_index) { | 1250 if (start_index == end_index) { |
1254 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); | 1251 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); |
1255 return; | 1252 return; |
1256 } | 1253 } |
1257 | 1254 |
1258 // Another almost trivial case: There is one interval in the middle that is | 1255 // Another almost trivial case: There is one interval in the middle that is |
1259 // different from the end intervals. | 1256 // different from the end intervals. |
1260 if (start_index + 1 == end_index) { | 1257 if (start_index + 1 == end_index) { |
1261 EmitDoubleBoundaryTest( | 1258 EmitDoubleBoundaryTest( |
1262 masm, first, last, fall_through, even_label, odd_label); | 1259 masm, first, last, fall_through, even_label, odd_label); |
1263 return; | 1260 return; |
1264 } | 1261 } |
1265 | 1262 |
1266 // It's not worth using table lookup if there are very few intervals in the | 1263 // It's not worth using table lookup if there are very few intervals in the |
1267 // character class. | 1264 // character class. |
1268 if (end_index - start_index <= 6) { | 1265 if (end_index - start_index <= 6) { |
1269 // It is faster to test for individual characters, so we look for those | 1266 // It is faster to test for individual characters, so we look for those |
1270 // first, then try arbitrary ranges in the second round. | 1267 // first, then try arbitrary ranges in the second round. |
1271 static intptr_t kNoCutIndex = -1; | 1268 static int kNoCutIndex = -1; |
1272 intptr_t cut = kNoCutIndex; | 1269 int cut = kNoCutIndex; |
1273 for (intptr_t i = start_index; i < end_index; i++) { | 1270 for (int i = start_index; i < end_index; i++) { |
1274 if (ranges->At(i) == ranges->At(i + 1) - 1) { | 1271 if (ranges->at(i) == ranges->at(i + 1) - 1) { |
1275 cut = i; | 1272 cut = i; |
1276 break; | 1273 break; |
1277 } | 1274 } |
1278 } | 1275 } |
1279 if (cut == kNoCutIndex) cut = start_index; | 1276 if (cut == kNoCutIndex) cut = start_index; |
1280 CutOutRange( | 1277 CutOutRange( |
1281 masm, ranges, start_index, end_index, cut, even_label, odd_label); | 1278 masm, ranges, start_index, end_index, cut, even_label, odd_label); |
1282 ASSERT(end_index - start_index >= 2); | 1279 DCHECK_GE(end_index - start_index, 2); |
1283 GenerateBranches(masm, | 1280 GenerateBranches(masm, |
1284 ranges, | 1281 ranges, |
1285 start_index + 1, | 1282 start_index + 1, |
1286 end_index - 1, | 1283 end_index - 1, |
1287 min_char, | 1284 min_char, |
1288 max_char, | 1285 max_char, |
1289 fall_through, | 1286 fall_through, |
1290 even_label, | 1287 even_label, |
1291 odd_label); | 1288 odd_label); |
1292 return; | 1289 return; |
1293 } | 1290 } |
1294 | 1291 |
1295 // If there are a lot of intervals in the regexp, then we will use tables to | 1292 // If there are a lot of intervals in the regexp, then we will use tables to |
1296 // determine whether the character is inside or outside the character class. | 1293 // determine whether the character is inside or outside the character class. |
1297 static const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits; | 1294 static const int kBits = RegExpMacroAssembler::kTableSizeBits; |
1298 | 1295 |
1299 if ((max_char >> kBits) == (min_char >> kBits)) { | 1296 if ((max_char >> kBits) == (min_char >> kBits)) { |
1300 EmitUseLookupTable(masm, | 1297 EmitUseLookupTable(masm, |
1301 ranges, | 1298 ranges, |
1302 start_index, | 1299 start_index, |
1303 end_index, | 1300 end_index, |
1304 min_char, | 1301 min_char, |
1305 fall_through, | 1302 fall_through, |
1306 even_label, | 1303 even_label, |
1307 odd_label); | 1304 odd_label); |
1308 return; | 1305 return; |
1309 } | 1306 } |
1310 | 1307 |
1311 if ((min_char >> kBits) != (first >> kBits)) { | 1308 if ((min_char >> kBits) != (first >> kBits)) { |
1312 masm->CheckCharacterLT(first, odd_label); | 1309 masm->CheckCharacterLT(first, odd_label); |
1313 GenerateBranches(masm, | 1310 GenerateBranches(masm, |
1314 ranges, | 1311 ranges, |
1315 start_index + 1, | 1312 start_index + 1, |
1316 end_index, | 1313 end_index, |
1317 first, | 1314 first, |
1318 max_char, | 1315 max_char, |
1319 fall_through, | 1316 fall_through, |
1320 odd_label, | 1317 odd_label, |
1321 even_label); | 1318 even_label); |
1322 return; | 1319 return; |
1323 } | 1320 } |
1324 | 1321 |
1325 intptr_t new_start_index = 0; | 1322 int new_start_index = 0; |
1326 intptr_t new_end_index = 0; | 1323 int new_end_index = 0; |
1327 intptr_t border = 0; | 1324 int border = 0; |
1328 | 1325 |
1329 SplitSearchSpace(ranges, | 1326 SplitSearchSpace(ranges, |
1330 start_index, | 1327 start_index, |
1331 end_index, | 1328 end_index, |
1332 &new_start_index, | 1329 &new_start_index, |
1333 &new_end_index, | 1330 &new_end_index, |
1334 &border); | 1331 &border); |
1335 | 1332 |
1336 BlockLabel handle_rest; | 1333 Label handle_rest; |
1337 BlockLabel* above = &handle_rest; | 1334 Label* above = &handle_rest; |
1338 if (border == last + 1) { | 1335 if (border == last + 1) { |
1339 // We didn't find any section that started after the limit, so everything | 1336 // We didn't find any section that started after the limit, so everything |
1340 // above the border is one of the terminal labels. | 1337 // above the border is one of the terminal labels. |
1341 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label; | 1338 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label; |
1342 ASSERT(new_end_index == end_index - 1); | 1339 DCHECK(new_end_index == end_index - 1); |
1343 } | 1340 } |
1344 | 1341 |
1345 ASSERT(start_index <= new_end_index); | 1342 DCHECK_LE(start_index, new_end_index); |
1346 ASSERT(new_start_index <= end_index); | 1343 DCHECK_LE(new_start_index, end_index); |
1347 ASSERT(start_index < new_start_index); | 1344 DCHECK_LT(start_index, new_start_index); |
1348 ASSERT(new_end_index < end_index); | 1345 DCHECK_LT(new_end_index, end_index); |
1349 ASSERT(new_end_index + 1 == new_start_index || | 1346 DCHECK(new_end_index + 1 == new_start_index || |
1350 (new_end_index + 2 == new_start_index && | 1347 (new_end_index + 2 == new_start_index && |
1351 border == ranges->At(new_end_index + 1))); | 1348 border == ranges->at(new_end_index + 1))); |
1352 ASSERT(min_char < border - 1); | 1349 DCHECK_LT(min_char, border - 1); |
1353 ASSERT(border < max_char); | 1350 DCHECK_LT(border, max_char); |
1354 ASSERT(ranges->At(new_end_index) < border); | 1351 DCHECK_LT(ranges->at(new_end_index), border); |
1355 ASSERT(border < ranges->At(new_start_index) || | 1352 DCHECK(border < ranges->at(new_start_index) || |
1356 (border == ranges->At(new_start_index) && | 1353 (border == ranges->at(new_start_index) && |
1357 new_start_index == end_index && | 1354 new_start_index == end_index && |
1358 new_end_index == end_index - 1 && | 1355 new_end_index == end_index - 1 && |
1359 border == last + 1)); | 1356 border == last + 1)); |
1360 ASSERT(new_start_index == 0 || border >= ranges->At(new_start_index - 1)); | 1357 DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1)); |
1361 | 1358 |
1362 masm->CheckCharacterGT(border - 1, above); | 1359 masm->CheckCharacterGT(border - 1, above); |
1363 BlockLabel dummy; | 1360 Label dummy; |
1364 GenerateBranches(masm, | 1361 GenerateBranches(masm, |
1365 ranges, | 1362 ranges, |
1366 start_index, | 1363 start_index, |
1367 new_end_index, | 1364 new_end_index, |
1368 min_char, | 1365 min_char, |
1369 border - 1, | 1366 border - 1, |
1370 &dummy, | 1367 &dummy, |
1371 even_label, | 1368 even_label, |
1372 odd_label); | 1369 odd_label); |
1373 | 1370 if (handle_rest.is_linked()) { |
1374 if (handle_rest.IsLinked()) { | 1371 masm->Bind(&handle_rest); |
1375 masm->BindBlock(&handle_rest); | |
1376 bool flip = (new_start_index & 1) != (start_index & 1); | 1372 bool flip = (new_start_index & 1) != (start_index & 1); |
1377 GenerateBranches(masm, | 1373 GenerateBranches(masm, |
1378 ranges, | 1374 ranges, |
1379 new_start_index, | 1375 new_start_index, |
1380 end_index, | 1376 end_index, |
1381 border, | 1377 border, |
1382 max_char, | 1378 max_char, |
1383 &dummy, | 1379 &dummy, |
1384 flip ? odd_label : even_label, | 1380 flip ? odd_label : even_label, |
1385 flip ? even_label : odd_label); | 1381 flip ? even_label : odd_label); |
1386 } | 1382 } |
1387 } | 1383 } |
1388 | 1384 |
1389 | 1385 |
1390 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1386 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
1391 RegExpCharacterClass* cc, | 1387 RegExpCharacterClass* cc, bool one_byte, |
1392 bool one_byte, | 1388 Label* on_failure, int cp_offset, bool check_offset, |
1393 BlockLabel* on_failure, | 1389 bool preloaded, Zone* zone) { |
1394 intptr_t cp_offset, | 1390 ZoneList<CharacterRange>* ranges = cc->ranges(zone); |
1395 bool check_offset, | |
1396 bool preloaded, | |
1397 Isolate* isolate) { | |
1398 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); | |
1399 if (!CharacterRange::IsCanonical(ranges)) { | 1391 if (!CharacterRange::IsCanonical(ranges)) { |
1400 CharacterRange::Canonicalize(ranges); | 1392 CharacterRange::Canonicalize(ranges); |
1401 } | 1393 } |
1402 | 1394 |
1403 intptr_t max_char; | 1395 int max_char; |
1404 if (one_byte) { | 1396 if (one_byte) { |
1405 max_char = Symbols::kMaxOneCharCodeSymbol; | 1397 max_char = String::kMaxOneByteCharCode; |
1406 } else { | 1398 } else { |
1407 max_char = Utf16::kMaxCodeUnit; | 1399 max_char = String::kMaxUtf16CodeUnit; |
1408 } | 1400 } |
1409 | 1401 |
1410 intptr_t range_count = ranges->length(); | 1402 int range_count = ranges->length(); |
1411 | 1403 |
1412 intptr_t last_valid_range = range_count - 1; | 1404 int last_valid_range = range_count - 1; |
1413 while (last_valid_range >= 0) { | 1405 while (last_valid_range >= 0) { |
1414 CharacterRange& range = (*ranges)[last_valid_range]; | 1406 CharacterRange& range = ranges->at(last_valid_range); |
1415 if (range.from() <= max_char) { | 1407 if (range.from() <= max_char) { |
1416 break; | 1408 break; |
1417 } | 1409 } |
1418 last_valid_range--; | 1410 last_valid_range--; |
1419 } | 1411 } |
1420 | 1412 |
1421 if (last_valid_range < 0) { | 1413 if (last_valid_range < 0) { |
1422 if (!cc->is_negated()) { | 1414 if (!cc->is_negated()) { |
1423 macro_assembler->GoTo(on_failure); | 1415 macro_assembler->GoTo(on_failure); |
1424 } | 1416 } |
1425 if (check_offset) { | 1417 if (check_offset) { |
1426 macro_assembler->CheckPosition(cp_offset, on_failure); | 1418 macro_assembler->CheckPosition(cp_offset, on_failure); |
1427 } | 1419 } |
1428 return; | 1420 return; |
1429 } | 1421 } |
1430 | 1422 |
1431 if (last_valid_range == 0 && | 1423 if (last_valid_range == 0 && |
1432 ranges->At(0).IsEverything(max_char)) { | 1424 ranges->at(0).IsEverything(max_char)) { |
1433 if (cc->is_negated()) { | 1425 if (cc->is_negated()) { |
1434 macro_assembler->GoTo(on_failure); | 1426 macro_assembler->GoTo(on_failure); |
1435 } else { | 1427 } else { |
1436 // This is a common case hit by non-anchored expressions. | 1428 // This is a common case hit by non-anchored expressions. |
1437 if (check_offset) { | 1429 if (check_offset) { |
1438 macro_assembler->CheckPosition(cp_offset, on_failure); | 1430 macro_assembler->CheckPosition(cp_offset, on_failure); |
1439 } | 1431 } |
1440 } | 1432 } |
1441 return; | 1433 return; |
1442 } | 1434 } |
1443 if (last_valid_range == 0 && | 1435 if (last_valid_range == 0 && |
1444 !cc->is_negated() && | 1436 !cc->is_negated() && |
1445 ranges->At(0).IsEverything(max_char)) { | 1437 ranges->at(0).IsEverything(max_char)) { |
1446 // This is a common case hit by non-anchored expressions. | 1438 // This is a common case hit by non-anchored expressions. |
1447 if (check_offset) { | 1439 if (check_offset) { |
1448 macro_assembler->CheckPosition(cp_offset, on_failure); | 1440 macro_assembler->CheckPosition(cp_offset, on_failure); |
1449 } | 1441 } |
1450 return; | 1442 return; |
1451 } | 1443 } |
1452 | 1444 |
1453 if (!preloaded) { | 1445 if (!preloaded) { |
1454 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); | 1446 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); |
1455 } | 1447 } |
1456 | 1448 |
1457 if (cc->is_standard() && | 1449 if (cc->is_standard(zone) && |
1458 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), | 1450 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), |
1459 on_failure)) { | 1451 on_failure)) { |
1460 return; | 1452 return; |
1461 } | 1453 } |
1462 | 1454 |
1463 | 1455 |
1464 // A new list with ascending entries. Each entry is a code unit | 1456 // A new list with ascending entries. Each entry is a code unit |
1465 // where there is a boundary between code units that are part of | 1457 // where there is a boundary between code units that are part of |
1466 // the class and code units that are not. Normally we insert an | 1458 // the class and code units that are not. Normally we insert an |
1467 // entry at zero which goes to the failure label, but if there | 1459 // entry at zero which goes to the failure label, but if there |
1468 // was already one there we fall through for success on that entry. | 1460 // was already one there we fall through for success on that entry. |
1469 // Subsequent entries have alternating meaning (success/failure). | 1461 // Subsequent entries have alternating meaning (success/failure). |
1470 ZoneGrowableArray<int>* range_boundaries = | 1462 ZoneList<int>* range_boundaries = |
1471 new(isolate) ZoneGrowableArray<int>(last_valid_range); | 1463 new(zone) ZoneList<int>(last_valid_range, zone); |
1472 | 1464 |
1473 bool zeroth_entry_is_failure = !cc->is_negated(); | 1465 bool zeroth_entry_is_failure = !cc->is_negated(); |
1474 | 1466 |
1475 for (intptr_t i = 0; i <= last_valid_range; i++) { | 1467 for (int i = 0; i <= last_valid_range; i++) { |
1476 CharacterRange& range = (*ranges)[i]; | 1468 CharacterRange& range = ranges->at(i); |
1477 if (range.from() == 0) { | 1469 if (range.from() == 0) { |
1478 ASSERT(i == 0); | 1470 DCHECK_EQ(i, 0); |
1479 zeroth_entry_is_failure = !zeroth_entry_is_failure; | 1471 zeroth_entry_is_failure = !zeroth_entry_is_failure; |
1480 } else { | 1472 } else { |
1481 range_boundaries->Add(range.from()); | 1473 range_boundaries->Add(range.from(), zone); |
1482 } | 1474 } |
1483 range_boundaries->Add(range.to() + 1); | 1475 range_boundaries->Add(range.to() + 1, zone); |
1484 } | 1476 } |
1485 intptr_t end_index = range_boundaries->length() - 1; | 1477 int end_index = range_boundaries->length() - 1; |
1486 if (range_boundaries->At(end_index) > max_char) { | 1478 if (range_boundaries->at(end_index) > max_char) { |
1487 end_index--; | 1479 end_index--; |
1488 } | 1480 } |
1489 | 1481 |
1490 BlockLabel fall_through; | 1482 Label fall_through; |
1491 GenerateBranches(macro_assembler, | 1483 GenerateBranches(macro_assembler, |
1492 range_boundaries, | 1484 range_boundaries, |
1493 0, // start_index. | 1485 0, // start_index. |
1494 end_index, | 1486 end_index, |
1495 0, // min_char. | 1487 0, // min_char. |
1496 max_char, | 1488 max_char, |
1497 &fall_through, | 1489 &fall_through, |
1498 zeroth_entry_is_failure ? &fall_through : on_failure, | 1490 zeroth_entry_is_failure ? &fall_through : on_failure, |
1499 zeroth_entry_is_failure ? on_failure : &fall_through); | 1491 zeroth_entry_is_failure ? on_failure : &fall_through); |
1500 macro_assembler->BindBlock(&fall_through); | 1492 macro_assembler->Bind(&fall_through); |
1501 } | 1493 } |
1502 | 1494 |
1503 | 1495 |
1504 RegExpNode::~RegExpNode() { | 1496 RegExpNode::~RegExpNode() { |
1505 } | 1497 } |
1506 | 1498 |
1507 | 1499 |
1508 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | 1500 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, |
1509 Trace* trace) { | 1501 Trace* trace) { |
1510 // If we are generating a greedy loop then don't stop and don't reuse code. | 1502 // If we are generating a greedy loop then don't stop and don't reuse code. |
1511 if (trace->stop_node() != NULL) { | 1503 if (trace->stop_node() != NULL) { |
1512 return CONTINUE; | 1504 return CONTINUE; |
1513 } | 1505 } |
1514 | 1506 |
1515 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1507 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
1516 if (trace->is_trivial()) { | 1508 if (trace->is_trivial()) { |
1517 if (label_.IsBound()) { | 1509 if (label_.is_bound()) { |
1518 // We are being asked to generate a generic version, but that's already | 1510 // We are being asked to generate a generic version, but that's already |
1519 // been done so just go to it. | 1511 // been done so just go to it. |
1520 macro_assembler->GoTo(&label_); | 1512 macro_assembler->GoTo(&label_); |
1521 return DONE; | 1513 return DONE; |
1522 } | 1514 } |
1523 if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) { | 1515 if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) { |
1524 // To avoid too deep recursion we push the node to the work queue and just | 1516 // To avoid too deep recursion we push the node to the work queue and just |
1525 // generate a goto here. | 1517 // generate a goto here. |
1526 compiler->AddWork(this); | 1518 compiler->AddWork(this); |
1527 macro_assembler->GoTo(&label_); | 1519 macro_assembler->GoTo(&label_); |
1528 return DONE; | 1520 return DONE; |
1529 } | 1521 } |
1530 // Generate generic version of the node and bind the label for later use. | 1522 // Generate generic version of the node and bind the label for later use. |
1531 macro_assembler->BindBlock(&label_); | 1523 macro_assembler->Bind(&label_); |
1532 return CONTINUE; | 1524 return CONTINUE; |
1533 } | 1525 } |
1534 | 1526 |
1535 // We are being asked to make a non-generic version. Keep track of how many | 1527 // We are being asked to make a non-generic version. Keep track of how many |
1536 // non-generic versions we generate so as not to overdo it. | 1528 // non-generic versions we generate so as not to overdo it. |
1537 trace_count_++; | 1529 trace_count_++; |
1538 if (kRegexpOptimization && | 1530 if (FLAG_regexp_optimization && |
1539 trace_count_ < kMaxCopiesCodeGenerated && | 1531 trace_count_ < kMaxCopiesCodeGenerated && |
1540 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { | 1532 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { |
1541 return CONTINUE; | 1533 return CONTINUE; |
1542 } | 1534 } |
1543 | 1535 |
1544 // If we get here code has been generated for this node too many times or | 1536 // If we get here code has been generated for this node too many times or |
1545 // recursion is too deep. Time to switch to a generic version. The code for | 1537 // recursion is too deep. Time to switch to a generic version. The code for |
1546 // generic versions above can handle deep recursion properly. | 1538 // generic versions above can handle deep recursion properly. |
1547 trace->Flush(compiler, this); | 1539 trace->Flush(compiler, this); |
1548 return DONE; | 1540 return DONE; |
1549 } | 1541 } |
1550 | 1542 |
1551 | 1543 |
1552 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find, | 1544 int ActionNode::EatsAtLeast(int still_to_find, |
1553 intptr_t budget, | 1545 int budget, |
1554 bool not_at_start) { | 1546 bool not_at_start) { |
1555 if (budget <= 0) return 0; | 1547 if (budget <= 0) return 0; |
1556 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! | 1548 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! |
1557 return on_success()->EatsAtLeast(still_to_find, | 1549 return on_success()->EatsAtLeast(still_to_find, |
1558 budget - 1, | 1550 budget - 1, |
1559 not_at_start); | 1551 not_at_start); |
1560 } | 1552 } |
1561 | 1553 |
1562 | 1554 |
1563 void ActionNode::FillInBMInfo(intptr_t offset, | 1555 void ActionNode::FillInBMInfo(int offset, |
1564 intptr_t budget, | 1556 int budget, |
1565 BoyerMooreLookahead* bm, | 1557 BoyerMooreLookahead* bm, |
1566 bool not_at_start) { | 1558 bool not_at_start) { |
1567 if (action_type_ == BEGIN_SUBMATCH) { | 1559 if (action_type_ == BEGIN_SUBMATCH) { |
1568 bm->SetRest(offset); | 1560 bm->SetRest(offset); |
1569 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { | 1561 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { |
1570 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 1562 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
1571 } | 1563 } |
1572 SaveBMInfo(bm, not_at_start, offset); | 1564 SaveBMInfo(bm, not_at_start, offset); |
1573 } | 1565 } |
1574 | 1566 |
1575 | 1567 |
1576 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find, | 1568 int AssertionNode::EatsAtLeast(int still_to_find, |
1577 intptr_t budget, | 1569 int budget, |
1578 bool not_at_start) { | 1570 bool not_at_start) { |
1579 if (budget <= 0) return 0; | 1571 if (budget <= 0) return 0; |
1580 // If we know we are not at the start and we are asked "how many characters | 1572 // If we know we are not at the start and we are asked "how many characters |
1581 // will you match if you succeed?" then we can answer anything since false | 1573 // will you match if you succeed?" then we can answer anything since false |
1582 // implies false. So lets just return the max answer (still_to_find) since | 1574 // implies false. So lets just return the max answer (still_to_find) since |
1583 // that won't prevent us from preloading a lot of characters for the other | 1575 // that won't prevent us from preloading a lot of characters for the other |
1584 // branches in the node graph. | 1576 // branches in the node graph. |
1585 if (assertion_type() == AT_START && not_at_start) return still_to_find; | 1577 if (assertion_type() == AT_START && not_at_start) return still_to_find; |
1586 return on_success()->EatsAtLeast(still_to_find, | 1578 return on_success()->EatsAtLeast(still_to_find, |
1587 budget - 1, | 1579 budget - 1, |
1588 not_at_start); | 1580 not_at_start); |
1589 } | 1581 } |
1590 | 1582 |
1591 | 1583 |
1592 void AssertionNode::FillInBMInfo(intptr_t offset, | 1584 void AssertionNode::FillInBMInfo(int offset, |
1593 intptr_t budget, | 1585 int budget, |
1594 BoyerMooreLookahead* bm, | 1586 BoyerMooreLookahead* bm, |
1595 bool not_at_start) { | 1587 bool not_at_start) { |
1596 // Match the behaviour of EatsAtLeast on this node. | 1588 // Match the behaviour of EatsAtLeast on this node. |
1597 if (assertion_type() == AT_START && not_at_start) return; | 1589 if (assertion_type() == AT_START && not_at_start) return; |
1598 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 1590 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
1599 SaveBMInfo(bm, not_at_start, offset); | 1591 SaveBMInfo(bm, not_at_start, offset); |
1600 } | 1592 } |
1601 | 1593 |
1602 | 1594 |
1603 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find, | 1595 int BackReferenceNode::EatsAtLeast(int still_to_find, |
1604 intptr_t budget, | 1596 int budget, |
1605 bool not_at_start) { | 1597 bool not_at_start) { |
1606 if (budget <= 0) return 0; | 1598 if (budget <= 0) return 0; |
1607 return on_success()->EatsAtLeast(still_to_find, | 1599 return on_success()->EatsAtLeast(still_to_find, |
1608 budget - 1, | 1600 budget - 1, |
1609 not_at_start); | 1601 not_at_start); |
1610 } | 1602 } |
1611 | 1603 |
1612 | 1604 |
1613 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, | 1605 int TextNode::EatsAtLeast(int still_to_find, |
1614 intptr_t budget, | 1606 int budget, |
1615 bool not_at_start) { | 1607 bool not_at_start) { |
1616 intptr_t answer = Length(); | 1608 int answer = Length(); |
1617 if (answer >= still_to_find) return answer; | 1609 if (answer >= still_to_find) return answer; |
1618 if (budget <= 0) return answer; | 1610 if (budget <= 0) return answer; |
1619 // We are not at start after this node so we set the last argument to 'true'. | 1611 // We are not at start after this node so we set the last argument to 'true'. |
1620 return answer + on_success()->EatsAtLeast(still_to_find - answer, | 1612 return answer + on_success()->EatsAtLeast(still_to_find - answer, |
1621 budget - 1, | 1613 budget - 1, |
1622 true); | 1614 true); |
1623 } | 1615 } |
1624 | 1616 |
1625 | 1617 |
1626 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1618 int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find, |
1627 intptr_t budget, | 1619 int budget, |
1628 bool not_at_start) { | 1620 bool not_at_start) { |
1629 if (budget <= 0) return 0; | 1621 if (budget <= 0) return 0; |
1630 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 1622 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
1631 // afterwards. | 1623 // afterwards. |
1632 RegExpNode* node = (*alternatives_)[1].node(); | 1624 RegExpNode* node = alternatives_->at(1).node(); |
1633 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start); | 1625 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
1634 } | 1626 } |
1635 | 1627 |
1636 | 1628 |
1637 void NegativeLookaheadChoiceNode::GetQuickCheckDetails( | 1629 void NegativeLookaheadChoiceNode::GetQuickCheckDetails( |
1638 QuickCheckDetails* details, | 1630 QuickCheckDetails* details, |
1639 RegExpCompiler* compiler, | 1631 RegExpCompiler* compiler, |
1640 intptr_t filled_in, | 1632 int filled_in, |
1641 bool not_at_start) { | 1633 bool not_at_start) { |
1642 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 1634 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
1643 // afterwards. | 1635 // afterwards. |
1644 RegExpNode* node = (*alternatives_)[1].node(); | 1636 RegExpNode* node = alternatives_->at(1).node(); |
1645 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); | 1637 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); |
1646 } | 1638 } |
1647 | 1639 |
1648 | 1640 |
1649 intptr_t ChoiceNode::EatsAtLeastHelper(intptr_t still_to_find, | 1641 int ChoiceNode::EatsAtLeastHelper(int still_to_find, |
1650 intptr_t budget, | 1642 int budget, |
1651 RegExpNode* ignore_this_node, | 1643 RegExpNode* ignore_this_node, |
1652 bool not_at_start) { | 1644 bool not_at_start) { |
1653 if (budget <= 0) return 0; | 1645 if (budget <= 0) return 0; |
1654 intptr_t min = 100; | 1646 int min = 100; |
1655 intptr_t choice_count = alternatives_->length(); | 1647 int choice_count = alternatives_->length(); |
1656 budget = (budget - 1) / choice_count; | 1648 budget = (budget - 1) / choice_count; |
1657 for (intptr_t i = 0; i < choice_count; i++) { | 1649 for (int i = 0; i < choice_count; i++) { |
1658 RegExpNode* node = (*alternatives_)[i].node(); | 1650 RegExpNode* node = alternatives_->at(i).node(); |
1659 if (node == ignore_this_node) continue; | 1651 if (node == ignore_this_node) continue; |
1660 intptr_t node_eats_at_least = | 1652 int node_eats_at_least = |
1661 node->EatsAtLeast(still_to_find, budget, not_at_start); | 1653 node->EatsAtLeast(still_to_find, budget, not_at_start); |
1662 if (node_eats_at_least < min) min = node_eats_at_least; | 1654 if (node_eats_at_least < min) min = node_eats_at_least; |
1663 if (min == 0) return 0; | 1655 if (min == 0) return 0; |
1664 } | 1656 } |
1665 return min; | 1657 return min; |
1666 } | 1658 } |
1667 | 1659 |
1668 | 1660 |
1669 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1661 int LoopChoiceNode::EatsAtLeast(int still_to_find, |
1670 intptr_t budget, | 1662 int budget, |
1671 bool not_at_start) { | 1663 bool not_at_start) { |
1672 return EatsAtLeastHelper(still_to_find, | 1664 return EatsAtLeastHelper(still_to_find, |
1673 budget - 1, | 1665 budget - 1, |
1674 loop_node_, | 1666 loop_node_, |
1675 not_at_start); | 1667 not_at_start); |
1676 } | 1668 } |
1677 | 1669 |
1678 | 1670 |
1679 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1671 int ChoiceNode::EatsAtLeast(int still_to_find, |
1680 intptr_t budget, | 1672 int budget, |
1681 bool not_at_start) { | 1673 bool not_at_start) { |
1682 return EatsAtLeastHelper(still_to_find, | 1674 return EatsAtLeastHelper(still_to_find, |
1683 budget, | 1675 budget, |
1684 NULL, | 1676 NULL, |
1685 not_at_start); | 1677 not_at_start); |
1686 } | 1678 } |
1687 | 1679 |
1688 | 1680 |
1689 // Takes the left-most 1-bit and smears it out, setting all bits to its right. | 1681 // Takes the left-most 1-bit and smears it out, setting all bits to its right. |
1690 static inline uint32_t SmearBitsRight(uint32_t v) { | 1682 static inline uint32_t SmearBitsRight(uint32_t v) { |
1691 v |= v >> 1; | 1683 v |= v >> 1; |
1692 v |= v >> 2; | 1684 v |= v >> 2; |
1693 v |= v >> 4; | 1685 v |= v >> 4; |
1694 v |= v >> 8; | 1686 v |= v >> 8; |
1695 v |= v >> 16; | 1687 v |= v >> 16; |
1696 return v; | 1688 return v; |
1697 } | 1689 } |
1698 | 1690 |
1699 | 1691 |
1700 bool QuickCheckDetails::Rationalize(bool asc) { | 1692 bool QuickCheckDetails::Rationalize(bool asc) { |
1701 bool found_useful_op = false; | 1693 bool found_useful_op = false; |
1702 uint32_t char_mask; | 1694 uint32_t char_mask; |
1703 if (asc) { | 1695 if (asc) { |
1704 char_mask = Symbols::kMaxOneCharCodeSymbol; | 1696 char_mask = String::kMaxOneByteCharCode; |
1705 } else { | 1697 } else { |
1706 char_mask = Utf16::kMaxCodeUnit; | 1698 char_mask = String::kMaxUtf16CodeUnit; |
1707 } | 1699 } |
1708 mask_ = 0; | 1700 mask_ = 0; |
1709 value_ = 0; | 1701 value_ = 0; |
1710 intptr_t char_shift = 0; | 1702 int char_shift = 0; |
1711 for (intptr_t i = 0; i < characters_; i++) { | 1703 for (int i = 0; i < characters_; i++) { |
1712 Position* pos = &positions_[i]; | 1704 Position* pos = &positions_[i]; |
1713 if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != 0) { | 1705 if ((pos->mask & String::kMaxOneByteCharCode) != 0) { |
1714 found_useful_op = true; | 1706 found_useful_op = true; |
1715 } | 1707 } |
1716 mask_ |= (pos->mask & char_mask) << char_shift; | 1708 mask_ |= (pos->mask & char_mask) << char_shift; |
1717 value_ |= (pos->value & char_mask) << char_shift; | 1709 value_ |= (pos->value & char_mask) << char_shift; |
1718 char_shift += asc ? 8 : 16; | 1710 char_shift += asc ? 8 : 16; |
1719 } | 1711 } |
1720 return found_useful_op; | 1712 return found_useful_op; |
1721 } | 1713 } |
1722 | 1714 |
1723 | 1715 |
1724 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, | 1716 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, |
1725 Trace* bounds_check_trace, | 1717 Trace* bounds_check_trace, |
1726 Trace* trace, | 1718 Trace* trace, |
1727 bool preload_has_checked_bounds, | 1719 bool preload_has_checked_bounds, |
1728 BlockLabel* on_possible_success, | 1720 Label* on_possible_success, |
1729 QuickCheckDetails* details, | 1721 QuickCheckDetails* details, |
1730 bool fall_through_on_failure) { | 1722 bool fall_through_on_failure) { |
1731 if (details->characters() == 0) return false; | 1723 if (details->characters() == 0) return false; |
1732 GetQuickCheckDetails( | 1724 GetQuickCheckDetails( |
1733 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE); | 1725 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE); |
1734 if (details->cannot_match()) return false; | 1726 if (details->cannot_match()) return false; |
1735 if (!details->Rationalize(compiler->one_byte())) return false; | 1727 if (!details->Rationalize(compiler->one_byte())) return false; |
1736 ASSERT(details->characters() == 1 || | 1728 DCHECK(details->characters() == 1 || |
1737 compiler->macro_assembler()->CanReadUnaligned()); | 1729 compiler->macro_assembler()->CanReadUnaligned()); |
1738 uint32_t mask = details->mask(); | 1730 uint32_t mask = details->mask(); |
1739 uint32_t value = details->value(); | 1731 uint32_t value = details->value(); |
1740 | 1732 |
1741 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 1733 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
1742 | 1734 |
1743 if (trace->characters_preloaded() != details->characters()) { | 1735 if (trace->characters_preloaded() != details->characters()) { |
1744 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset()); | 1736 DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset()); |
1745 // We are attempting to preload the minimum number of characters | 1737 // We are attempting to preload the minimum number of characters |
1746 // any choice would eat, so if the bounds check fails, then none of the | 1738 // any choice would eat, so if the bounds check fails, then none of the |
1747 // choices can succeed, so we can just immediately backtrack, rather | 1739 // choices can succeed, so we can just immediately backtrack, rather |
1748 // than go to the next choice. | 1740 // than go to the next choice. |
1749 assembler->LoadCurrentCharacter(trace->cp_offset(), | 1741 assembler->LoadCurrentCharacter(trace->cp_offset(), |
1750 bounds_check_trace->backtrack(), | 1742 bounds_check_trace->backtrack(), |
1751 !preload_has_checked_bounds, | 1743 !preload_has_checked_bounds, |
1752 details->characters()); | 1744 details->characters()); |
1753 } | 1745 } |
1754 | 1746 |
1755 | 1747 |
1756 bool need_mask = true; | 1748 bool need_mask = true; |
1757 | 1749 |
1758 if (details->characters() == 1) { | 1750 if (details->characters() == 1) { |
1759 // If number of characters preloaded is 1 then we used a byte or 16 bit | 1751 // If number of characters preloaded is 1 then we used a byte or 16 bit |
1760 // load so the value is already masked down. | 1752 // load so the value is already masked down. |
1761 uint32_t char_mask; | 1753 uint32_t char_mask; |
1762 if (compiler->one_byte()) { | 1754 if (compiler->one_byte()) { |
1763 char_mask = Symbols::kMaxOneCharCodeSymbol; | 1755 char_mask = String::kMaxOneByteCharCode; |
1764 } else { | 1756 } else { |
1765 char_mask = Utf16::kMaxCodeUnit; | 1757 char_mask = String::kMaxUtf16CodeUnit; |
1766 } | 1758 } |
1767 if ((mask & char_mask) == char_mask) need_mask = false; | 1759 if ((mask & char_mask) == char_mask) need_mask = false; |
1768 mask &= char_mask; | 1760 mask &= char_mask; |
1769 } else { | 1761 } else { |
1770 // For 2-character preloads in one-byte mode or 1-character preloads in | 1762 // For 2-character preloads in one-byte mode or 1-character preloads in |
1771 // two-byte mode we also use a 16 bit load with zero extend. | 1763 // two-byte mode we also use a 16 bit load with zero extend. |
1772 if (details->characters() == 2 && compiler->one_byte()) { | 1764 if (details->characters() == 2 && compiler->one_byte()) { |
1773 if ((mask & 0xffff) == 0xffff) need_mask = false; | 1765 if ((mask & 0xffff) == 0xffff) need_mask = false; |
1774 } else if (details->characters() == 1 && !compiler->one_byte()) { | 1766 } else if (details->characters() == 1 && !compiler->one_byte()) { |
1775 if ((mask & 0xffff) == 0xffff) need_mask = false; | 1767 if ((mask & 0xffff) == 0xffff) need_mask = false; |
(...skipping 22 matching lines...) Expand all Loading... |
1798 // Here is the meat of GetQuickCheckDetails (see also the comment on the | 1790 // Here is the meat of GetQuickCheckDetails (see also the comment on the |
1799 // super-class in the .h file). | 1791 // super-class in the .h file). |
1800 // | 1792 // |
1801 // We iterate along the text object, building up for each character a | 1793 // We iterate along the text object, building up for each character a |
1802 // mask and value that can be used to test for a quick failure to match. | 1794 // mask and value that can be used to test for a quick failure to match. |
1803 // The masks and values for the positions will be combined into a single | 1795 // The masks and values for the positions will be combined into a single |
1804 // machine word for the current character width in order to be used in | 1796 // machine word for the current character width in order to be used in |
1805 // generating a quick check. | 1797 // generating a quick check. |
1806 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, | 1798 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
1807 RegExpCompiler* compiler, | 1799 RegExpCompiler* compiler, |
1808 intptr_t characters_filled_in, | 1800 int characters_filled_in, |
1809 bool not_at_start) { | 1801 bool not_at_start) { |
1810 #if defined(__GNUC__) | 1802 Isolate* isolate = compiler->macro_assembler()->zone()->isolate(); |
1811 // TODO(zerny): Make the combination code byte-order independent. | 1803 DCHECK(characters_filled_in < details->characters()); |
1812 ASSERT(details->characters() == 1 || | 1804 int characters = details->characters(); |
1813 (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)); | 1805 int char_mask; |
1814 #endif | |
1815 ASSERT(characters_filled_in < details->characters()); | |
1816 intptr_t characters = details->characters(); | |
1817 intptr_t char_mask; | |
1818 if (compiler->one_byte()) { | 1806 if (compiler->one_byte()) { |
1819 char_mask = Symbols::kMaxOneCharCodeSymbol; | 1807 char_mask = String::kMaxOneByteCharCode; |
1820 } else { | 1808 } else { |
1821 char_mask = Utf16::kMaxCodeUnit; | 1809 char_mask = String::kMaxUtf16CodeUnit; |
1822 } | 1810 } |
1823 for (intptr_t k = 0; k < elms_->length(); k++) { | 1811 for (int k = 0; k < elms_->length(); k++) { |
1824 TextElement elm = elms_->At(k); | 1812 TextElement elm = elms_->at(k); |
1825 if (elm.text_type() == TextElement::ATOM) { | 1813 if (elm.text_type() == TextElement::ATOM) { |
1826 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); | 1814 Vector<const uc16> quarks = elm.atom()->data(); |
1827 for (intptr_t i = 0; i < characters && i < quarks->length(); i++) { | 1815 for (int i = 0; i < characters && i < quarks.length(); i++) { |
1828 QuickCheckDetails::Position* pos = | 1816 QuickCheckDetails::Position* pos = |
1829 details->positions(characters_filled_in); | 1817 details->positions(characters_filled_in); |
1830 uint16_t c = quarks->At(i); | 1818 uc16 c = quarks[i]; |
1831 if (c > char_mask) { | 1819 if (c > char_mask) { |
1832 // If we expect a non-Latin1 character from an one-byte string, | 1820 // If we expect a non-Latin1 character from an one-byte string, |
1833 // there is no way we can match. Not even case independent | 1821 // there is no way we can match. Not even case-independent |
1834 // matching can turn an Latin1 character into non-Latin1 or | 1822 // matching can turn an Latin1 character into non-Latin1 or |
1835 // vice versa. | 1823 // vice versa. |
1836 // TODO(dcarney): issue 3550. Verify that this works as expected. | 1824 // TODO(dcarney): issue 3550. Verify that this works as expected. |
1837 // For example, \u0178 is uppercase of \u00ff (y-umlaut). | 1825 // For example, \u0178 is uppercase of \u00ff (y-umlaut). |
1838 details->set_cannot_match(); | 1826 details->set_cannot_match(); |
1839 pos->determines_perfectly = false; | 1827 pos->determines_perfectly = false; |
1840 return; | 1828 return; |
1841 } | 1829 } |
1842 if (compiler->ignore_case()) { | 1830 if (compiler->ignore_case()) { |
1843 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1831 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
1844 intptr_t length = | 1832 int length = GetCaseIndependentLetters(isolate, c, |
1845 GetCaseIndependentLetters(c, compiler->one_byte(), chars); | 1833 compiler->one_byte(), chars); |
1846 ASSERT(length != 0); // Can only happen if c > char_mask (see above). | 1834 DCHECK(length != 0); // Can only happen if c > char_mask (see above). |
1847 if (length == 1) { | 1835 if (length == 1) { |
1848 // This letter has no case equivalents, so it's nice and simple | 1836 // This letter has no case equivalents, so it's nice and simple |
1849 // and the mask-compare will determine definitely whether we have | 1837 // and the mask-compare will determine definitely whether we have |
1850 // a match at this character position. | 1838 // a match at this character position. |
1851 pos->mask = char_mask; | 1839 pos->mask = char_mask; |
1852 pos->value = c; | 1840 pos->value = c; |
1853 pos->determines_perfectly = true; | 1841 pos->determines_perfectly = true; |
1854 } else { | 1842 } else { |
1855 uint32_t common_bits = char_mask; | 1843 uint32_t common_bits = char_mask; |
1856 uint32_t bits = chars[0]; | 1844 uint32_t bits = chars[0]; |
1857 for (intptr_t j = 1; j < length; j++) { | 1845 for (int j = 1; j < length; j++) { |
1858 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits); | 1846 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits); |
1859 common_bits ^= differing_bits; | 1847 common_bits ^= differing_bits; |
1860 bits &= common_bits; | 1848 bits &= common_bits; |
1861 } | 1849 } |
1862 // If length is 2 and common bits has only one zero in it then | 1850 // If length is 2 and common bits has only one zero in it then |
1863 // our mask and compare instruction will determine definitely | 1851 // our mask and compare instruction will determine definitely |
1864 // whether we have a match at this character position. Otherwise | 1852 // whether we have a match at this character position. Otherwise |
1865 // it can only be an approximate check. | 1853 // it can only be an approximate check. |
1866 uint32_t one_zero = (common_bits | ~char_mask); | 1854 uint32_t one_zero = (common_bits | ~char_mask); |
1867 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) { | 1855 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) { |
1868 pos->determines_perfectly = true; | 1856 pos->determines_perfectly = true; |
1869 } | 1857 } |
1870 pos->mask = common_bits; | 1858 pos->mask = common_bits; |
1871 pos->value = bits; | 1859 pos->value = bits; |
1872 } | 1860 } |
1873 } else { | 1861 } else { |
1874 // Don't ignore case. Nice simple case where the mask-compare will | 1862 // Don't ignore case. Nice simple case where the mask-compare will |
1875 // determine definitely whether we have a match at this character | 1863 // determine definitely whether we have a match at this character |
1876 // position. | 1864 // position. |
1877 pos->mask = char_mask; | 1865 pos->mask = char_mask; |
1878 pos->value = c; | 1866 pos->value = c; |
1879 pos->determines_perfectly = true; | 1867 pos->determines_perfectly = true; |
1880 } | 1868 } |
1881 characters_filled_in++; | 1869 characters_filled_in++; |
1882 ASSERT(characters_filled_in <= details->characters()); | 1870 DCHECK(characters_filled_in <= details->characters()); |
1883 if (characters_filled_in == details->characters()) { | 1871 if (characters_filled_in == details->characters()) { |
1884 return; | 1872 return; |
1885 } | 1873 } |
1886 } | 1874 } |
1887 } else { | 1875 } else { |
1888 QuickCheckDetails::Position* pos = | 1876 QuickCheckDetails::Position* pos = |
1889 details->positions(characters_filled_in); | 1877 details->positions(characters_filled_in); |
1890 RegExpCharacterClass* tree = elm.char_class(); | 1878 RegExpCharacterClass* tree = elm.char_class(); |
1891 ZoneGrowableArray<CharacterRange>* ranges = tree->ranges(); | 1879 ZoneList<CharacterRange>* ranges = tree->ranges(zone()); |
1892 if (tree->is_negated()) { | 1880 if (tree->is_negated()) { |
1893 // A quick check uses multi-character mask and compare. There is no | 1881 // A quick check uses multi-character mask and compare. There is no |
1894 // useful way to incorporate a negative char class into this scheme | 1882 // useful way to incorporate a negative char class into this scheme |
1895 // so we just conservatively create a mask and value that will always | 1883 // so we just conservatively create a mask and value that will always |
1896 // succeed. | 1884 // succeed. |
1897 pos->mask = 0; | 1885 pos->mask = 0; |
1898 pos->value = 0; | 1886 pos->value = 0; |
1899 } else { | 1887 } else { |
1900 intptr_t first_range = 0; | 1888 int first_range = 0; |
1901 while (ranges->At(first_range).from() > char_mask) { | 1889 while (ranges->at(first_range).from() > char_mask) { |
1902 first_range++; | 1890 first_range++; |
1903 if (first_range == ranges->length()) { | 1891 if (first_range == ranges->length()) { |
1904 details->set_cannot_match(); | 1892 details->set_cannot_match(); |
1905 pos->determines_perfectly = false; | 1893 pos->determines_perfectly = false; |
1906 return; | 1894 return; |
1907 } | 1895 } |
1908 } | 1896 } |
1909 CharacterRange range = ranges->At(first_range); | 1897 CharacterRange range = ranges->at(first_range); |
1910 uint16_t from = range.from(); | 1898 uc16 from = range.from(); |
1911 uint16_t to = range.to(); | 1899 uc16 to = range.to(); |
1912 if (to > char_mask) { | 1900 if (to > char_mask) { |
1913 to = char_mask; | 1901 to = char_mask; |
1914 } | 1902 } |
1915 uint32_t differing_bits = (from ^ to); | 1903 uint32_t differing_bits = (from ^ to); |
1916 // A mask and compare is only perfect if the differing bits form a | 1904 // A mask and compare is only perfect if the differing bits form a |
1917 // number like 00011111 with one single block of trailing 1s. | 1905 // number like 00011111 with one single block of trailing 1s. |
1918 if ((differing_bits & (differing_bits + 1)) == 0 && | 1906 if ((differing_bits & (differing_bits + 1)) == 0 && |
1919 from + differing_bits == to) { | 1907 from + differing_bits == to) { |
1920 pos->determines_perfectly = true; | 1908 pos->determines_perfectly = true; |
1921 } | 1909 } |
1922 uint32_t common_bits = ~SmearBitsRight(differing_bits); | 1910 uint32_t common_bits = ~SmearBitsRight(differing_bits); |
1923 uint32_t bits = (from & common_bits); | 1911 uint32_t bits = (from & common_bits); |
1924 for (intptr_t i = first_range + 1; i < ranges->length(); i++) { | 1912 for (int i = first_range + 1; i < ranges->length(); i++) { |
1925 CharacterRange range = ranges->At(i); | 1913 CharacterRange range = ranges->at(i); |
1926 uint16_t from = range.from(); | 1914 uc16 from = range.from(); |
1927 uint16_t to = range.to(); | 1915 uc16 to = range.to(); |
1928 if (from > char_mask) continue; | 1916 if (from > char_mask) continue; |
1929 if (to > char_mask) to = char_mask; | 1917 if (to > char_mask) to = char_mask; |
1930 // Here we are combining more ranges into the mask and compare | 1918 // Here we are combining more ranges into the mask and compare |
1931 // value. With each new range the mask becomes more sparse and | 1919 // value. With each new range the mask becomes more sparse and |
1932 // so the chances of a false positive rise. A character class | 1920 // so the chances of a false positive rise. A character class |
1933 // with multiple ranges is assumed never to be equivalent to a | 1921 // with multiple ranges is assumed never to be equivalent to a |
1934 // mask and compare operation. | 1922 // mask and compare operation. |
1935 pos->determines_perfectly = false; | 1923 pos->determines_perfectly = false; |
1936 uint32_t new_common_bits = (from ^ to); | 1924 uint32_t new_common_bits = (from ^ to); |
1937 new_common_bits = ~SmearBitsRight(new_common_bits); | 1925 new_common_bits = ~SmearBitsRight(new_common_bits); |
1938 common_bits &= new_common_bits; | 1926 common_bits &= new_common_bits; |
1939 bits &= new_common_bits; | 1927 bits &= new_common_bits; |
1940 uint32_t differing_bits = (from & common_bits) ^ bits; | 1928 uint32_t differing_bits = (from & common_bits) ^ bits; |
1941 common_bits ^= differing_bits; | 1929 common_bits ^= differing_bits; |
1942 bits &= common_bits; | 1930 bits &= common_bits; |
1943 } | 1931 } |
1944 pos->mask = common_bits; | 1932 pos->mask = common_bits; |
1945 pos->value = bits; | 1933 pos->value = bits; |
1946 } | 1934 } |
1947 characters_filled_in++; | 1935 characters_filled_in++; |
1948 ASSERT(characters_filled_in <= details->characters()); | 1936 DCHECK(characters_filled_in <= details->characters()); |
1949 if (characters_filled_in == details->characters()) { | 1937 if (characters_filled_in == details->characters()) { |
1950 return; | 1938 return; |
1951 } | 1939 } |
1952 } | 1940 } |
1953 } | 1941 } |
1954 ASSERT(characters_filled_in != details->characters()); | 1942 DCHECK(characters_filled_in != details->characters()); |
1955 if (!details->cannot_match()) { | 1943 if (!details->cannot_match()) { |
1956 on_success()-> GetQuickCheckDetails(details, | 1944 on_success()-> GetQuickCheckDetails(details, |
1957 compiler, | 1945 compiler, |
1958 characters_filled_in, | 1946 characters_filled_in, |
1959 true); | 1947 true); |
1960 } | 1948 } |
1961 } | 1949 } |
1962 | 1950 |
1963 | 1951 |
1964 void QuickCheckDetails::Clear() { | 1952 void QuickCheckDetails::Clear() { |
1965 for (int i = 0; i < characters_; i++) { | 1953 for (int i = 0; i < characters_; i++) { |
1966 positions_[i].mask = 0; | 1954 positions_[i].mask = 0; |
1967 positions_[i].value = 0; | 1955 positions_[i].value = 0; |
1968 positions_[i].determines_perfectly = false; | 1956 positions_[i].determines_perfectly = false; |
1969 } | 1957 } |
1970 characters_ = 0; | 1958 characters_ = 0; |
1971 } | 1959 } |
1972 | 1960 |
1973 | 1961 |
1974 void QuickCheckDetails::Advance(intptr_t by, bool one_byte) { | 1962 void QuickCheckDetails::Advance(int by, bool one_byte) { |
1975 ASSERT(by >= 0); | 1963 DCHECK(by >= 0); |
1976 if (by >= characters_) { | 1964 if (by >= characters_) { |
1977 Clear(); | 1965 Clear(); |
1978 return; | 1966 return; |
1979 } | 1967 } |
1980 for (intptr_t i = 0; i < characters_ - by; i++) { | 1968 for (int i = 0; i < characters_ - by; i++) { |
1981 positions_[i] = positions_[by + i]; | 1969 positions_[i] = positions_[by + i]; |
1982 } | 1970 } |
1983 for (intptr_t i = characters_ - by; i < characters_; i++) { | 1971 for (int i = characters_ - by; i < characters_; i++) { |
1984 positions_[i].mask = 0; | 1972 positions_[i].mask = 0; |
1985 positions_[i].value = 0; | 1973 positions_[i].value = 0; |
1986 positions_[i].determines_perfectly = false; | 1974 positions_[i].determines_perfectly = false; |
1987 } | 1975 } |
1988 characters_ -= by; | 1976 characters_ -= by; |
1989 // We could change mask_ and value_ here but we would never advance unless | 1977 // We could change mask_ and value_ here but we would never advance unless |
1990 // they had already been used in a check and they won't be used again because | 1978 // they had already been used in a check and they won't be used again because |
1991 // it would gain us nothing. So there's no point. | 1979 // it would gain us nothing. So there's no point. |
1992 } | 1980 } |
1993 | 1981 |
1994 | 1982 |
1995 void QuickCheckDetails::Merge(QuickCheckDetails* other, intptr_t from_index) { | 1983 void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) { |
1996 ASSERT(characters_ == other->characters_); | 1984 DCHECK(characters_ == other->characters_); |
1997 if (other->cannot_match_) { | 1985 if (other->cannot_match_) { |
1998 return; | 1986 return; |
1999 } | 1987 } |
2000 if (cannot_match_) { | 1988 if (cannot_match_) { |
2001 *this = *other; | 1989 *this = *other; |
2002 return; | 1990 return; |
2003 } | 1991 } |
2004 for (intptr_t i = from_index; i < characters_; i++) { | 1992 for (int i = from_index; i < characters_; i++) { |
2005 QuickCheckDetails::Position* pos = positions(i); | 1993 QuickCheckDetails::Position* pos = positions(i); |
2006 QuickCheckDetails::Position* other_pos = other->positions(i); | 1994 QuickCheckDetails::Position* other_pos = other->positions(i); |
2007 if (pos->mask != other_pos->mask || | 1995 if (pos->mask != other_pos->mask || |
2008 pos->value != other_pos->value || | 1996 pos->value != other_pos->value || |
2009 !other_pos->determines_perfectly) { | 1997 !other_pos->determines_perfectly) { |
2010 // Our mask-compare operation will be approximate unless we have the | 1998 // Our mask-compare operation will be approximate unless we have the |
2011 // exact same operation on both sides of the alternation. | 1999 // exact same operation on both sides of the alternation. |
2012 pos->determines_perfectly = false; | 2000 pos->determines_perfectly = false; |
2013 } | 2001 } |
2014 pos->mask &= other_pos->mask; | 2002 pos->mask &= other_pos->mask; |
2015 pos->value &= pos->mask; | 2003 pos->value &= pos->mask; |
2016 other_pos->value &= pos->mask; | 2004 other_pos->value &= pos->mask; |
2017 uint16_t differing_bits = (pos->value ^ other_pos->value); | 2005 uc16 differing_bits = (pos->value ^ other_pos->value); |
2018 pos->mask &= ~differing_bits; | 2006 pos->mask &= ~differing_bits; |
2019 pos->value &= pos->mask; | 2007 pos->value &= pos->mask; |
2020 } | 2008 } |
2021 } | 2009 } |
2022 | 2010 |
2023 | 2011 |
2024 class VisitMarker : public ValueObject { | 2012 class VisitMarker { |
2025 public: | 2013 public: |
2026 explicit VisitMarker(NodeInfo* info) : info_(info) { | 2014 explicit VisitMarker(NodeInfo* info) : info_(info) { |
2027 ASSERT(!info->visited); | 2015 DCHECK(!info->visited); |
2028 info->visited = true; | 2016 info->visited = true; |
2029 } | 2017 } |
2030 ~VisitMarker() { | 2018 ~VisitMarker() { |
2031 info_->visited = false; | 2019 info_->visited = false; |
2032 } | 2020 } |
2033 private: | 2021 private: |
2034 NodeInfo* info_; | 2022 NodeInfo* info_; |
2035 }; | 2023 }; |
2036 | 2024 |
2037 | 2025 |
2038 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) { | 2026 RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) { |
2039 if (info()->replacement_calculated) return replacement(); | 2027 if (info()->replacement_calculated) return replacement(); |
2040 if (depth < 0) return this; | 2028 if (depth < 0) return this; |
2041 ASSERT(!info()->visited); | 2029 DCHECK(!info()->visited); |
2042 VisitMarker marker(info()); | 2030 VisitMarker marker(info()); |
2043 return FilterSuccessor(depth - 1, ignore_case); | 2031 return FilterSuccessor(depth - 1, ignore_case); |
2044 } | 2032 } |
2045 | 2033 |
2046 | 2034 |
2047 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) { | 2035 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { |
2048 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); | 2036 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); |
2049 if (next == NULL) return set_replacement(NULL); | 2037 if (next == NULL) return set_replacement(NULL); |
2050 on_success_ = next; | 2038 on_success_ = next; |
2051 return set_replacement(this); | 2039 return set_replacement(this); |
2052 } | 2040 } |
2053 | 2041 |
2054 | 2042 |
2055 // We need to check for the following characters: 0x39c 0x3bc 0x178. | 2043 // We need to check for the following characters: 0x39c 0x3bc 0x178. |
2056 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { | 2044 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { |
2057 // TODO(dcarney): this could be a lot more efficient. | 2045 // TODO(dcarney): this could be a lot more efficient. |
2058 return range.Contains(0x39c) || | 2046 return range.Contains(0x39c) || |
2059 range.Contains(0x3bc) || range.Contains(0x178); | 2047 range.Contains(0x3bc) || range.Contains(0x178); |
2060 } | 2048 } |
2061 | 2049 |
2062 | 2050 |
2063 static bool RangesContainLatin1Equivalents( | 2051 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { |
2064 ZoneGrowableArray<CharacterRange>* ranges) { | 2052 for (int i = 0; i < ranges->length(); i++) { |
2065 for (intptr_t i = 0; i < ranges->length(); i++) { | |
2066 // TODO(dcarney): this could be a lot more efficient. | 2053 // TODO(dcarney): this could be a lot more efficient. |
2067 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true; | 2054 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; |
2068 } | 2055 } |
2069 return false; | 2056 return false; |
2070 } | 2057 } |
2071 | 2058 |
2072 | 2059 |
2073 static uint16_t ConvertNonLatin1ToLatin1(uint16_t c) { | 2060 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { |
2074 ASSERT(c > Symbols::kMaxOneCharCodeSymbol); | |
2075 switch (c) { | |
2076 // This are equivalent characters in unicode. | |
2077 case 0x39c: | |
2078 case 0x3bc: | |
2079 return 0xb5; | |
2080 // This is an uppercase of a Latin-1 character | |
2081 // outside of Latin-1. | |
2082 case 0x178: | |
2083 return 0xff; | |
2084 } | |
2085 return 0; | |
2086 } | |
2087 | |
2088 | |
2089 RegExpNode* TextNode::FilterOneByte(intptr_t depth, bool ignore_case) { | |
2090 if (info()->replacement_calculated) return replacement(); | 2061 if (info()->replacement_calculated) return replacement(); |
2091 if (depth < 0) return this; | 2062 if (depth < 0) return this; |
2092 ASSERT(!info()->visited); | 2063 DCHECK(!info()->visited); |
2093 VisitMarker marker(info()); | 2064 VisitMarker marker(info()); |
2094 intptr_t element_count = elms_->length(); | 2065 int element_count = elms_->length(); |
2095 for (intptr_t i = 0; i < element_count; i++) { | 2066 for (int i = 0; i < element_count; i++) { |
2096 TextElement elm = elms_->At(i); | 2067 TextElement elm = elms_->at(i); |
2097 if (elm.text_type() == TextElement::ATOM) { | 2068 if (elm.text_type() == TextElement::ATOM) { |
2098 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); | 2069 Vector<const uc16> quarks = elm.atom()->data(); |
2099 for (intptr_t j = 0; j < quarks->length(); j++) { | 2070 for (int j = 0; j < quarks.length(); j++) { |
2100 uint16_t c = quarks->At(j); | 2071 uint16_t c = quarks[j]; |
2101 if (c <= Symbols::kMaxOneCharCodeSymbol) continue; | 2072 if (c <= String::kMaxOneByteCharCode) continue; |
2102 if (!ignore_case) return set_replacement(NULL); | 2073 if (!ignore_case) return set_replacement(NULL); |
2103 // Here, we need to check for characters whose upper and lower cases | 2074 // Here, we need to check for characters whose upper and lower cases |
2104 // are outside the Latin-1 range. | 2075 // are outside the Latin-1 range. |
2105 uint16_t converted = ConvertNonLatin1ToLatin1(c); | 2076 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); |
2106 // Character is outside Latin-1 completely | 2077 // Character is outside Latin-1 completely |
2107 if (converted == 0) return set_replacement(NULL); | 2078 if (converted == 0) return set_replacement(NULL); |
2108 // Convert quark to Latin-1 in place. | 2079 // Convert quark to Latin-1 in place. |
2109 (*quarks)[0] = converted; | 2080 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); |
| 2081 copy[j] = converted; |
2110 } | 2082 } |
2111 } else { | 2083 } else { |
2112 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); | 2084 DCHECK(elm.text_type() == TextElement::CHAR_CLASS); |
2113 RegExpCharacterClass* cc = elm.char_class(); | 2085 RegExpCharacterClass* cc = elm.char_class(); |
2114 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); | 2086 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
2115 if (!CharacterRange::IsCanonical(ranges)) { | 2087 if (!CharacterRange::IsCanonical(ranges)) { |
2116 CharacterRange::Canonicalize(ranges); | 2088 CharacterRange::Canonicalize(ranges); |
2117 } | 2089 } |
2118 // Now they are in order so we only need to look at the first. | 2090 // Now they are in order so we only need to look at the first. |
2119 intptr_t range_count = ranges->length(); | 2091 int range_count = ranges->length(); |
2120 if (cc->is_negated()) { | 2092 if (cc->is_negated()) { |
2121 if (range_count != 0 && | 2093 if (range_count != 0 && |
2122 ranges->At(0).from() == 0 && | 2094 ranges->at(0).from() == 0 && |
2123 ranges->At(0).to() >= Symbols::kMaxOneCharCodeSymbol) { | 2095 ranges->at(0).to() >= String::kMaxOneByteCharCode) { |
2124 // This will be handled in a later filter. | 2096 // This will be handled in a later filter. |
2125 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2097 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
2126 return set_replacement(NULL); | 2098 return set_replacement(NULL); |
2127 } | 2099 } |
2128 } else { | 2100 } else { |
2129 if (range_count == 0 || | 2101 if (range_count == 0 || |
2130 ranges->At(0).from() > Symbols::kMaxOneCharCodeSymbol) { | 2102 ranges->at(0).from() > String::kMaxOneByteCharCode) { |
2131 // This will be handled in a later filter. | 2103 // This will be handled in a later filter. |
2132 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2104 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
2133 return set_replacement(NULL); | 2105 return set_replacement(NULL); |
2134 } | 2106 } |
2135 } | 2107 } |
2136 } | 2108 } |
2137 } | 2109 } |
2138 return FilterSuccessor(depth - 1, ignore_case); | 2110 return FilterSuccessor(depth - 1, ignore_case); |
2139 } | 2111 } |
2140 | 2112 |
2141 | 2113 |
2142 RegExpNode* LoopChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { | 2114 RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) { |
2143 if (info()->replacement_calculated) return replacement(); | 2115 if (info()->replacement_calculated) return replacement(); |
2144 if (depth < 0) return this; | 2116 if (depth < 0) return this; |
2145 if (info()->visited) return this; | 2117 if (info()->visited) return this; |
2146 { | 2118 { |
2147 VisitMarker marker(info()); | 2119 VisitMarker marker(info()); |
2148 | 2120 |
2149 RegExpNode* continue_replacement = | 2121 RegExpNode* continue_replacement = |
2150 continue_node_->FilterOneByte(depth - 1, ignore_case); | 2122 continue_node_->FilterOneByte(depth - 1, ignore_case); |
2151 // If we can't continue after the loop then there is no sense in doing the | 2123 // If we can't continue after the loop then there is no sense in doing the |
2152 // loop. | 2124 // loop. |
2153 if (continue_replacement == NULL) return set_replacement(NULL); | 2125 if (continue_replacement == NULL) return set_replacement(NULL); |
2154 } | 2126 } |
2155 | 2127 |
2156 return ChoiceNode::FilterOneByte(depth - 1, ignore_case); | 2128 return ChoiceNode::FilterOneByte(depth - 1, ignore_case); |
2157 } | 2129 } |
2158 | 2130 |
2159 | 2131 |
2160 RegExpNode* ChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { | 2132 RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) { |
2161 if (info()->replacement_calculated) return replacement(); | 2133 if (info()->replacement_calculated) return replacement(); |
2162 if (depth < 0) return this; | 2134 if (depth < 0) return this; |
2163 if (info()->visited) return this; | 2135 if (info()->visited) return this; |
2164 VisitMarker marker(info()); | 2136 VisitMarker marker(info()); |
2165 intptr_t choice_count = alternatives_->length(); | 2137 int choice_count = alternatives_->length(); |
2166 | 2138 |
2167 for (intptr_t i = 0; i < choice_count; i++) { | 2139 for (int i = 0; i < choice_count; i++) { |
2168 GuardedAlternative alternative = alternatives_->At(i); | 2140 GuardedAlternative alternative = alternatives_->at(i); |
2169 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { | 2141 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { |
2170 set_replacement(this); | 2142 set_replacement(this); |
2171 return this; | 2143 return this; |
2172 } | 2144 } |
2173 } | 2145 } |
2174 | 2146 |
2175 intptr_t surviving = 0; | 2147 int surviving = 0; |
2176 RegExpNode* survivor = NULL; | 2148 RegExpNode* survivor = NULL; |
2177 for (intptr_t i = 0; i < choice_count; i++) { | 2149 for (int i = 0; i < choice_count; i++) { |
2178 GuardedAlternative alternative = alternatives_->At(i); | 2150 GuardedAlternative alternative = alternatives_->at(i); |
2179 RegExpNode* replacement = | 2151 RegExpNode* replacement = |
2180 alternative.node()->FilterOneByte(depth - 1, ignore_case); | 2152 alternative.node()->FilterOneByte(depth - 1, ignore_case); |
2181 ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK. | 2153 DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK. |
2182 if (replacement != NULL) { | 2154 if (replacement != NULL) { |
2183 (*alternatives_)[i].set_node(replacement); | 2155 alternatives_->at(i).set_node(replacement); |
2184 surviving++; | 2156 surviving++; |
2185 survivor = replacement; | 2157 survivor = replacement; |
2186 } | 2158 } |
2187 } | 2159 } |
2188 if (surviving < 2) return set_replacement(survivor); | 2160 if (surviving < 2) return set_replacement(survivor); |
2189 | 2161 |
2190 set_replacement(this); | 2162 set_replacement(this); |
2191 if (surviving == choice_count) { | 2163 if (surviving == choice_count) { |
2192 return this; | 2164 return this; |
2193 } | 2165 } |
2194 // Only some of the nodes survived the filtering. We need to rebuild the | 2166 // Only some of the nodes survived the filtering. We need to rebuild the |
2195 // alternatives list. | 2167 // alternatives list. |
2196 ZoneGrowableArray<GuardedAlternative>* new_alternatives = | 2168 ZoneList<GuardedAlternative>* new_alternatives = |
2197 new(I) ZoneGrowableArray<GuardedAlternative>(surviving); | 2169 new(zone()) ZoneList<GuardedAlternative>(surviving, zone()); |
2198 for (intptr_t i = 0; i < choice_count; i++) { | 2170 for (int i = 0; i < choice_count; i++) { |
2199 RegExpNode* replacement = | 2171 RegExpNode* replacement = |
2200 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case); | 2172 alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case); |
2201 if (replacement != NULL) { | 2173 if (replacement != NULL) { |
2202 (*alternatives_)[i].set_node(replacement); | 2174 alternatives_->at(i).set_node(replacement); |
2203 new_alternatives->Add((*alternatives_)[i]); | 2175 new_alternatives->Add(alternatives_->at(i), zone()); |
2204 } | 2176 } |
2205 } | 2177 } |
2206 alternatives_ = new_alternatives; | 2178 alternatives_ = new_alternatives; |
2207 return this; | 2179 return this; |
2208 } | 2180 } |
2209 | 2181 |
2210 | 2182 |
2211 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(intptr_t depth, | 2183 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(int depth, |
2212 bool ignore_case) { | 2184 bool ignore_case) { |
2213 if (info()->replacement_calculated) return replacement(); | 2185 if (info()->replacement_calculated) return replacement(); |
2214 if (depth < 0) return this; | 2186 if (depth < 0) return this; |
2215 if (info()->visited) return this; | 2187 if (info()->visited) return this; |
2216 VisitMarker marker(info()); | 2188 VisitMarker marker(info()); |
2217 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 2189 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
2218 // afterwards. | 2190 // afterwards. |
2219 RegExpNode* node = (*alternatives_)[1].node(); | 2191 RegExpNode* node = alternatives_->at(1).node(); |
2220 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); | 2192 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); |
2221 if (replacement == NULL) return set_replacement(NULL); | 2193 if (replacement == NULL) return set_replacement(NULL); |
2222 (*alternatives_)[1].set_node(replacement); | 2194 alternatives_->at(1).set_node(replacement); |
2223 | 2195 |
2224 RegExpNode* neg_node = (*alternatives_)[0].node(); | 2196 RegExpNode* neg_node = alternatives_->at(0).node(); |
2225 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); | 2197 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); |
2226 // If the negative lookahead is always going to fail then | 2198 // If the negative lookahead is always going to fail then |
2227 // we don't need to check it. | 2199 // we don't need to check it. |
2228 if (neg_replacement == NULL) return set_replacement(replacement); | 2200 if (neg_replacement == NULL) return set_replacement(replacement); |
2229 (*alternatives_)[0].set_node(neg_replacement); | 2201 alternatives_->at(0).set_node(neg_replacement); |
2230 return set_replacement(this); | 2202 return set_replacement(this); |
2231 } | 2203 } |
2232 | 2204 |
2233 | 2205 |
2234 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2206 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2235 RegExpCompiler* compiler, | 2207 RegExpCompiler* compiler, |
2236 intptr_t characters_filled_in, | 2208 int characters_filled_in, |
2237 bool not_at_start) { | 2209 bool not_at_start) { |
2238 if (body_can_be_zero_length_ || info()->visited) return; | 2210 if (body_can_be_zero_length_ || info()->visited) return; |
2239 VisitMarker marker(info()); | 2211 VisitMarker marker(info()); |
2240 return ChoiceNode::GetQuickCheckDetails(details, | 2212 return ChoiceNode::GetQuickCheckDetails(details, |
2241 compiler, | 2213 compiler, |
2242 characters_filled_in, | 2214 characters_filled_in, |
2243 not_at_start); | 2215 not_at_start); |
2244 } | 2216 } |
2245 | 2217 |
2246 | 2218 |
2247 void LoopChoiceNode::FillInBMInfo(intptr_t offset, | 2219 void LoopChoiceNode::FillInBMInfo(int offset, |
2248 intptr_t budget, | 2220 int budget, |
2249 BoyerMooreLookahead* bm, | 2221 BoyerMooreLookahead* bm, |
2250 bool not_at_start) { | 2222 bool not_at_start) { |
2251 if (body_can_be_zero_length_ || budget <= 0) { | 2223 if (body_can_be_zero_length_ || budget <= 0) { |
2252 bm->SetRest(offset); | 2224 bm->SetRest(offset); |
2253 SaveBMInfo(bm, not_at_start, offset); | 2225 SaveBMInfo(bm, not_at_start, offset); |
2254 return; | 2226 return; |
2255 } | 2227 } |
2256 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); | 2228 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); |
2257 SaveBMInfo(bm, not_at_start, offset); | 2229 SaveBMInfo(bm, not_at_start, offset); |
2258 } | 2230 } |
2259 | 2231 |
2260 | 2232 |
2261 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2233 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2262 RegExpCompiler* compiler, | 2234 RegExpCompiler* compiler, |
2263 intptr_t characters_filled_in, | 2235 int characters_filled_in, |
2264 bool not_at_start) { | 2236 bool not_at_start) { |
2265 not_at_start = (not_at_start || not_at_start_); | 2237 not_at_start = (not_at_start || not_at_start_); |
2266 intptr_t choice_count = alternatives_->length(); | 2238 int choice_count = alternatives_->length(); |
2267 ASSERT(choice_count > 0); | 2239 DCHECK(choice_count > 0); |
2268 (*alternatives_)[0].node()->GetQuickCheckDetails(details, | 2240 alternatives_->at(0).node()->GetQuickCheckDetails(details, |
2269 compiler, | 2241 compiler, |
2270 characters_filled_in, | 2242 characters_filled_in, |
2271 not_at_start); | 2243 not_at_start); |
2272 for (intptr_t i = 1; i < choice_count; i++) { | 2244 for (int i = 1; i < choice_count; i++) { |
2273 QuickCheckDetails new_details(details->characters()); | 2245 QuickCheckDetails new_details(details->characters()); |
2274 RegExpNode* node = (*alternatives_)[i].node(); | 2246 RegExpNode* node = alternatives_->at(i).node(); |
2275 node->GetQuickCheckDetails(&new_details, compiler, | 2247 node->GetQuickCheckDetails(&new_details, compiler, |
2276 characters_filled_in, | 2248 characters_filled_in, |
2277 not_at_start); | 2249 not_at_start); |
2278 // Here we merge the quick match details of the two branches. | 2250 // Here we merge the quick match details of the two branches. |
2279 details->Merge(&new_details, characters_filled_in); | 2251 details->Merge(&new_details, characters_filled_in); |
2280 } | 2252 } |
2281 } | 2253 } |
2282 | 2254 |
2283 | 2255 |
2284 // Check for [0-9A-Z_a-z]. | 2256 // Check for [0-9A-Z_a-z]. |
2285 static void EmitWordCheck(RegExpMacroAssembler* assembler, | 2257 static void EmitWordCheck(RegExpMacroAssembler* assembler, |
2286 BlockLabel* word, | 2258 Label* word, |
2287 BlockLabel* non_word, | 2259 Label* non_word, |
2288 bool fall_through_on_word) { | 2260 bool fall_through_on_word) { |
2289 if (assembler->CheckSpecialCharacterClass( | 2261 if (assembler->CheckSpecialCharacterClass( |
2290 fall_through_on_word ? 'w' : 'W', | 2262 fall_through_on_word ? 'w' : 'W', |
2291 fall_through_on_word ? non_word : word)) { | 2263 fall_through_on_word ? non_word : word)) { |
2292 // Optimized implementation available. | 2264 // Optimized implementation available. |
2293 return; | 2265 return; |
2294 } | 2266 } |
2295 assembler->CheckCharacterGT('z', non_word); | 2267 assembler->CheckCharacterGT('z', non_word); |
2296 assembler->CheckCharacterLT('0', non_word); | 2268 assembler->CheckCharacterLT('0', non_word); |
2297 assembler->CheckCharacterGT('a' - 1, word); | 2269 assembler->CheckCharacterGT('a' - 1, word); |
(...skipping 12 matching lines...) Expand all Loading... |
2310 // that matches newline or the start of input). | 2282 // that matches newline or the start of input). |
2311 static void EmitHat(RegExpCompiler* compiler, | 2283 static void EmitHat(RegExpCompiler* compiler, |
2312 RegExpNode* on_success, | 2284 RegExpNode* on_success, |
2313 Trace* trace) { | 2285 Trace* trace) { |
2314 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2286 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2315 // We will be loading the previous character into the current character | 2287 // We will be loading the previous character into the current character |
2316 // register. | 2288 // register. |
2317 Trace new_trace(*trace); | 2289 Trace new_trace(*trace); |
2318 new_trace.InvalidateCurrentCharacter(); | 2290 new_trace.InvalidateCurrentCharacter(); |
2319 | 2291 |
2320 BlockLabel ok; | 2292 Label ok; |
2321 if (new_trace.cp_offset() == 0) { | 2293 if (new_trace.cp_offset() == 0) { |
2322 // The start of input counts as a newline in this context, so skip to | 2294 // The start of input counts as a newline in this context, so skip to |
2323 // ok if we are at the start. | 2295 // ok if we are at the start. |
2324 assembler->CheckAtStart(&ok); | 2296 assembler->CheckAtStart(&ok); |
2325 } | 2297 } |
2326 // We already checked that we are not at the start of input so it must be | 2298 // We already checked that we are not at the start of input so it must be |
2327 // OK to load the previous character. | 2299 // OK to load the previous character. |
2328 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, | 2300 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, |
2329 new_trace.backtrack(), | 2301 new_trace.backtrack(), |
2330 false); | 2302 false); |
2331 if (!assembler->CheckSpecialCharacterClass('n', | 2303 if (!assembler->CheckSpecialCharacterClass('n', |
2332 new_trace.backtrack())) { | 2304 new_trace.backtrack())) { |
2333 // Newline means \n, \r, 0x2028 or 0x2029. | 2305 // Newline means \n, \r, 0x2028 or 0x2029. |
2334 if (!compiler->one_byte()) { | 2306 if (!compiler->one_byte()) { |
2335 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); | 2307 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); |
2336 } | 2308 } |
2337 assembler->CheckCharacter('\n', &ok); | 2309 assembler->CheckCharacter('\n', &ok); |
2338 assembler->CheckNotCharacter('\r', new_trace.backtrack()); | 2310 assembler->CheckNotCharacter('\r', new_trace.backtrack()); |
2339 } | 2311 } |
2340 assembler->BindBlock(&ok); | 2312 assembler->Bind(&ok); |
2341 on_success->Emit(compiler, &new_trace); | 2313 on_success->Emit(compiler, &new_trace); |
2342 } | 2314 } |
2343 | 2315 |
2344 | 2316 |
2345 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). | 2317 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). |
2346 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { | 2318 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { |
2347 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2319 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2348 Trace::TriBool next_is_word_character = Trace::UNKNOWN; | 2320 Trace::TriBool next_is_word_character = Trace::UNKNOWN; |
2349 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); | 2321 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); |
2350 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 2322 BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
2351 if (lookahead == NULL) { | 2323 if (lookahead == NULL) { |
2352 intptr_t eats_at_least = | 2324 int eats_at_least = |
2353 Utils::Minimum(kMaxLookaheadForBoyerMoore, | 2325 Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore, |
2354 EatsAtLeast(kMaxLookaheadForBoyerMoore, | 2326 kRecursionBudget, |
2355 kRecursionBudget, | 2327 not_at_start)); |
2356 not_at_start)); | |
2357 if (eats_at_least >= 1) { | 2328 if (eats_at_least >= 1) { |
2358 BoyerMooreLookahead* bm = | 2329 BoyerMooreLookahead* bm = |
2359 new(I) BoyerMooreLookahead(eats_at_least, compiler, I); | 2330 new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone()); |
2360 FillInBMInfo(0, kRecursionBudget, bm, not_at_start); | 2331 FillInBMInfo(0, kRecursionBudget, bm, not_at_start); |
2361 if (bm->at(0)->is_non_word()) | 2332 if (bm->at(0)->is_non_word()) |
2362 next_is_word_character = Trace::FALSE_VALUE; | 2333 next_is_word_character = Trace::FALSE_VALUE; |
2363 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; | 2334 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; |
2364 } | 2335 } |
2365 } else { | 2336 } else { |
2366 if (lookahead->at(0)->is_non_word()) | 2337 if (lookahead->at(0)->is_non_word()) |
2367 next_is_word_character = Trace::FALSE_VALUE; | 2338 next_is_word_character = Trace::FALSE_VALUE; |
2368 if (lookahead->at(0)->is_word()) | 2339 if (lookahead->at(0)->is_word()) |
2369 next_is_word_character = Trace::TRUE_VALUE; | 2340 next_is_word_character = Trace::TRUE_VALUE; |
2370 } | 2341 } |
2371 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY); | 2342 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY); |
2372 if (next_is_word_character == Trace::UNKNOWN) { | 2343 if (next_is_word_character == Trace::UNKNOWN) { |
2373 BlockLabel before_non_word; | 2344 Label before_non_word; |
2374 BlockLabel before_word; | 2345 Label before_word; |
2375 if (trace->characters_preloaded() != 1) { | 2346 if (trace->characters_preloaded() != 1) { |
2376 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); | 2347 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); |
2377 } | 2348 } |
2378 // Fall through on non-word. | 2349 // Fall through on non-word. |
2379 EmitWordCheck(assembler, &before_word, &before_non_word, false); | 2350 EmitWordCheck(assembler, &before_word, &before_non_word, false); |
2380 // Next character is not a word character. | 2351 // Next character is not a word character. |
2381 assembler->BindBlock(&before_non_word); | 2352 assembler->Bind(&before_non_word); |
2382 BlockLabel ok; | 2353 Label ok; |
2383 // Backtrack on \B (non-boundary check) if previous is a word, | |
2384 // since we know next *is not* a word and this would be a boundary. | |
2385 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); | 2354 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); |
| 2355 assembler->GoTo(&ok); |
2386 | 2356 |
2387 if (!assembler->IsClosed()) { | 2357 assembler->Bind(&before_word); |
2388 assembler->GoTo(&ok); | |
2389 } | |
2390 | |
2391 assembler->BindBlock(&before_word); | |
2392 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); | 2358 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); |
2393 assembler->BindBlock(&ok); | 2359 assembler->Bind(&ok); |
2394 } else if (next_is_word_character == Trace::TRUE_VALUE) { | 2360 } else if (next_is_word_character == Trace::TRUE_VALUE) { |
2395 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); | 2361 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); |
2396 } else { | 2362 } else { |
2397 ASSERT(next_is_word_character == Trace::FALSE_VALUE); | 2363 DCHECK(next_is_word_character == Trace::FALSE_VALUE); |
2398 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); | 2364 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); |
2399 } | 2365 } |
2400 } | 2366 } |
2401 | 2367 |
2402 | 2368 |
2403 void AssertionNode::BacktrackIfPrevious( | 2369 void AssertionNode::BacktrackIfPrevious( |
2404 RegExpCompiler* compiler, | 2370 RegExpCompiler* compiler, |
2405 Trace* trace, | 2371 Trace* trace, |
2406 AssertionNode::IfPrevious backtrack_if_previous) { | 2372 AssertionNode::IfPrevious backtrack_if_previous) { |
2407 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2373 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2408 Trace new_trace(*trace); | 2374 Trace new_trace(*trace); |
2409 new_trace.InvalidateCurrentCharacter(); | 2375 new_trace.InvalidateCurrentCharacter(); |
2410 | 2376 |
2411 BlockLabel fall_through, dummy; | 2377 Label fall_through, dummy; |
2412 | 2378 |
2413 BlockLabel* non_word = backtrack_if_previous == kIsNonWord ? | 2379 Label* non_word = backtrack_if_previous == kIsNonWord ? |
2414 new_trace.backtrack() : | 2380 new_trace.backtrack() : |
2415 &fall_through; | 2381 &fall_through; |
2416 BlockLabel* word = backtrack_if_previous == kIsNonWord ? | 2382 Label* word = backtrack_if_previous == kIsNonWord ? |
2417 &fall_through : | 2383 &fall_through : |
2418 new_trace.backtrack(); | 2384 new_trace.backtrack(); |
2419 | 2385 |
2420 if (new_trace.cp_offset() == 0) { | 2386 if (new_trace.cp_offset() == 0) { |
2421 // The start of input counts as a non-word character, so the question is | 2387 // The start of input counts as a non-word character, so the question is |
2422 // decided if we are at the start. | 2388 // decided if we are at the start. |
2423 assembler->CheckAtStart(non_word); | 2389 assembler->CheckAtStart(non_word); |
2424 } | 2390 } |
2425 // We already checked that we are not at the start of input so it must be | 2391 // We already checked that we are not at the start of input so it must be |
2426 // OK to load the previous character. | 2392 // OK to load the previous character. |
2427 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); | 2393 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); |
2428 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); | 2394 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); |
2429 | 2395 |
2430 assembler->BindBlock(&fall_through); | 2396 assembler->Bind(&fall_through); |
2431 on_success()->Emit(compiler, &new_trace); | 2397 on_success()->Emit(compiler, &new_trace); |
2432 } | 2398 } |
2433 | 2399 |
2434 | 2400 |
2435 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2401 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2436 RegExpCompiler* compiler, | 2402 RegExpCompiler* compiler, |
2437 intptr_t filled_in, | 2403 int filled_in, |
2438 bool not_at_start) { | 2404 bool not_at_start) { |
2439 if (assertion_type_ == AT_START && not_at_start) { | 2405 if (assertion_type_ == AT_START && not_at_start) { |
2440 details->set_cannot_match(); | 2406 details->set_cannot_match(); |
2441 return; | 2407 return; |
2442 } | 2408 } |
2443 return on_success()->GetQuickCheckDetails(details, | 2409 return on_success()->GetQuickCheckDetails(details, |
2444 compiler, | 2410 compiler, |
2445 filled_in, | 2411 filled_in, |
2446 not_at_start); | 2412 not_at_start); |
2447 } | 2413 } |
2448 | 2414 |
2449 | 2415 |
2450 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2416 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
2451 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2417 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2452 switch (assertion_type_) { | 2418 switch (assertion_type_) { |
2453 case AT_END: { | 2419 case AT_END: { |
2454 BlockLabel ok; | 2420 Label ok; |
2455 assembler->CheckPosition(trace->cp_offset(), &ok); | 2421 assembler->CheckPosition(trace->cp_offset(), &ok); |
2456 assembler->GoTo(trace->backtrack()); | 2422 assembler->GoTo(trace->backtrack()); |
2457 assembler->BindBlock(&ok); | 2423 assembler->Bind(&ok); |
2458 break; | 2424 break; |
2459 } | 2425 } |
2460 case AT_START: { | 2426 case AT_START: { |
2461 if (trace->at_start() == Trace::FALSE_VALUE) { | 2427 if (trace->at_start() == Trace::FALSE_VALUE) { |
2462 assembler->GoTo(trace->backtrack()); | 2428 assembler->GoTo(trace->backtrack()); |
2463 return; | 2429 return; |
2464 } | 2430 } |
2465 if (trace->at_start() == Trace::UNKNOWN) { | 2431 if (trace->at_start() == Trace::UNKNOWN) { |
2466 assembler->CheckNotAtStart(trace->backtrack()); | 2432 assembler->CheckNotAtStart(trace->backtrack()); |
2467 Trace at_start_trace = *trace; | 2433 Trace at_start_trace = *trace; |
2468 at_start_trace.set_at_start(true); | 2434 at_start_trace.set_at_start(true); |
2469 on_success()->Emit(compiler, &at_start_trace); | 2435 on_success()->Emit(compiler, &at_start_trace); |
2470 return; | 2436 return; |
2471 } | 2437 } |
2472 } | 2438 } |
2473 break; | 2439 break; |
2474 case AFTER_NEWLINE: | 2440 case AFTER_NEWLINE: |
2475 EmitHat(compiler, on_success(), trace); | 2441 EmitHat(compiler, on_success(), trace); |
2476 return; | 2442 return; |
2477 case AT_BOUNDARY: | 2443 case AT_BOUNDARY: |
2478 case AT_NON_BOUNDARY: { | 2444 case AT_NON_BOUNDARY: { |
2479 EmitBoundaryCheck(compiler, trace); | 2445 EmitBoundaryCheck(compiler, trace); |
2480 return; | 2446 return; |
2481 } | 2447 } |
2482 } | 2448 } |
2483 on_success()->Emit(compiler, trace); | 2449 on_success()->Emit(compiler, trace); |
2484 } | 2450 } |
2485 | 2451 |
2486 | 2452 |
2487 static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) { | 2453 static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) { |
2488 if (quick_check == NULL) return false; | 2454 if (quick_check == NULL) return false; |
2489 if (offset >= quick_check->characters()) return false; | 2455 if (offset >= quick_check->characters()) return false; |
2490 return quick_check->positions(offset)->determines_perfectly; | 2456 return quick_check->positions(offset)->determines_perfectly; |
2491 } | 2457 } |
2492 | 2458 |
2493 | 2459 |
2494 static void UpdateBoundsCheck(intptr_t index, intptr_t* checked_up_to) { | 2460 static void UpdateBoundsCheck(int index, int* checked_up_to) { |
2495 if (index > *checked_up_to) { | 2461 if (index > *checked_up_to) { |
2496 *checked_up_to = index; | 2462 *checked_up_to = index; |
2497 } | 2463 } |
2498 } | 2464 } |
2499 | 2465 |
2500 | 2466 |
2501 // We call this repeatedly to generate code for each pass over the text node. | 2467 // We call this repeatedly to generate code for each pass over the text node. |
2502 // The passes are in increasing order of difficulty because we hope one | 2468 // The passes are in increasing order of difficulty because we hope one |
2503 // of the first passes will fail in which case we are saved the work of the | 2469 // of the first passes will fail in which case we are saved the work of the |
2504 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ | 2470 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ |
(...skipping 20 matching lines...) Expand all Loading... |
2525 // order to get to the code we are now generating. The quick check can involve | 2491 // order to get to the code we are now generating. The quick check can involve |
2526 // loading characters, which means we do not need to recheck the bounds | 2492 // loading characters, which means we do not need to recheck the bounds |
2527 // up to the limit the quick check already checked. In addition the quick | 2493 // up to the limit the quick check already checked. In addition the quick |
2528 // check can have involved a mask and compare operation which may simplify | 2494 // check can have involved a mask and compare operation which may simplify |
2529 // or obviate the need for further checks at some character positions. | 2495 // or obviate the need for further checks at some character positions. |
2530 void TextNode::TextEmitPass(RegExpCompiler* compiler, | 2496 void TextNode::TextEmitPass(RegExpCompiler* compiler, |
2531 TextEmitPassType pass, | 2497 TextEmitPassType pass, |
2532 bool preloaded, | 2498 bool preloaded, |
2533 Trace* trace, | 2499 Trace* trace, |
2534 bool first_element_checked, | 2500 bool first_element_checked, |
2535 intptr_t* checked_up_to) { | 2501 int* checked_up_to) { |
2536 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2502 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 2503 Isolate* isolate = assembler->zone()->isolate(); |
2537 bool one_byte = compiler->one_byte(); | 2504 bool one_byte = compiler->one_byte(); |
2538 BlockLabel* backtrack = trace->backtrack(); | 2505 Label* backtrack = trace->backtrack(); |
2539 QuickCheckDetails* quick_check = trace->quick_check_performed(); | 2506 QuickCheckDetails* quick_check = trace->quick_check_performed(); |
2540 intptr_t element_count = elms_->length(); | 2507 int element_count = elms_->length(); |
2541 for (intptr_t i = preloaded ? 0 : element_count - 1; i >= 0; i--) { | 2508 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { |
2542 TextElement elm = elms_->At(i); | 2509 TextElement elm = elms_->at(i); |
2543 intptr_t cp_offset = trace->cp_offset() + elm.cp_offset(); | 2510 int cp_offset = trace->cp_offset() + elm.cp_offset(); |
2544 if (elm.text_type() == TextElement::ATOM) { | 2511 if (elm.text_type() == TextElement::ATOM) { |
2545 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); | 2512 Vector<const uc16> quarks = elm.atom()->data(); |
2546 for (intptr_t j = preloaded ? 0 : quarks->length() - 1; j >= 0; j--) { | 2513 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
2547 if (first_element_checked && i == 0 && j == 0) continue; | 2514 if (first_element_checked && i == 0 && j == 0) continue; |
2548 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; | 2515 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; |
2549 EmitCharacterFunction* emit_function = NULL; | 2516 EmitCharacterFunction* emit_function = NULL; |
2550 switch (pass) { | 2517 switch (pass) { |
2551 case NON_LATIN1_MATCH: | 2518 case NON_LATIN1_MATCH: |
2552 ASSERT(one_byte); | 2519 DCHECK(one_byte); |
2553 if (quarks->At(j) > Symbols::kMaxOneCharCodeSymbol) { | 2520 if (quarks[j] > String::kMaxOneByteCharCode) { |
2554 assembler->GoTo(backtrack); | 2521 assembler->GoTo(backtrack); |
2555 return; | 2522 return; |
2556 } | 2523 } |
2557 break; | 2524 break; |
2558 case NON_LETTER_CHARACTER_MATCH: | 2525 case NON_LETTER_CHARACTER_MATCH: |
2559 emit_function = &EmitAtomNonLetter; | 2526 emit_function = &EmitAtomNonLetter; |
2560 break; | 2527 break; |
2561 case SIMPLE_CHARACTER_MATCH: | 2528 case SIMPLE_CHARACTER_MATCH: |
2562 emit_function = &EmitSimpleCharacter; | 2529 emit_function = &EmitSimpleCharacter; |
2563 break; | 2530 break; |
2564 case CASE_CHARACTER_MATCH: | 2531 case CASE_CHARACTER_MATCH: |
2565 emit_function = &EmitAtomLetter; | 2532 emit_function = &EmitAtomLetter; |
2566 break; | 2533 break; |
2567 default: | 2534 default: |
2568 break; | 2535 break; |
2569 } | 2536 } |
2570 if (emit_function != NULL) { | 2537 if (emit_function != NULL) { |
2571 bool bound_checked = emit_function(I, | 2538 bool bound_checked = emit_function(isolate, |
2572 compiler, | 2539 compiler, |
2573 quarks->At(j), | 2540 quarks[j], |
2574 backtrack, | 2541 backtrack, |
2575 cp_offset + j, | 2542 cp_offset + j, |
2576 *checked_up_to < cp_offset + j, | 2543 *checked_up_to < cp_offset + j, |
2577 preloaded); | 2544 preloaded); |
2578 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); | 2545 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
2579 } | 2546 } |
2580 } | 2547 } |
2581 } else { | 2548 } else { |
2582 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); | 2549 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); |
2583 if (pass == CHARACTER_CLASS_MATCH) { | 2550 if (pass == CHARACTER_CLASS_MATCH) { |
2584 if (first_element_checked && i == 0) continue; | 2551 if (first_element_checked && i == 0) continue; |
2585 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; | 2552 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
2586 RegExpCharacterClass* cc = elm.char_class(); | 2553 RegExpCharacterClass* cc = elm.char_class(); |
2587 EmitCharClass(assembler, | 2554 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, |
2588 cc, | 2555 *checked_up_to < cp_offset, preloaded, zone()); |
2589 one_byte, | |
2590 backtrack, | |
2591 cp_offset, | |
2592 *checked_up_to < cp_offset, | |
2593 preloaded, | |
2594 I); | |
2595 UpdateBoundsCheck(cp_offset, checked_up_to); | 2556 UpdateBoundsCheck(cp_offset, checked_up_to); |
2596 } | 2557 } |
2597 } | 2558 } |
2598 } | 2559 } |
2599 } | 2560 } |
2600 | 2561 |
2601 | 2562 |
2602 intptr_t TextNode::Length() { | 2563 int TextNode::Length() { |
2603 TextElement elm = elms_->Last(); | 2564 TextElement elm = elms_->last(); |
2604 ASSERT(elm.cp_offset() >= 0); | 2565 DCHECK(elm.cp_offset() >= 0); |
2605 return elm.cp_offset() + elm.length(); | 2566 return elm.cp_offset() + elm.length(); |
2606 } | 2567 } |
2607 | 2568 |
2608 | 2569 |
2609 bool TextNode::SkipPass(intptr_t intptr_t_pass, bool ignore_case) { | 2570 bool TextNode::SkipPass(int int_pass, bool ignore_case) { |
2610 TextEmitPassType pass = static_cast<TextEmitPassType>(intptr_t_pass); | 2571 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); |
2611 if (ignore_case) { | 2572 if (ignore_case) { |
2612 return pass == SIMPLE_CHARACTER_MATCH; | 2573 return pass == SIMPLE_CHARACTER_MATCH; |
2613 } else { | 2574 } else { |
2614 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; | 2575 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; |
2615 } | 2576 } |
2616 } | 2577 } |
2617 | 2578 |
2618 | 2579 |
2619 // This generates the code to match a text node. A text node can contain | 2580 // This generates the code to match a text node. A text node can contain |
2620 // straight character sequences (possibly to be matched in a case-independent | 2581 // straight character sequences (possibly to be matched in a case-independent |
2621 // way) and character classes. For efficiency we do not do this in a single | 2582 // way) and character classes. For efficiency we do not do this in a single |
2622 // pass from left to right. Instead we pass over the text node several times, | 2583 // pass from left to right. Instead we pass over the text node several times, |
2623 // emitting code for some character positions every time. See the comment on | 2584 // emitting code for some character positions every time. See the comment on |
2624 // TextEmitPass for details. | 2585 // TextEmitPass for details. |
2625 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2586 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
2626 LimitResult limit_result = LimitVersions(compiler, trace); | 2587 LimitResult limit_result = LimitVersions(compiler, trace); |
2627 if (limit_result == DONE) return; | 2588 if (limit_result == DONE) return; |
2628 ASSERT(limit_result == CONTINUE); | 2589 DCHECK(limit_result == CONTINUE); |
2629 | 2590 |
2630 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) { | 2591 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) { |
2631 compiler->SetRegExpTooBig(); | 2592 compiler->SetRegExpTooBig(); |
2632 return; | 2593 return; |
2633 } | 2594 } |
2634 | 2595 |
2635 if (compiler->one_byte()) { | 2596 if (compiler->one_byte()) { |
2636 intptr_t dummy = 0; | 2597 int dummy = 0; |
2637 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy); | 2598 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy); |
2638 } | 2599 } |
2639 | 2600 |
2640 bool first_elt_done = false; | 2601 bool first_elt_done = false; |
2641 intptr_t bound_checked_to = trace->cp_offset() - 1; | 2602 int bound_checked_to = trace->cp_offset() - 1; |
2642 bound_checked_to += trace->bound_checked_up_to(); | 2603 bound_checked_to += trace->bound_checked_up_to(); |
2643 | 2604 |
2644 // If a character is preloaded into the current character register then | 2605 // If a character is preloaded into the current character register then |
2645 // check that now. | 2606 // check that now. |
2646 if (trace->characters_preloaded() == 1) { | 2607 if (trace->characters_preloaded() == 1) { |
2647 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { | 2608 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) { |
2648 if (!SkipPass(pass, compiler->ignore_case())) { | 2609 if (!SkipPass(pass, compiler->ignore_case())) { |
2649 TextEmitPass(compiler, | 2610 TextEmitPass(compiler, |
2650 static_cast<TextEmitPassType>(pass), | 2611 static_cast<TextEmitPassType>(pass), |
2651 true, | 2612 true, |
2652 trace, | 2613 trace, |
2653 false, | 2614 false, |
2654 &bound_checked_to); | 2615 &bound_checked_to); |
2655 } | 2616 } |
2656 } | 2617 } |
2657 first_elt_done = true; | 2618 first_elt_done = true; |
2658 } | 2619 } |
2659 | 2620 |
2660 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { | 2621 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) { |
2661 if (!SkipPass(pass, compiler->ignore_case())) { | 2622 if (!SkipPass(pass, compiler->ignore_case())) { |
2662 TextEmitPass(compiler, | 2623 TextEmitPass(compiler, |
2663 static_cast<TextEmitPassType>(pass), | 2624 static_cast<TextEmitPassType>(pass), |
2664 false, | 2625 false, |
2665 trace, | 2626 trace, |
2666 first_elt_done, | 2627 first_elt_done, |
2667 &bound_checked_to); | 2628 &bound_checked_to); |
2668 } | 2629 } |
2669 } | 2630 } |
2670 | 2631 |
2671 Trace successor_trace(*trace); | 2632 Trace successor_trace(*trace); |
2672 successor_trace.set_at_start(false); | 2633 successor_trace.set_at_start(false); |
2673 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); | 2634 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); |
2674 RecursionCheck rc(compiler); | 2635 RecursionCheck rc(compiler); |
2675 on_success()->Emit(compiler, &successor_trace); | 2636 on_success()->Emit(compiler, &successor_trace); |
2676 } | 2637 } |
2677 | 2638 |
2678 | 2639 |
2679 void Trace::InvalidateCurrentCharacter() { | 2640 void Trace::InvalidateCurrentCharacter() { |
2680 characters_preloaded_ = 0; | 2641 characters_preloaded_ = 0; |
2681 } | 2642 } |
2682 | 2643 |
2683 | 2644 |
2684 void Trace::AdvanceCurrentPositionInTrace(intptr_t by, | 2645 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { |
2685 RegExpCompiler* compiler) { | 2646 DCHECK(by > 0); |
2686 ASSERT(by > 0); | |
2687 // We don't have an instruction for shifting the current character register | 2647 // We don't have an instruction for shifting the current character register |
2688 // down or for using a shifted value for anything so lets just forget that | 2648 // down or for using a shifted value for anything so lets just forget that |
2689 // we preloaded any characters into it. | 2649 // we preloaded any characters into it. |
2690 characters_preloaded_ = 0; | 2650 characters_preloaded_ = 0; |
2691 // Adjust the offsets of the quick check performed information. This | 2651 // Adjust the offsets of the quick check performed information. This |
2692 // information is used to find out what we already determined about the | 2652 // information is used to find out what we already determined about the |
2693 // characters by means of mask and compare. | 2653 // characters by means of mask and compare. |
2694 quick_check_performed_.Advance(by, compiler->one_byte()); | 2654 quick_check_performed_.Advance(by, compiler->one_byte()); |
2695 cp_offset_ += by; | 2655 cp_offset_ += by; |
2696 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { | 2656 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { |
2697 compiler->SetRegExpTooBig(); | 2657 compiler->SetRegExpTooBig(); |
2698 cp_offset_ = 0; | 2658 cp_offset_ = 0; |
2699 } | 2659 } |
2700 bound_checked_up_to_ = Utils::Maximum(static_cast<intptr_t>(0), | 2660 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); |
2701 bound_checked_up_to_ - by); | |
2702 } | 2661 } |
2703 | 2662 |
2704 | 2663 |
2705 void TextNode::MakeCaseIndependent(bool is_one_byte) { | 2664 void TextNode::MakeCaseIndependent(bool is_one_byte) { |
2706 intptr_t element_count = elms_->length(); | 2665 int element_count = elms_->length(); |
2707 for (intptr_t i = 0; i < element_count; i++) { | 2666 for (int i = 0; i < element_count; i++) { |
2708 TextElement elm = elms_->At(i); | 2667 TextElement elm = elms_->at(i); |
2709 if (elm.text_type() == TextElement::CHAR_CLASS) { | 2668 if (elm.text_type() == TextElement::CHAR_CLASS) { |
2710 RegExpCharacterClass* cc = elm.char_class(); | 2669 RegExpCharacterClass* cc = elm.char_class(); |
2711 // None of the standard character classes is different in the case | 2670 // None of the standard character classes is different in the case |
2712 // independent case and it slows us down if we don't know that. | 2671 // independent case and it slows us down if we don't know that. |
2713 if (cc->is_standard()) continue; | 2672 if (cc->is_standard(zone())) continue; |
2714 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); | 2673 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
2715 intptr_t range_count = ranges->length(); | 2674 int range_count = ranges->length(); |
2716 for (intptr_t j = 0; j < range_count; j++) { | 2675 for (int j = 0; j < range_count; j++) { |
2717 (*ranges)[j].AddCaseEquivalents(ranges, is_one_byte, I); | 2676 ranges->at(j).AddCaseEquivalents(ranges, is_one_byte, zone()); |
2718 } | 2677 } |
2719 } | 2678 } |
2720 } | 2679 } |
2721 } | 2680 } |
2722 | 2681 |
2723 | 2682 |
2724 intptr_t TextNode::GreedyLoopTextLength() { | 2683 int TextNode::GreedyLoopTextLength() { |
2725 TextElement elm = elms_->At(elms_->length() - 1); | 2684 TextElement elm = elms_->at(elms_->length() - 1); |
2726 return elm.cp_offset() + elm.length(); | 2685 return elm.cp_offset() + elm.length(); |
2727 } | 2686 } |
2728 | 2687 |
2729 | 2688 |
2730 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( | 2689 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( |
2731 RegExpCompiler* compiler) { | 2690 RegExpCompiler* compiler) { |
2732 if (elms_->length() != 1) return NULL; | 2691 if (elms_->length() != 1) return NULL; |
2733 TextElement elm = elms_->At(0); | 2692 TextElement elm = elms_->at(0); |
2734 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; | 2693 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; |
2735 RegExpCharacterClass* node = elm.char_class(); | 2694 RegExpCharacterClass* node = elm.char_class(); |
2736 ZoneGrowableArray<CharacterRange>* ranges = node->ranges(); | 2695 ZoneList<CharacterRange>* ranges = node->ranges(zone()); |
2737 if (!CharacterRange::IsCanonical(ranges)) { | 2696 if (!CharacterRange::IsCanonical(ranges)) { |
2738 CharacterRange::Canonicalize(ranges); | 2697 CharacterRange::Canonicalize(ranges); |
2739 } | 2698 } |
2740 if (node->is_negated()) { | 2699 if (node->is_negated()) { |
2741 return ranges->length() == 0 ? on_success() : NULL; | 2700 return ranges->length() == 0 ? on_success() : NULL; |
2742 } | 2701 } |
2743 if (ranges->length() != 1) return NULL; | 2702 if (ranges->length() != 1) return NULL; |
2744 uint32_t max_char; | 2703 uint32_t max_char; |
2745 if (compiler->one_byte()) { | 2704 if (compiler->one_byte()) { |
2746 max_char = Symbols::kMaxOneCharCodeSymbol; | 2705 max_char = String::kMaxOneByteCharCode; |
2747 } else { | 2706 } else { |
2748 max_char = Utf16::kMaxCodeUnit; | 2707 max_char = String::kMaxUtf16CodeUnit; |
2749 } | 2708 } |
2750 return ranges->At(0).IsEverything(max_char) ? on_success() : NULL; | 2709 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL; |
2751 } | 2710 } |
2752 | 2711 |
2753 | 2712 |
2754 // Finds the fixed match length of a sequence of nodes that goes from | 2713 // Finds the fixed match length of a sequence of nodes that goes from |
2755 // this alternative and back to this choice node. If there are variable | 2714 // this alternative and back to this choice node. If there are variable |
2756 // length nodes or other complications in the way then return a sentinel | 2715 // length nodes or other complications in the way then return a sentinel |
2757 // value indicating that a greedy loop cannot be constructed. | 2716 // value indicating that a greedy loop cannot be constructed. |
2758 intptr_t ChoiceNode::GreedyLoopTextLengthForAlternative( | 2717 int ChoiceNode::GreedyLoopTextLengthForAlternative( |
2759 GuardedAlternative* alternative) { | 2718 GuardedAlternative* alternative) { |
2760 intptr_t length = 0; | 2719 int length = 0; |
2761 RegExpNode* node = alternative->node(); | 2720 RegExpNode* node = alternative->node(); |
2762 // Later we will generate code for all these text nodes using recursion | 2721 // Later we will generate code for all these text nodes using recursion |
2763 // so we have to limit the max number. | 2722 // so we have to limit the max number. |
2764 intptr_t recursion_depth = 0; | 2723 int recursion_depth = 0; |
2765 while (node != this) { | 2724 while (node != this) { |
2766 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { | 2725 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { |
2767 return kNodeIsTooComplexForGreedyLoops; | 2726 return kNodeIsTooComplexForGreedyLoops; |
2768 } | 2727 } |
2769 intptr_t node_length = node->GreedyLoopTextLength(); | 2728 int node_length = node->GreedyLoopTextLength(); |
2770 if (node_length == kNodeIsTooComplexForGreedyLoops) { | 2729 if (node_length == kNodeIsTooComplexForGreedyLoops) { |
2771 return kNodeIsTooComplexForGreedyLoops; | 2730 return kNodeIsTooComplexForGreedyLoops; |
2772 } | 2731 } |
2773 length += node_length; | 2732 length += node_length; |
2774 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); | 2733 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); |
2775 node = seq_node->on_success(); | 2734 node = seq_node->on_success(); |
2776 } | 2735 } |
2777 return length; | 2736 return length; |
2778 } | 2737 } |
2779 | 2738 |
2780 | 2739 |
2781 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { | 2740 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { |
2782 ASSERT(loop_node_ == NULL); | 2741 DCHECK_EQ(loop_node_, NULL); |
2783 AddAlternative(alt); | 2742 AddAlternative(alt); |
2784 loop_node_ = alt.node(); | 2743 loop_node_ = alt.node(); |
2785 } | 2744 } |
2786 | 2745 |
2787 | 2746 |
2788 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { | 2747 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { |
2789 ASSERT(continue_node_ == NULL); | 2748 DCHECK_EQ(continue_node_, NULL); |
2790 AddAlternative(alt); | 2749 AddAlternative(alt); |
2791 continue_node_ = alt.node(); | 2750 continue_node_ = alt.node(); |
2792 } | 2751 } |
2793 | 2752 |
2794 | 2753 |
2795 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2754 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
2796 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 2755 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
2797 if (trace->stop_node() == this) { | 2756 if (trace->stop_node() == this) { |
2798 // Back edge of greedy optimized loop node graph. | 2757 // Back edge of greedy optimized loop node graph. |
2799 intptr_t text_length = | 2758 int text_length = |
2800 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); | 2759 GreedyLoopTextLengthForAlternative(&(alternatives_->at(0))); |
2801 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); | 2760 DCHECK(text_length != kNodeIsTooComplexForGreedyLoops); |
2802 // Update the counter-based backtracking info on the stack. This is an | 2761 // Update the counter-based backtracking info on the stack. This is an |
2803 // optimization for greedy loops (see below). | 2762 // optimization for greedy loops (see below). |
2804 ASSERT(trace->cp_offset() == text_length); | 2763 DCHECK(trace->cp_offset() == text_length); |
2805 macro_assembler->AdvanceCurrentPosition(text_length); | 2764 macro_assembler->AdvanceCurrentPosition(text_length); |
2806 macro_assembler->GoTo(trace->loop_label()); | 2765 macro_assembler->GoTo(trace->loop_label()); |
2807 return; | 2766 return; |
2808 } | 2767 } |
2809 ASSERT(trace->stop_node() == NULL); | 2768 DCHECK(trace->stop_node() == NULL); |
2810 if (!trace->is_trivial()) { | 2769 if (!trace->is_trivial()) { |
2811 trace->Flush(compiler, this); | 2770 trace->Flush(compiler, this); |
2812 return; | 2771 return; |
2813 } | 2772 } |
2814 ChoiceNode::Emit(compiler, trace); | 2773 ChoiceNode::Emit(compiler, trace); |
2815 } | 2774 } |
2816 | 2775 |
2817 | 2776 |
2818 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, | 2777 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, |
2819 intptr_t eats_at_least) { | 2778 int eats_at_least) { |
2820 intptr_t preload_characters = Utils::Minimum(static_cast<intptr_t>(4), | 2779 int preload_characters = Min(4, eats_at_least); |
2821 eats_at_least); | |
2822 if (compiler->macro_assembler()->CanReadUnaligned()) { | 2780 if (compiler->macro_assembler()->CanReadUnaligned()) { |
2823 bool one_byte = compiler->one_byte(); | 2781 bool one_byte = compiler->one_byte(); |
2824 if (one_byte) { | 2782 if (one_byte) { |
2825 if (preload_characters > 4) preload_characters = 4; | 2783 if (preload_characters > 4) preload_characters = 4; |
2826 // We can't preload 3 characters because there is no machine instruction | 2784 // We can't preload 3 characters because there is no machine instruction |
2827 // to do that. We can't just load 4 because we could be reading | 2785 // to do that. We can't just load 4 because we could be reading |
2828 // beyond the end of the string, which could cause a memory fault. | 2786 // beyond the end of the string, which could cause a memory fault. |
2829 if (preload_characters == 3) preload_characters = 2; | 2787 if (preload_characters == 3) preload_characters = 2; |
2830 } else { | 2788 } else { |
2831 if (preload_characters > 2) preload_characters = 2; | 2789 if (preload_characters > 2) preload_characters = 2; |
2832 } | 2790 } |
2833 } else { | 2791 } else { |
2834 if (preload_characters > 1) preload_characters = 1; | 2792 if (preload_characters > 1) preload_characters = 1; |
2835 } | 2793 } |
2836 return preload_characters; | 2794 return preload_characters; |
2837 } | 2795 } |
2838 | 2796 |
2839 | 2797 |
2840 // This structure is used when generating the alternatives in a choice node. It | 2798 // This class is used when generating the alternatives in a choice node. It |
2841 // records the way the alternative is being code generated. | 2799 // records the way the alternative is being code generated. |
2842 struct AlternativeGeneration { | 2800 class AlternativeGeneration: public Malloced { |
| 2801 public: |
2843 AlternativeGeneration() | 2802 AlternativeGeneration() |
2844 : possible_success(), | 2803 : possible_success(), |
2845 expects_preload(false), | 2804 expects_preload(false), |
2846 after(), | 2805 after(), |
2847 quick_check_details() { } | 2806 quick_check_details() { } |
2848 BlockLabel possible_success; | 2807 Label possible_success; |
2849 bool expects_preload; | 2808 bool expects_preload; |
2850 BlockLabel after; | 2809 Label after; |
2851 QuickCheckDetails quick_check_details; | 2810 QuickCheckDetails quick_check_details; |
2852 }; | 2811 }; |
2853 | 2812 |
2854 | 2813 |
2855 // Creates a list of AlternativeGenerations. If the list has a reasonable | 2814 // Creates a list of AlternativeGenerations. If the list has a reasonable |
2856 // size then it is on the stack, otherwise the excess is on the heap. | 2815 // size then it is on the stack, otherwise the excess is on the heap. |
2857 class AlternativeGenerationList { | 2816 class AlternativeGenerationList { |
2858 public: | 2817 public: |
2859 explicit AlternativeGenerationList(intptr_t count) | 2818 AlternativeGenerationList(int count, Zone* zone) |
2860 : alt_gens_(count) { | 2819 : alt_gens_(count, zone) { |
2861 for (intptr_t i = 0; i < count && i < kAFew; i++) { | 2820 for (int i = 0; i < count && i < kAFew; i++) { |
2862 alt_gens_.Add(a_few_alt_gens_ + i); | 2821 alt_gens_.Add(a_few_alt_gens_ + i, zone); |
2863 } | 2822 } |
2864 for (intptr_t i = kAFew; i < count; i++) { | 2823 for (int i = kAFew; i < count; i++) { |
2865 alt_gens_.Add(new AlternativeGeneration()); | 2824 alt_gens_.Add(new AlternativeGeneration(), zone); |
2866 } | 2825 } |
2867 } | 2826 } |
2868 ~AlternativeGenerationList() { | 2827 ~AlternativeGenerationList() { |
2869 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) { | 2828 for (int i = kAFew; i < alt_gens_.length(); i++) { |
2870 delete alt_gens_[i]; | 2829 delete alt_gens_[i]; |
2871 alt_gens_[i] = NULL; | 2830 alt_gens_[i] = NULL; |
2872 } | 2831 } |
2873 } | 2832 } |
2874 | 2833 |
2875 AlternativeGeneration* at(intptr_t i) { | 2834 AlternativeGeneration* at(int i) { |
2876 return alt_gens_[i]; | 2835 return alt_gens_[i]; |
2877 } | 2836 } |
2878 | 2837 |
2879 private: | 2838 private: |
2880 static const intptr_t kAFew = 10; | 2839 static const int kAFew = 10; |
2881 GrowableArray<AlternativeGeneration*> alt_gens_; | 2840 ZoneList<AlternativeGeneration*> alt_gens_; |
2882 AlternativeGeneration a_few_alt_gens_[kAFew]; | 2841 AlternativeGeneration a_few_alt_gens_[kAFew]; |
2883 | |
2884 DISALLOW_ALLOCATION(); | |
2885 }; | 2842 }; |
2886 | 2843 |
2887 | 2844 |
2888 // The '2' variant is inclusive from and exclusive to. | 2845 // The '2' variant is has inclusive from and exclusive to. |
2889 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, | 2846 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, |
2890 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. | 2847 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. |
2891 static const intptr_t kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, | 2848 static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, |
2892 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, | 2849 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, |
2893 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, | 2850 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, |
2894 0xFEFF, 0xFF00, 0x10000 }; | 2851 0xFEFF, 0xFF00, 0x10000 }; |
2895 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); | 2852 static const int kSpaceRangeCount = arraysize(kSpaceRanges); |
2896 static const intptr_t kWordRanges[] = { | 2853 |
| 2854 static const int kWordRanges[] = { |
2897 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 }; | 2855 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 }; |
2898 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); | 2856 static const int kWordRangeCount = arraysize(kWordRanges); |
2899 static const intptr_t kDigitRanges[] = { '0', '9' + 1, 0x10000 }; | 2857 static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 }; |
2900 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); | 2858 static const int kDigitRangeCount = arraysize(kDigitRanges); |
2901 static const intptr_t kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 }; | 2859 static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 }; |
2902 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges); | 2860 static const int kSurrogateRangeCount = arraysize(kSurrogateRanges); |
2903 static const intptr_t kLineTerminatorRanges[] = { | 2861 static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E, |
2904 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, 0x10000 }; | 2862 0x2028, 0x202A, 0x10000 }; |
2905 static const intptr_t kLineTerminatorRangeCount = | 2863 static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges); |
2906 ARRAY_SIZE(kLineTerminatorRanges); | |
2907 | 2864 |
2908 | 2865 |
2909 void BoyerMoorePositionInfo::Set(intptr_t character) { | 2866 void BoyerMoorePositionInfo::Set(int character) { |
2910 SetInterval(Interval(character, character)); | 2867 SetInterval(Interval(character, character)); |
2911 } | 2868 } |
2912 | 2869 |
2913 | 2870 |
2914 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { | 2871 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { |
2915 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); | 2872 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); |
2916 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); | 2873 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); |
2917 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); | 2874 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); |
2918 surrogate_ = | 2875 surrogate_ = |
2919 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); | 2876 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); |
2920 if (interval.to() - interval.from() >= kMapSize - 1) { | 2877 if (interval.to() - interval.from() >= kMapSize - 1) { |
2921 if (map_count_ != kMapSize) { | 2878 if (map_count_ != kMapSize) { |
2922 map_count_ = kMapSize; | 2879 map_count_ = kMapSize; |
2923 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true; | 2880 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; |
2924 } | 2881 } |
2925 return; | 2882 return; |
2926 } | 2883 } |
2927 for (intptr_t i = interval.from(); i <= interval.to(); i++) { | 2884 for (int i = interval.from(); i <= interval.to(); i++) { |
2928 intptr_t mod_character = (i & kMask); | 2885 int mod_character = (i & kMask); |
2929 if (!map_->At(mod_character)) { | 2886 if (!map_->at(mod_character)) { |
2930 map_count_++; | 2887 map_count_++; |
2931 (*map_)[mod_character] = true; | 2888 map_->at(mod_character) = true; |
2932 } | 2889 } |
2933 if (map_count_ == kMapSize) return; | 2890 if (map_count_ == kMapSize) return; |
2934 } | 2891 } |
2935 } | 2892 } |
2936 | 2893 |
2937 | 2894 |
2938 void BoyerMoorePositionInfo::SetAll() { | 2895 void BoyerMoorePositionInfo::SetAll() { |
2939 s_ = w_ = d_ = kLatticeUnknown; | 2896 s_ = w_ = d_ = kLatticeUnknown; |
2940 if (map_count_ != kMapSize) { | 2897 if (map_count_ != kMapSize) { |
2941 map_count_ = kMapSize; | 2898 map_count_ = kMapSize; |
2942 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true; | 2899 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; |
2943 } | 2900 } |
2944 } | 2901 } |
2945 | 2902 |
2946 | 2903 |
2947 BoyerMooreLookahead::BoyerMooreLookahead( | 2904 BoyerMooreLookahead::BoyerMooreLookahead( |
2948 intptr_t length, RegExpCompiler* compiler, Isolate* isolate) | 2905 int length, RegExpCompiler* compiler, Zone* zone) |
2949 : length_(length), | 2906 : length_(length), |
2950 compiler_(compiler) { | 2907 compiler_(compiler) { |
2951 if (compiler->one_byte()) { | 2908 if (compiler->one_byte()) { |
2952 max_char_ = Symbols::kMaxOneCharCodeSymbol; | 2909 max_char_ = String::kMaxOneByteCharCode; |
2953 } else { | 2910 } else { |
2954 max_char_ = Utf16::kMaxCodeUnit; | 2911 max_char_ = String::kMaxUtf16CodeUnit; |
2955 } | 2912 } |
2956 bitmaps_ = new(isolate) ZoneGrowableArray<BoyerMoorePositionInfo*>(length); | 2913 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone); |
2957 for (intptr_t i = 0; i < length; i++) { | 2914 for (int i = 0; i < length; i++) { |
2958 bitmaps_->Add(new(isolate) BoyerMoorePositionInfo(isolate)); | 2915 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone); |
2959 } | 2916 } |
2960 } | 2917 } |
2961 | 2918 |
2962 | 2919 |
2963 // Find the longest range of lookahead that has the fewest number of different | 2920 // Find the longest range of lookahead that has the fewest number of different |
2964 // characters that can occur at a given position. Since we are optimizing two | 2921 // characters that can occur at a given position. Since we are optimizing two |
2965 // different parameters at once this is a tradeoff. | 2922 // different parameters at once this is a tradeoff. |
2966 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) { | 2923 bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) { |
2967 intptr_t biggest_points = 0; | 2924 int biggest_points = 0; |
2968 // If more than 32 characters out of 128 can occur it is unlikely that we can | 2925 // If more than 32 characters out of 128 can occur it is unlikely that we can |
2969 // be lucky enough to step forwards much of the time. | 2926 // be lucky enough to step forwards much of the time. |
2970 const intptr_t kMaxMax = 32; | 2927 const int kMaxMax = 32; |
2971 for (intptr_t max_number_of_chars = 4; | 2928 for (int max_number_of_chars = 4; |
2972 max_number_of_chars < kMaxMax; | 2929 max_number_of_chars < kMaxMax; |
2973 max_number_of_chars *= 2) { | 2930 max_number_of_chars *= 2) { |
2974 biggest_points = | 2931 biggest_points = |
2975 FindBestInterval(max_number_of_chars, biggest_points, from, to); | 2932 FindBestInterval(max_number_of_chars, biggest_points, from, to); |
2976 } | 2933 } |
2977 if (biggest_points == 0) return false; | 2934 if (biggest_points == 0) return false; |
2978 return true; | 2935 return true; |
2979 } | 2936 } |
2980 | 2937 |
2981 | 2938 |
2982 // Find the highest-points range between 0 and length_ where the character | 2939 // Find the highest-points range between 0 and length_ where the character |
2983 // information is not too vague. 'Too vague' means that there are more than | 2940 // information is not too vague. 'Too vague' means that there are more than |
2984 // max_number_of_chars that can occur at this position. Calculates the number | 2941 // max_number_of_chars that can occur at this position. Calculates the number |
2985 // of points as the product of width-of-the-range and | 2942 // of points as the product of width-of-the-range and |
2986 // probability-of-finding-one-of-the-characters, where the probability is | 2943 // probability-of-finding-one-of-the-characters, where the probability is |
2987 // calculated using the frequency distribution of the sample subject string. | 2944 // calculated using the frequency distribution of the sample subject string. |
2988 intptr_t BoyerMooreLookahead::FindBestInterval( | 2945 int BoyerMooreLookahead::FindBestInterval( |
2989 intptr_t max_number_of_chars, | 2946 int max_number_of_chars, int old_biggest_points, int* from, int* to) { |
2990 intptr_t old_biggest_points, | 2947 int biggest_points = old_biggest_points; |
2991 intptr_t* from, | 2948 static const int kSize = RegExpMacroAssembler::kTableSize; |
2992 intptr_t* to) { | 2949 for (int i = 0; i < length_; ) { |
2993 intptr_t biggest_points = old_biggest_points; | |
2994 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; | |
2995 for (intptr_t i = 0; i < length_; ) { | |
2996 while (i < length_ && Count(i) > max_number_of_chars) i++; | 2950 while (i < length_ && Count(i) > max_number_of_chars) i++; |
2997 if (i == length_) break; | 2951 if (i == length_) break; |
2998 intptr_t remembered_from = i; | 2952 int remembered_from = i; |
2999 bool union_map[kSize]; | 2953 bool union_map[kSize]; |
3000 for (intptr_t j = 0; j < kSize; j++) union_map[j] = false; | 2954 for (int j = 0; j < kSize; j++) union_map[j] = false; |
3001 while (i < length_ && Count(i) <= max_number_of_chars) { | 2955 while (i < length_ && Count(i) <= max_number_of_chars) { |
3002 BoyerMoorePositionInfo* map = bitmaps_->At(i); | 2956 BoyerMoorePositionInfo* map = bitmaps_->at(i); |
3003 for (intptr_t j = 0; j < kSize; j++) union_map[j] |= map->at(j); | 2957 for (int j = 0; j < kSize; j++) union_map[j] |= map->at(j); |
3004 i++; | 2958 i++; |
3005 } | 2959 } |
3006 intptr_t frequency = 0; | 2960 int frequency = 0; |
3007 for (intptr_t j = 0; j < kSize; j++) { | 2961 for (int j = 0; j < kSize; j++) { |
3008 if (union_map[j]) { | 2962 if (union_map[j]) { |
3009 // Add 1 to the frequency to give a small per-character boost for | 2963 // Add 1 to the frequency to give a small per-character boost for |
3010 // the cases where our sampling is not good enough and many | 2964 // the cases where our sampling is not good enough and many |
3011 // characters have a frequency of zero. This means the frequency | 2965 // characters have a frequency of zero. This means the frequency |
3012 // can theoretically be up to 2*kSize though we treat it mostly as | 2966 // can theoretically be up to 2*kSize though we treat it mostly as |
3013 // a fraction of kSize. | 2967 // a fraction of kSize. |
3014 frequency += compiler_->frequency_collator()->Frequency(j) + 1; | 2968 frequency += compiler_->frequency_collator()->Frequency(j) + 1; |
3015 } | 2969 } |
3016 } | 2970 } |
3017 // We use the probability of skipping times the distance we are skipping to | 2971 // We use the probability of skipping times the distance we are skipping to |
3018 // judge the effectiveness of this. Actually we have a cut-off: By | 2972 // judge the effectiveness of this. Actually we have a cut-off: By |
3019 // dividing by 2 we switch off the skipping if the probability of skipping | 2973 // dividing by 2 we switch off the skipping if the probability of skipping |
3020 // is less than 50%. This is because the multibyte mask-and-compare | 2974 // is less than 50%. This is because the multibyte mask-and-compare |
3021 // skipping in quickcheck is more likely to do well on this case. | 2975 // skipping in quickcheck is more likely to do well on this case. |
3022 bool in_quickcheck_range = ((i - remembered_from < 4) || | 2976 bool in_quickcheck_range = |
3023 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2)); | 2977 ((i - remembered_from < 4) || |
| 2978 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2)); |
3024 // Called 'probability' but it is only a rough estimate and can actually | 2979 // Called 'probability' but it is only a rough estimate and can actually |
3025 // be outside the 0-kSize range. | 2980 // be outside the 0-kSize range. |
3026 intptr_t probability = | 2981 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency; |
3027 (in_quickcheck_range ? kSize / 2 : kSize) - frequency; | 2982 int points = (i - remembered_from) * probability; |
3028 intptr_t points = (i - remembered_from) * probability; | |
3029 if (points > biggest_points) { | 2983 if (points > biggest_points) { |
3030 *from = remembered_from; | 2984 *from = remembered_from; |
3031 *to = i - 1; | 2985 *to = i - 1; |
3032 biggest_points = points; | 2986 biggest_points = points; |
3033 } | 2987 } |
3034 } | 2988 } |
3035 return biggest_points; | 2989 return biggest_points; |
3036 } | 2990 } |
3037 | 2991 |
3038 | 2992 |
3039 // Take all the characters that will not prevent a successful match if they | 2993 // Take all the characters that will not prevent a successful match if they |
3040 // occur in the subject string in the range between min_lookahead and | 2994 // occur in the subject string in the range between min_lookahead and |
3041 // max_lookahead (inclusive) measured from the current position. If the | 2995 // max_lookahead (inclusive) measured from the current position. If the |
3042 // character at max_lookahead offset is not one of these characters, then we | 2996 // character at max_lookahead offset is not one of these characters, then we |
3043 // can safely skip forwards by the number of characters in the range. | 2997 // can safely skip forwards by the number of characters in the range. |
3044 intptr_t BoyerMooreLookahead::GetSkipTable( | 2998 int BoyerMooreLookahead::GetSkipTable(int min_lookahead, |
3045 intptr_t min_lookahead, | 2999 int max_lookahead, |
3046 intptr_t max_lookahead, | 3000 Handle<ByteArray> boolean_skip_table) { |
3047 const TypedData& boolean_skip_table) { | 3001 const int kSize = RegExpMacroAssembler::kTableSize; |
3048 const intptr_t kSize = RegExpMacroAssembler::kTableSize; | |
3049 | 3002 |
3050 const intptr_t kSkipArrayEntry = 0; | 3003 const int kSkipArrayEntry = 0; |
3051 const intptr_t kDontSkipArrayEntry = 1; | 3004 const int kDontSkipArrayEntry = 1; |
3052 | 3005 |
3053 for (intptr_t i = 0; i < kSize; i++) { | 3006 for (int i = 0; i < kSize; i++) { |
3054 boolean_skip_table.SetUint8(i, kSkipArrayEntry); | 3007 boolean_skip_table->set(i, kSkipArrayEntry); |
3055 } | 3008 } |
3056 intptr_t skip = max_lookahead + 1 - min_lookahead; | 3009 int skip = max_lookahead + 1 - min_lookahead; |
3057 | 3010 |
3058 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) { | 3011 for (int i = max_lookahead; i >= min_lookahead; i--) { |
3059 BoyerMoorePositionInfo* map = bitmaps_->At(i); | 3012 BoyerMoorePositionInfo* map = bitmaps_->at(i); |
3060 for (intptr_t j = 0; j < kSize; j++) { | 3013 for (int j = 0; j < kSize; j++) { |
3061 if (map->at(j)) { | 3014 if (map->at(j)) { |
3062 boolean_skip_table.SetUint8(j, kDontSkipArrayEntry); | 3015 boolean_skip_table->set(j, kDontSkipArrayEntry); |
3063 } | 3016 } |
3064 } | 3017 } |
3065 } | 3018 } |
3066 | 3019 |
3067 return skip; | 3020 return skip; |
3068 } | 3021 } |
3069 | 3022 |
3070 | 3023 |
3071 // See comment above on the implementation of GetSkipTable. | 3024 // See comment above on the implementation of GetSkipTable. |
3072 void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { | 3025 void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { |
3073 const intptr_t kSize = RegExpMacroAssembler::kTableSize; | 3026 const int kSize = RegExpMacroAssembler::kTableSize; |
3074 | 3027 |
3075 intptr_t min_lookahead = 0; | 3028 int min_lookahead = 0; |
3076 intptr_t max_lookahead = 0; | 3029 int max_lookahead = 0; |
3077 | 3030 |
3078 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return; | 3031 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return; |
3079 | 3032 |
3080 bool found_single_character = false; | 3033 bool found_single_character = false; |
3081 intptr_t single_character = 0; | 3034 int single_character = 0; |
3082 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) { | 3035 for (int i = max_lookahead; i >= min_lookahead; i--) { |
3083 BoyerMoorePositionInfo* map = bitmaps_->At(i); | 3036 BoyerMoorePositionInfo* map = bitmaps_->at(i); |
3084 if (map->map_count() > 1 || | 3037 if (map->map_count() > 1 || |
3085 (found_single_character && map->map_count() != 0)) { | 3038 (found_single_character && map->map_count() != 0)) { |
3086 found_single_character = false; | 3039 found_single_character = false; |
3087 break; | 3040 break; |
3088 } | 3041 } |
3089 for (intptr_t j = 0; j < kSize; j++) { | 3042 for (int j = 0; j < kSize; j++) { |
3090 if (map->at(j)) { | 3043 if (map->at(j)) { |
3091 found_single_character = true; | 3044 found_single_character = true; |
3092 single_character = j; | 3045 single_character = j; |
3093 break; | 3046 break; |
3094 } | 3047 } |
3095 } | 3048 } |
3096 } | 3049 } |
3097 | 3050 |
3098 intptr_t lookahead_width = max_lookahead + 1 - min_lookahead; | 3051 int lookahead_width = max_lookahead + 1 - min_lookahead; |
3099 | 3052 |
3100 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) { | 3053 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) { |
3101 // The mask-compare can probably handle this better. | 3054 // The mask-compare can probably handle this better. |
3102 return; | 3055 return; |
3103 } | 3056 } |
3104 | 3057 |
3105 if (found_single_character) { | 3058 if (found_single_character) { |
3106 BlockLabel cont, again; | 3059 Label cont, again; |
3107 masm->BindBlock(&again); | 3060 masm->Bind(&again); |
3108 masm->LoadCurrentCharacter(max_lookahead, &cont, true); | 3061 masm->LoadCurrentCharacter(max_lookahead, &cont, true); |
3109 if (max_char_ > kSize) { | 3062 if (max_char_ > kSize) { |
3110 masm->CheckCharacterAfterAnd(single_character, | 3063 masm->CheckCharacterAfterAnd(single_character, |
3111 RegExpMacroAssembler::kTableMask, | 3064 RegExpMacroAssembler::kTableMask, |
3112 &cont); | 3065 &cont); |
3113 } else { | 3066 } else { |
3114 masm->CheckCharacter(single_character, &cont); | 3067 masm->CheckCharacter(single_character, &cont); |
3115 } | 3068 } |
3116 masm->AdvanceCurrentPosition(lookahead_width); | 3069 masm->AdvanceCurrentPosition(lookahead_width); |
3117 masm->GoTo(&again); | 3070 masm->GoTo(&again); |
3118 masm->BindBlock(&cont); | 3071 masm->Bind(&cont); |
3119 return; | 3072 return; |
3120 } | 3073 } |
3121 | 3074 |
3122 const TypedData& boolean_skip_table = TypedData::ZoneHandle( | 3075 Factory* factory = masm->zone()->isolate()->factory(); |
3123 compiler_->isolate(), | 3076 Handle<ByteArray> boolean_skip_table = factory->NewByteArray(kSize, TENURED); |
3124 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); | 3077 int skip_distance = GetSkipTable( |
3125 intptr_t skip_distance = GetSkipTable( | |
3126 min_lookahead, max_lookahead, boolean_skip_table); | 3078 min_lookahead, max_lookahead, boolean_skip_table); |
3127 ASSERT(skip_distance != 0); | 3079 DCHECK(skip_distance != 0); |
3128 | 3080 |
3129 BlockLabel cont, again; | 3081 Label cont, again; |
3130 | 3082 masm->Bind(&again); |
3131 masm->BindBlock(&again); | |
3132 masm->LoadCurrentCharacter(max_lookahead, &cont, true); | 3083 masm->LoadCurrentCharacter(max_lookahead, &cont, true); |
3133 masm->CheckBitInTable(boolean_skip_table, &cont); | 3084 masm->CheckBitInTable(boolean_skip_table, &cont); |
3134 masm->AdvanceCurrentPosition(skip_distance); | 3085 masm->AdvanceCurrentPosition(skip_distance); |
3135 masm->GoTo(&again); | 3086 masm->GoTo(&again); |
3136 masm->BindBlock(&cont); | 3087 masm->Bind(&cont); |
3137 | |
3138 return; | |
3139 } | 3088 } |
3140 | 3089 |
3141 | 3090 |
3142 /* Code generation for choice nodes. | 3091 /* Code generation for choice nodes. |
3143 * | 3092 * |
3144 * We generate quick checks that do a mask and compare to eliminate a | 3093 * We generate quick checks that do a mask and compare to eliminate a |
3145 * choice. If the quick check succeeds then it jumps to the continuation to | 3094 * choice. If the quick check succeeds then it jumps to the continuation to |
3146 * do slow checks and check subsequent nodes. If it fails (the common case) | 3095 * do slow checks and check subsequent nodes. If it fails (the common case) |
3147 * it falls through to the next choice. | 3096 * it falls through to the next choice. |
3148 * | 3097 * |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3214 */ | 3163 */ |
3215 | 3164 |
3216 GreedyLoopState::GreedyLoopState(bool not_at_start) { | 3165 GreedyLoopState::GreedyLoopState(bool not_at_start) { |
3217 counter_backtrack_trace_.set_backtrack(&label_); | 3166 counter_backtrack_trace_.set_backtrack(&label_); |
3218 if (not_at_start) counter_backtrack_trace_.set_at_start(false); | 3167 if (not_at_start) counter_backtrack_trace_.set_at_start(false); |
3219 } | 3168 } |
3220 | 3169 |
3221 | 3170 |
3222 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { | 3171 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { |
3223 #ifdef DEBUG | 3172 #ifdef DEBUG |
3224 intptr_t choice_count = alternatives_->length(); | 3173 int choice_count = alternatives_->length(); |
3225 for (intptr_t i = 0; i < choice_count - 1; i++) { | 3174 for (int i = 0; i < choice_count - 1; i++) { |
3226 GuardedAlternative alternative = alternatives_->At(i); | 3175 GuardedAlternative alternative = alternatives_->at(i); |
3227 ZoneGrowableArray<Guard*>* guards = alternative.guards(); | 3176 ZoneList<Guard*>* guards = alternative.guards(); |
3228 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); | 3177 int guard_count = (guards == NULL) ? 0 : guards->length(); |
3229 for (intptr_t j = 0; j < guard_count; j++) { | 3178 for (int j = 0; j < guard_count; j++) { |
3230 ASSERT(!trace->mentions_reg(guards->At(j)->reg())); | 3179 DCHECK(!trace->mentions_reg(guards->at(j)->reg())); |
3231 } | 3180 } |
3232 } | 3181 } |
3233 #endif | 3182 #endif |
3234 } | 3183 } |
3235 | 3184 |
3236 | 3185 |
3237 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, | 3186 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, |
3238 Trace* current_trace, | 3187 Trace* current_trace, |
3239 PreloadState* state) { | 3188 PreloadState* state) { |
3240 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { | 3189 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { |
3241 // Save some time by looking at most one machine word ahead. | 3190 // Save some time by looking at most one machine word ahead. |
3242 state->eats_at_least_ = | 3191 state->eats_at_least_ = |
3243 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, | 3192 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, |
3244 current_trace->at_start() == Trace::FALSE_VALUE); | 3193 current_trace->at_start() == Trace::FALSE_VALUE); |
3245 } | 3194 } |
3246 state->preload_characters_ = | 3195 state->preload_characters_ = |
3247 CalculatePreloadCharacters(compiler, state->eats_at_least_); | 3196 CalculatePreloadCharacters(compiler, state->eats_at_least_); |
3248 | 3197 |
3249 state->preload_is_current_ = | 3198 state->preload_is_current_ = |
3250 (current_trace->characters_preloaded() == state->preload_characters_); | 3199 (current_trace->characters_preloaded() == state->preload_characters_); |
3251 state->preload_has_checked_bounds_ = state->preload_is_current_; | 3200 state->preload_has_checked_bounds_ = state->preload_is_current_; |
3252 } | 3201 } |
3253 | 3202 |
3254 | 3203 |
3255 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3204 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
3256 intptr_t choice_count = alternatives_->length(); | 3205 int choice_count = alternatives_->length(); |
3257 | 3206 |
3258 AssertGuardsMentionRegisters(trace); | 3207 AssertGuardsMentionRegisters(trace); |
3259 | 3208 |
3260 LimitResult limit_result = LimitVersions(compiler, trace); | 3209 LimitResult limit_result = LimitVersions(compiler, trace); |
3261 if (limit_result == DONE) return; | 3210 if (limit_result == DONE) return; |
3262 ASSERT(limit_result == CONTINUE); | 3211 DCHECK(limit_result == CONTINUE); |
3263 | 3212 |
3264 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for | 3213 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for |
3265 // other choice nodes we only flush if we are out of code size budget. | 3214 // other choice nodes we only flush if we are out of code size budget. |
3266 if (trace->flush_budget() == 0 && trace->actions() != NULL) { | 3215 if (trace->flush_budget() == 0 && trace->actions() != NULL) { |
3267 trace->Flush(compiler, this); | 3216 trace->Flush(compiler, this); |
3268 return; | 3217 return; |
3269 } | 3218 } |
3270 | 3219 |
3271 RecursionCheck rc(compiler); | 3220 RecursionCheck rc(compiler); |
3272 | 3221 |
3273 PreloadState preload; | 3222 PreloadState preload; |
3274 preload.init(); | 3223 preload.init(); |
3275 GreedyLoopState greedy_loop_state(not_at_start()); | 3224 GreedyLoopState greedy_loop_state(not_at_start()); |
3276 | 3225 |
3277 intptr_t text_length = | 3226 int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0)); |
3278 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); | 3227 AlternativeGenerationList alt_gens(choice_count, zone()); |
3279 AlternativeGenerationList alt_gens(choice_count); | |
3280 | 3228 |
3281 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { | 3229 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { |
3282 trace = EmitGreedyLoop(compiler, | 3230 trace = EmitGreedyLoop(compiler, |
3283 trace, | 3231 trace, |
3284 &alt_gens, | 3232 &alt_gens, |
3285 &preload, | 3233 &preload, |
3286 &greedy_loop_state, | 3234 &greedy_loop_state, |
3287 text_length); | 3235 text_length); |
3288 } else { | 3236 } else { |
3289 // TODO(erikcorry): Delete this. We don't need this label, but it makes us | 3237 // TODO(erikcorry): Delete this. We don't need this label, but it makes us |
3290 // match the traces produced pre-cleanup. | 3238 // match the traces produced pre-cleanup. |
3291 BlockLabel second_choice; | 3239 Label second_choice; |
3292 compiler->macro_assembler()->BindBlock(&second_choice); | 3240 compiler->macro_assembler()->Bind(&second_choice); |
3293 | 3241 |
3294 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); | 3242 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); |
3295 | 3243 |
3296 EmitChoices(compiler, | 3244 EmitChoices(compiler, |
3297 &alt_gens, | 3245 &alt_gens, |
3298 0, | 3246 0, |
3299 trace, | 3247 trace, |
3300 &preload); | 3248 &preload); |
3301 } | 3249 } |
3302 | 3250 |
3303 // At this point we need to generate slow checks for the alternatives where | 3251 // At this point we need to generate slow checks for the alternatives where |
3304 // the quick check was inlined. We can recognize these because the associated | 3252 // the quick check was inlined. We can recognize these because the associated |
3305 // label was bound. | 3253 // label was bound. |
3306 intptr_t new_flush_budget = trace->flush_budget() / choice_count; | 3254 int new_flush_budget = trace->flush_budget() / choice_count; |
3307 for (intptr_t i = 0; i < choice_count; i++) { | 3255 for (int i = 0; i < choice_count; i++) { |
3308 AlternativeGeneration* alt_gen = alt_gens.at(i); | 3256 AlternativeGeneration* alt_gen = alt_gens.at(i); |
3309 Trace new_trace(*trace); | 3257 Trace new_trace(*trace); |
3310 // If there are actions to be flushed we have to limit how many times | 3258 // If there are actions to be flushed we have to limit how many times |
3311 // they are flushed. Take the budget of the parent trace and distribute | 3259 // they are flushed. Take the budget of the parent trace and distribute |
3312 // it fairly amongst the children. | 3260 // it fairly amongst the children. |
3313 if (new_trace.actions() != NULL) { | 3261 if (new_trace.actions() != NULL) { |
3314 new_trace.set_flush_budget(new_flush_budget); | 3262 new_trace.set_flush_budget(new_flush_budget); |
3315 } | 3263 } |
3316 bool next_expects_preload = | 3264 bool next_expects_preload = |
3317 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload; | 3265 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload; |
3318 EmitOutOfLineContinuation(compiler, | 3266 EmitOutOfLineContinuation(compiler, |
3319 &new_trace, | 3267 &new_trace, |
3320 alternatives_->At(i), | 3268 alternatives_->at(i), |
3321 alt_gen, | 3269 alt_gen, |
3322 preload.preload_characters_, | 3270 preload.preload_characters_, |
3323 next_expects_preload); | 3271 next_expects_preload); |
3324 } | 3272 } |
3325 } | 3273 } |
3326 | 3274 |
| 3275 |
3327 Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, | 3276 Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, |
3328 Trace* trace, | 3277 Trace* trace, |
3329 AlternativeGenerationList* alt_gens, | 3278 AlternativeGenerationList* alt_gens, |
3330 PreloadState* preload, | 3279 PreloadState* preload, |
3331 GreedyLoopState* greedy_loop_state, | 3280 GreedyLoopState* greedy_loop_state, |
3332 intptr_t text_length) { | 3281 int text_length) { |
3333 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3282 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
3334 // Here we have special handling for greedy loops containing only text nodes | 3283 // Here we have special handling for greedy loops containing only text nodes |
3335 // and other simple nodes. These are handled by pushing the current | 3284 // and other simple nodes. These are handled by pushing the current |
3336 // position on the stack and then incrementing the current position each | 3285 // position on the stack and then incrementing the current position each |
3337 // time around the switch. On backtrack we decrement the current position | 3286 // time around the switch. On backtrack we decrement the current position |
3338 // and check it against the pushed value. This avoids pushing backtrack | 3287 // and check it against the pushed value. This avoids pushing backtrack |
3339 // information for each iteration of the loop, which could take up a lot of | 3288 // information for each iteration of the loop, which could take up a lot of |
3340 // space. | 3289 // space. |
3341 ASSERT(trace->stop_node() == NULL); | 3290 DCHECK(trace->stop_node() == NULL); |
3342 macro_assembler->PushCurrentPosition(); | 3291 macro_assembler->PushCurrentPosition(); |
3343 BlockLabel greedy_match_failed; | 3292 Label greedy_match_failed; |
3344 Trace greedy_match_trace; | 3293 Trace greedy_match_trace; |
3345 if (not_at_start()) greedy_match_trace.set_at_start(false); | 3294 if (not_at_start()) greedy_match_trace.set_at_start(false); |
3346 greedy_match_trace.set_backtrack(&greedy_match_failed); | 3295 greedy_match_trace.set_backtrack(&greedy_match_failed); |
3347 BlockLabel loop_label; | 3296 Label loop_label; |
3348 macro_assembler->BindBlock(&loop_label); | 3297 macro_assembler->Bind(&loop_label); |
3349 greedy_match_trace.set_stop_node(this); | 3298 greedy_match_trace.set_stop_node(this); |
3350 greedy_match_trace.set_loop_label(&loop_label); | 3299 greedy_match_trace.set_loop_label(&loop_label); |
3351 (*alternatives_)[0].node()->Emit(compiler, &greedy_match_trace); | 3300 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace); |
3352 macro_assembler->BindBlock(&greedy_match_failed); | 3301 macro_assembler->Bind(&greedy_match_failed); |
3353 | 3302 |
3354 BlockLabel second_choice; // For use in greedy matches. | 3303 Label second_choice; // For use in greedy matches. |
3355 macro_assembler->BindBlock(&second_choice); | 3304 macro_assembler->Bind(&second_choice); |
3356 | 3305 |
3357 Trace* new_trace = greedy_loop_state->counter_backtrack_trace(); | 3306 Trace* new_trace = greedy_loop_state->counter_backtrack_trace(); |
3358 | 3307 |
3359 EmitChoices(compiler, | 3308 EmitChoices(compiler, |
3360 alt_gens, | 3309 alt_gens, |
3361 1, | 3310 1, |
3362 new_trace, | 3311 new_trace, |
3363 preload); | 3312 preload); |
3364 | 3313 |
3365 macro_assembler->BindBlock(greedy_loop_state->label()); | 3314 macro_assembler->Bind(greedy_loop_state->label()); |
3366 // If we have unwound to the bottom then backtrack. | 3315 // If we have unwound to the bottom then backtrack. |
3367 macro_assembler->CheckGreedyLoop(trace->backtrack()); | 3316 macro_assembler->CheckGreedyLoop(trace->backtrack()); |
3368 // Otherwise try the second priority at an earlier position. | 3317 // Otherwise try the second priority at an earlier position. |
3369 macro_assembler->AdvanceCurrentPosition(-text_length); | 3318 macro_assembler->AdvanceCurrentPosition(-text_length); |
3370 macro_assembler->GoTo(&second_choice); | 3319 macro_assembler->GoTo(&second_choice); |
3371 return new_trace; | 3320 return new_trace; |
3372 } | 3321 } |
3373 | 3322 |
3374 | 3323 int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, |
3375 intptr_t ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, | 3324 Trace* trace) { |
3376 Trace* trace) { | 3325 int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized; |
3377 intptr_t eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized; | |
3378 if (alternatives_->length() != 2) return eats_at_least; | 3326 if (alternatives_->length() != 2) return eats_at_least; |
3379 | 3327 |
3380 GuardedAlternative alt1 = alternatives_->At(1); | 3328 GuardedAlternative alt1 = alternatives_->at(1); |
3381 if (alt1.guards() != NULL && alt1.guards()->length() != 0) { | 3329 if (alt1.guards() != NULL && alt1.guards()->length() != 0) { |
3382 return eats_at_least; | 3330 return eats_at_least; |
3383 } | 3331 } |
3384 RegExpNode* eats_anything_node = alt1.node(); | 3332 RegExpNode* eats_anything_node = alt1.node(); |
3385 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) { | 3333 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) { |
3386 return eats_at_least; | 3334 return eats_at_least; |
3387 } | 3335 } |
3388 | 3336 |
3389 // Really we should be creating a new trace when we execute this function, | 3337 // Really we should be creating a new trace when we execute this function, |
3390 // but there is no need, because the code it generates cannot backtrack, and | 3338 // but there is no need, because the code it generates cannot backtrack, and |
3391 // we always arrive here with a trivial trace (since it's the entry to a | 3339 // we always arrive here with a trivial trace (since it's the entry to a |
3392 // loop. That also implies that there are no preloaded characters, which is | 3340 // loop. That also implies that there are no preloaded characters, which is |
3393 // good, because it means we won't be violating any assumptions by | 3341 // good, because it means we won't be violating any assumptions by |
3394 // overwriting those characters with new load instructions. | 3342 // overwriting those characters with new load instructions. |
3395 ASSERT(trace->is_trivial()); | 3343 DCHECK(trace->is_trivial()); |
3396 | 3344 |
3397 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3345 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
3398 // At this point we know that we are at a non-greedy loop that will eat | 3346 // At this point we know that we are at a non-greedy loop that will eat |
3399 // any character one at a time. Any non-anchored regexp has such a | 3347 // any character one at a time. Any non-anchored regexp has such a |
3400 // loop prepended to it in order to find where it starts. We look for | 3348 // loop prepended to it in order to find where it starts. We look for |
3401 // a pattern of the form ...abc... where we can look 6 characters ahead | 3349 // a pattern of the form ...abc... where we can look 6 characters ahead |
3402 // and step forwards 3 if the character is not one of abc. Abc need | 3350 // and step forwards 3 if the character is not one of abc. Abc need |
3403 // not be atoms, they can be any reasonably limited character class or | 3351 // not be atoms, they can be any reasonably limited character class or |
3404 // small alternation. | 3352 // small alternation. |
3405 BoyerMooreLookahead* bm = bm_info(false); | 3353 BoyerMooreLookahead* bm = bm_info(false); |
3406 if (bm == NULL) { | 3354 if (bm == NULL) { |
3407 eats_at_least = Utils::Minimum(kMaxLookaheadForBoyerMoore, | 3355 eats_at_least = Min(kMaxLookaheadForBoyerMoore, |
3408 EatsAtLeast(kMaxLookaheadForBoyerMoore, | 3356 EatsAtLeast(kMaxLookaheadForBoyerMoore, |
3409 kRecursionBudget, | 3357 kRecursionBudget, |
3410 false)); | 3358 false)); |
3411 if (eats_at_least >= 1) { | 3359 if (eats_at_least >= 1) { |
3412 bm = new(I) BoyerMooreLookahead(eats_at_least, compiler, I); | 3360 bm = new(zone()) BoyerMooreLookahead(eats_at_least, |
3413 GuardedAlternative alt0 = alternatives_->At(0); | 3361 compiler, |
| 3362 zone()); |
| 3363 GuardedAlternative alt0 = alternatives_->at(0); |
3414 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); | 3364 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); |
3415 } | 3365 } |
3416 } | 3366 } |
3417 if (bm != NULL) { | 3367 if (bm != NULL) { |
3418 bm->EmitSkipInstructions(macro_assembler); | 3368 bm->EmitSkipInstructions(macro_assembler); |
3419 } | 3369 } |
3420 return eats_at_least; | 3370 return eats_at_least; |
3421 } | 3371 } |
3422 | 3372 |
3423 | 3373 |
3424 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, | 3374 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, |
3425 AlternativeGenerationList* alt_gens, | 3375 AlternativeGenerationList* alt_gens, |
3426 intptr_t first_choice, | 3376 int first_choice, |
3427 Trace* trace, | 3377 Trace* trace, |
3428 PreloadState* preload) { | 3378 PreloadState* preload) { |
3429 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3379 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
3430 SetUpPreLoad(compiler, trace, preload); | 3380 SetUpPreLoad(compiler, trace, preload); |
3431 | 3381 |
3432 // For now we just call all choices one after the other. The idea ultimately | 3382 // For now we just call all choices one after the other. The idea ultimately |
3433 // is to use the Dispatch table to try only the relevant ones. | 3383 // is to use the Dispatch table to try only the relevant ones. |
3434 intptr_t choice_count = alternatives_->length(); | 3384 int choice_count = alternatives_->length(); |
3435 | 3385 |
3436 intptr_t new_flush_budget = trace->flush_budget() / choice_count; | 3386 int new_flush_budget = trace->flush_budget() / choice_count; |
3437 | 3387 |
3438 for (intptr_t i = first_choice; i < choice_count; i++) { | 3388 for (int i = first_choice; i < choice_count; i++) { |
3439 bool is_last = i == choice_count - 1; | 3389 bool is_last = i == choice_count - 1; |
3440 bool fall_through_on_failure = !is_last; | 3390 bool fall_through_on_failure = !is_last; |
3441 GuardedAlternative alternative = alternatives_->At(i); | 3391 GuardedAlternative alternative = alternatives_->at(i); |
3442 AlternativeGeneration* alt_gen = alt_gens->at(i); | 3392 AlternativeGeneration* alt_gen = alt_gens->at(i); |
3443 alt_gen->quick_check_details.set_characters(preload->preload_characters_); | 3393 alt_gen->quick_check_details.set_characters(preload->preload_characters_); |
3444 ZoneGrowableArray<Guard*>* guards = alternative.guards(); | 3394 ZoneList<Guard*>* guards = alternative.guards(); |
3445 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); | 3395 int guard_count = (guards == NULL) ? 0 : guards->length(); |
3446 Trace new_trace(*trace); | 3396 Trace new_trace(*trace); |
3447 new_trace.set_characters_preloaded(preload->preload_is_current_ ? | 3397 new_trace.set_characters_preloaded(preload->preload_is_current_ ? |
3448 preload->preload_characters_ : | 3398 preload->preload_characters_ : |
3449 0); | 3399 0); |
3450 if (preload->preload_has_checked_bounds_) { | 3400 if (preload->preload_has_checked_bounds_) { |
3451 new_trace.set_bound_checked_up_to(preload->preload_characters_); | 3401 new_trace.set_bound_checked_up_to(preload->preload_characters_); |
3452 } | 3402 } |
3453 new_trace.quick_check_performed()->Clear(); | 3403 new_trace.quick_check_performed()->Clear(); |
3454 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); | 3404 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); |
3455 if (!is_last) { | 3405 if (!is_last) { |
3456 new_trace.set_backtrack(&alt_gen->after); | 3406 new_trace.set_backtrack(&alt_gen->after); |
3457 } | 3407 } |
3458 alt_gen->expects_preload = preload->preload_is_current_; | 3408 alt_gen->expects_preload = preload->preload_is_current_; |
3459 bool generate_full_check_inline = false; | 3409 bool generate_full_check_inline = false; |
3460 if (kRegexpOptimization && | 3410 if (FLAG_regexp_optimization && |
3461 try_to_emit_quick_check_for_alternative(i == 0) && | 3411 try_to_emit_quick_check_for_alternative(i == 0) && |
3462 alternative.node()->EmitQuickCheck(compiler, | 3412 alternative.node()->EmitQuickCheck(compiler, |
3463 trace, | 3413 trace, |
3464 &new_trace, | 3414 &new_trace, |
3465 preload->preload_has_checked_bounds_, | 3415 preload->preload_has_checked_bounds_, |
3466 &alt_gen->possible_success, | 3416 &alt_gen->possible_success, |
3467 &alt_gen->quick_check_details, | 3417 &alt_gen->quick_check_details, |
3468 fall_through_on_failure)) { | 3418 fall_through_on_failure)) { |
3469 // Quick check was generated for this choice. | 3419 // Quick check was generated for this choice. |
3470 preload->preload_is_current_ = true; | 3420 preload->preload_is_current_ = true; |
3471 preload->preload_has_checked_bounds_ = true; | 3421 preload->preload_has_checked_bounds_ = true; |
3472 // If we generated the quick check to fall through on possible success, | 3422 // If we generated the quick check to fall through on possible success, |
3473 // we now need to generate the full check inline. | 3423 // we now need to generate the full check inline. |
3474 if (!fall_through_on_failure) { | 3424 if (!fall_through_on_failure) { |
3475 macro_assembler->BindBlock(&alt_gen->possible_success); | 3425 macro_assembler->Bind(&alt_gen->possible_success); |
3476 new_trace.set_quick_check_performed(&alt_gen->quick_check_details); | 3426 new_trace.set_quick_check_performed(&alt_gen->quick_check_details); |
3477 new_trace.set_characters_preloaded(preload->preload_characters_); | 3427 new_trace.set_characters_preloaded(preload->preload_characters_); |
3478 new_trace.set_bound_checked_up_to(preload->preload_characters_); | 3428 new_trace.set_bound_checked_up_to(preload->preload_characters_); |
3479 generate_full_check_inline = true; | 3429 generate_full_check_inline = true; |
3480 } | 3430 } |
3481 } else if (alt_gen->quick_check_details.cannot_match()) { | 3431 } else if (alt_gen->quick_check_details.cannot_match()) { |
3482 if (!fall_through_on_failure) { | 3432 if (!fall_through_on_failure) { |
3483 macro_assembler->GoTo(trace->backtrack()); | 3433 macro_assembler->GoTo(trace->backtrack()); |
3484 } | 3434 } |
3485 continue; | 3435 continue; |
3486 } else { | 3436 } else { |
3487 // No quick check was generated. Put the full code here. | 3437 // No quick check was generated. Put the full code here. |
3488 // If this is not the first choice then there could be slow checks from | 3438 // If this is not the first choice then there could be slow checks from |
3489 // previous cases that go here when they fail. There's no reason to | 3439 // previous cases that go here when they fail. There's no reason to |
3490 // insist that they preload characters since the slow check we are about | 3440 // insist that they preload characters since the slow check we are about |
3491 // to generate probably can't use it. | 3441 // to generate probably can't use it. |
3492 if (i != first_choice) { | 3442 if (i != first_choice) { |
3493 alt_gen->expects_preload = false; | 3443 alt_gen->expects_preload = false; |
3494 new_trace.InvalidateCurrentCharacter(); | 3444 new_trace.InvalidateCurrentCharacter(); |
3495 } | 3445 } |
3496 generate_full_check_inline = true; | 3446 generate_full_check_inline = true; |
3497 } | 3447 } |
3498 if (generate_full_check_inline) { | 3448 if (generate_full_check_inline) { |
3499 if (new_trace.actions() != NULL) { | 3449 if (new_trace.actions() != NULL) { |
3500 new_trace.set_flush_budget(new_flush_budget); | 3450 new_trace.set_flush_budget(new_flush_budget); |
3501 } | 3451 } |
3502 for (intptr_t j = 0; j < guard_count; j++) { | 3452 for (int j = 0; j < guard_count; j++) { |
3503 GenerateGuard(macro_assembler, guards->At(j), &new_trace); | 3453 GenerateGuard(macro_assembler, guards->at(j), &new_trace); |
3504 } | 3454 } |
3505 alternative.node()->Emit(compiler, &new_trace); | 3455 alternative.node()->Emit(compiler, &new_trace); |
3506 preload->preload_is_current_ = false; | 3456 preload->preload_is_current_ = false; |
3507 } | 3457 } |
3508 macro_assembler->BindBlock(&alt_gen->after); | 3458 macro_assembler->Bind(&alt_gen->after); |
3509 } | 3459 } |
3510 } | 3460 } |
3511 | 3461 |
3512 | 3462 |
3513 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, | 3463 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, |
3514 Trace* trace, | 3464 Trace* trace, |
3515 GuardedAlternative alternative, | 3465 GuardedAlternative alternative, |
3516 AlternativeGeneration* alt_gen, | 3466 AlternativeGeneration* alt_gen, |
3517 intptr_t preload_characters, | 3467 int preload_characters, |
3518 bool next_expects_preload) { | 3468 bool next_expects_preload) { |
3519 if (!alt_gen->possible_success.IsLinked()) return; | 3469 if (!alt_gen->possible_success.is_linked()) return; |
3520 | 3470 |
3521 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3471 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
3522 macro_assembler->BindBlock(&alt_gen->possible_success); | 3472 macro_assembler->Bind(&alt_gen->possible_success); |
3523 Trace out_of_line_trace(*trace); | 3473 Trace out_of_line_trace(*trace); |
3524 out_of_line_trace.set_characters_preloaded(preload_characters); | 3474 out_of_line_trace.set_characters_preloaded(preload_characters); |
3525 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details); | 3475 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details); |
3526 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE); | 3476 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE); |
3527 ZoneGrowableArray<Guard*>* guards = alternative.guards(); | 3477 ZoneList<Guard*>* guards = alternative.guards(); |
3528 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); | 3478 int guard_count = (guards == NULL) ? 0 : guards->length(); |
3529 if (next_expects_preload) { | 3479 if (next_expects_preload) { |
3530 BlockLabel reload_current_char; | 3480 Label reload_current_char; |
3531 out_of_line_trace.set_backtrack(&reload_current_char); | 3481 out_of_line_trace.set_backtrack(&reload_current_char); |
3532 for (intptr_t j = 0; j < guard_count; j++) { | 3482 for (int j = 0; j < guard_count; j++) { |
3533 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); | 3483 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace); |
3534 } | 3484 } |
3535 alternative.node()->Emit(compiler, &out_of_line_trace); | 3485 alternative.node()->Emit(compiler, &out_of_line_trace); |
3536 macro_assembler->BindBlock(&reload_current_char); | 3486 macro_assembler->Bind(&reload_current_char); |
3537 // Reload the current character, since the next quick check expects that. | 3487 // Reload the current character, since the next quick check expects that. |
3538 // We don't need to check bounds here because we only get into this | 3488 // We don't need to check bounds here because we only get into this |
3539 // code through a quick check which already did the checked load. | 3489 // code through a quick check which already did the checked load. |
3540 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), | 3490 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), |
3541 NULL, | 3491 NULL, |
3542 false, | 3492 false, |
3543 preload_characters); | 3493 preload_characters); |
3544 macro_assembler->GoTo(&(alt_gen->after)); | 3494 macro_assembler->GoTo(&(alt_gen->after)); |
3545 } else { | 3495 } else { |
3546 out_of_line_trace.set_backtrack(&(alt_gen->after)); | 3496 out_of_line_trace.set_backtrack(&(alt_gen->after)); |
3547 for (intptr_t j = 0; j < guard_count; j++) { | 3497 for (int j = 0; j < guard_count; j++) { |
3548 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); | 3498 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace); |
3549 } | 3499 } |
3550 alternative.node()->Emit(compiler, &out_of_line_trace); | 3500 alternative.node()->Emit(compiler, &out_of_line_trace); |
3551 } | 3501 } |
3552 } | 3502 } |
3553 | 3503 |
3554 | 3504 |
3555 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3505 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
3556 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3506 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
3557 LimitResult limit_result = LimitVersions(compiler, trace); | 3507 LimitResult limit_result = LimitVersions(compiler, trace); |
3558 if (limit_result == DONE) return; | 3508 if (limit_result == DONE) return; |
3559 ASSERT(limit_result == CONTINUE); | 3509 DCHECK(limit_result == CONTINUE); |
3560 | 3510 |
3561 RecursionCheck rc(compiler); | 3511 RecursionCheck rc(compiler); |
3562 | 3512 |
3563 switch (action_type_) { | 3513 switch (action_type_) { |
3564 case STORE_POSITION: { | 3514 case STORE_POSITION: { |
3565 Trace::DeferredCapture | 3515 Trace::DeferredCapture |
3566 new_capture(data_.u_position_register.reg, | 3516 new_capture(data_.u_position_register.reg, |
3567 data_.u_position_register.is_capture, | 3517 data_.u_position_register.is_capture, |
3568 trace); | 3518 trace); |
3569 Trace new_trace = *trace; | 3519 Trace new_trace = *trace; |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3601 trace->Flush(compiler, this); | 3551 trace->Flush(compiler, this); |
3602 } else { | 3552 } else { |
3603 assembler->WriteCurrentPositionToRegister( | 3553 assembler->WriteCurrentPositionToRegister( |
3604 data_.u_submatch.current_position_register, 0); | 3554 data_.u_submatch.current_position_register, 0); |
3605 assembler->WriteStackPointerToRegister( | 3555 assembler->WriteStackPointerToRegister( |
3606 data_.u_submatch.stack_pointer_register); | 3556 data_.u_submatch.stack_pointer_register); |
3607 on_success()->Emit(compiler, trace); | 3557 on_success()->Emit(compiler, trace); |
3608 } | 3558 } |
3609 break; | 3559 break; |
3610 case EMPTY_MATCH_CHECK: { | 3560 case EMPTY_MATCH_CHECK: { |
3611 intptr_t start_pos_reg = data_.u_empty_match_check.start_register; | 3561 int start_pos_reg = data_.u_empty_match_check.start_register; |
3612 intptr_t stored_pos = 0; | 3562 int stored_pos = 0; |
3613 intptr_t rep_reg = data_.u_empty_match_check.repetition_register; | 3563 int rep_reg = data_.u_empty_match_check.repetition_register; |
3614 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister); | 3564 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister); |
3615 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos); | 3565 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos); |
3616 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) { | 3566 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) { |
3617 // If we know we haven't advanced and there is no minimum we | 3567 // If we know we haven't advanced and there is no minimum we |
3618 // can just backtrack immediately. | 3568 // can just backtrack immediately. |
3619 assembler->GoTo(trace->backtrack()); | 3569 assembler->GoTo(trace->backtrack()); |
3620 } else if (know_dist && stored_pos < trace->cp_offset()) { | 3570 } else if (know_dist && stored_pos < trace->cp_offset()) { |
3621 // If we know we've advanced we can generate the continuation | 3571 // If we know we've advanced we can generate the continuation |
3622 // immediately. | 3572 // immediately. |
3623 on_success()->Emit(compiler, trace); | 3573 on_success()->Emit(compiler, trace); |
3624 } else if (!trace->is_trivial()) { | 3574 } else if (!trace->is_trivial()) { |
3625 trace->Flush(compiler, this); | 3575 trace->Flush(compiler, this); |
3626 } else { | 3576 } else { |
3627 BlockLabel skip_empty_check; | 3577 Label skip_empty_check; |
3628 // If we have a minimum number of repetitions we check the current | 3578 // If we have a minimum number of repetitions we check the current |
3629 // number first and skip the empty check if it's not enough. | 3579 // number first and skip the empty check if it's not enough. |
3630 if (has_minimum) { | 3580 if (has_minimum) { |
3631 intptr_t limit = data_.u_empty_match_check.repetition_limit; | 3581 int limit = data_.u_empty_match_check.repetition_limit; |
3632 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check); | 3582 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check); |
3633 } | 3583 } |
3634 // If the match is empty we bail out, otherwise we fall through | 3584 // If the match is empty we bail out, otherwise we fall through |
3635 // to the on-success continuation. | 3585 // to the on-success continuation. |
3636 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register, | 3586 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register, |
3637 trace->backtrack()); | 3587 trace->backtrack()); |
3638 assembler->BindBlock(&skip_empty_check); | 3588 assembler->Bind(&skip_empty_check); |
3639 on_success()->Emit(compiler, trace); | 3589 on_success()->Emit(compiler, trace); |
3640 } | 3590 } |
3641 break; | 3591 break; |
3642 } | 3592 } |
3643 case POSITIVE_SUBMATCH_SUCCESS: { | 3593 case POSITIVE_SUBMATCH_SUCCESS: { |
3644 if (!trace->is_trivial()) { | 3594 if (!trace->is_trivial()) { |
3645 trace->Flush(compiler, this); | 3595 trace->Flush(compiler, this); |
3646 return; | 3596 return; |
3647 } | 3597 } |
3648 assembler->ReadCurrentPositionFromRegister( | 3598 assembler->ReadCurrentPositionFromRegister( |
3649 data_.u_submatch.current_position_register); | 3599 data_.u_submatch.current_position_register); |
3650 assembler->ReadStackPointerFromRegister( | 3600 assembler->ReadStackPointerFromRegister( |
3651 data_.u_submatch.stack_pointer_register); | 3601 data_.u_submatch.stack_pointer_register); |
3652 intptr_t clear_register_count = data_.u_submatch.clear_register_count; | 3602 int clear_register_count = data_.u_submatch.clear_register_count; |
3653 if (clear_register_count == 0) { | 3603 if (clear_register_count == 0) { |
3654 on_success()->Emit(compiler, trace); | 3604 on_success()->Emit(compiler, trace); |
3655 return; | 3605 return; |
3656 } | 3606 } |
3657 intptr_t clear_registers_from = data_.u_submatch.clear_register_from; | 3607 int clear_registers_from = data_.u_submatch.clear_register_from; |
3658 BlockLabel clear_registers_backtrack; | 3608 Label clear_registers_backtrack; |
3659 Trace new_trace = *trace; | 3609 Trace new_trace = *trace; |
3660 new_trace.set_backtrack(&clear_registers_backtrack); | 3610 new_trace.set_backtrack(&clear_registers_backtrack); |
3661 on_success()->Emit(compiler, &new_trace); | 3611 on_success()->Emit(compiler, &new_trace); |
3662 | 3612 |
3663 assembler->BindBlock(&clear_registers_backtrack); | 3613 assembler->Bind(&clear_registers_backtrack); |
3664 intptr_t clear_registers_to = | 3614 int clear_registers_to = clear_registers_from + clear_register_count - 1; |
3665 clear_registers_from + clear_register_count - 1; | |
3666 assembler->ClearRegisters(clear_registers_from, clear_registers_to); | 3615 assembler->ClearRegisters(clear_registers_from, clear_registers_to); |
3667 | 3616 |
3668 ASSERT(trace->backtrack() == NULL); | 3617 DCHECK(trace->backtrack() == NULL); |
3669 assembler->Backtrack(); | 3618 assembler->Backtrack(); |
3670 return; | 3619 return; |
3671 } | 3620 } |
3672 default: | 3621 default: |
3673 UNREACHABLE(); | 3622 UNREACHABLE(); |
3674 } | 3623 } |
3675 } | 3624 } |
3676 | 3625 |
3677 | 3626 |
3678 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3627 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
3679 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3628 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
3680 if (!trace->is_trivial()) { | 3629 if (!trace->is_trivial()) { |
3681 trace->Flush(compiler, this); | 3630 trace->Flush(compiler, this); |
3682 return; | 3631 return; |
3683 } | 3632 } |
3684 | 3633 |
3685 LimitResult limit_result = LimitVersions(compiler, trace); | 3634 LimitResult limit_result = LimitVersions(compiler, trace); |
3686 if (limit_result == DONE) return; | 3635 if (limit_result == DONE) return; |
3687 ASSERT(limit_result == CONTINUE); | 3636 DCHECK(limit_result == CONTINUE); |
3688 | 3637 |
3689 RecursionCheck rc(compiler); | 3638 RecursionCheck rc(compiler); |
3690 | 3639 |
3691 ASSERT(start_reg_ + 1 == end_reg_); | 3640 DCHECK_EQ(start_reg_ + 1, end_reg_); |
3692 if (compiler->ignore_case()) { | 3641 if (compiler->ignore_case()) { |
3693 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, | 3642 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, |
3694 trace->backtrack()); | 3643 trace->backtrack()); |
3695 } else { | 3644 } else { |
3696 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); | 3645 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); |
3697 } | 3646 } |
3698 on_success()->Emit(compiler, trace); | 3647 on_success()->Emit(compiler, trace); |
3699 } | 3648 } |
3700 | 3649 |
3701 | 3650 |
3702 // ------------------------------------------------------------------- | 3651 // ------------------------------------------------------------------- |
3703 // Dot/dotty output | 3652 // Dot/dotty output |
3704 | 3653 |
3705 | 3654 |
3706 #ifdef DEBUG | 3655 #ifdef DEBUG |
3707 | 3656 |
3708 | 3657 |
3709 class DotPrinter: public NodeVisitor { | 3658 class DotPrinter: public NodeVisitor { |
3710 public: | 3659 public: |
3711 explicit DotPrinter(bool ignore_case) | 3660 DotPrinter(OStream& os, bool ignore_case) // NOLINT |
3712 : ignore_case_(ignore_case) {} | 3661 : os_(os), |
| 3662 ignore_case_(ignore_case) {} |
3713 void PrintNode(const char* label, RegExpNode* node); | 3663 void PrintNode(const char* label, RegExpNode* node); |
3714 void Visit(RegExpNode* node); | 3664 void Visit(RegExpNode* node); |
3715 void PrintAttributes(RegExpNode* from); | 3665 void PrintAttributes(RegExpNode* from); |
3716 void PrintOnFailure(RegExpNode* from, RegExpNode* to); | 3666 void PrintOnFailure(RegExpNode* from, RegExpNode* to); |
3717 #define DECLARE_VISIT(Type) \ | 3667 #define DECLARE_VISIT(Type) \ |
3718 virtual void Visit##Type(Type##Node* that); | 3668 virtual void Visit##Type(Type##Node* that); |
3719 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | 3669 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
3720 #undef DECLARE_VISIT | 3670 #undef DECLARE_VISIT |
3721 private: | 3671 private: |
| 3672 OStream& os_; |
3722 bool ignore_case_; | 3673 bool ignore_case_; |
3723 }; | 3674 }; |
3724 | 3675 |
3725 | 3676 |
3726 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { | 3677 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { |
3727 OS::Print("digraph G {\n graph [label=\""); | 3678 os_ << "digraph G {\n graph [label=\""; |
3728 for (intptr_t i = 0; label[i]; i++) { | 3679 for (int i = 0; label[i]; i++) { |
3729 switch (label[i]) { | 3680 switch (label[i]) { |
3730 case '\\': | 3681 case '\\': |
3731 OS::Print("\\\\"); | 3682 os_ << "\\\\"; |
3732 break; | 3683 break; |
3733 case '"': | 3684 case '"': |
3734 OS::Print("\""); | 3685 os_ << "\""; |
3735 break; | 3686 break; |
3736 default: | 3687 default: |
3737 OS::Print("%c", label[i]); | 3688 os_ << label[i]; |
3738 break; | 3689 break; |
3739 } | 3690 } |
3740 } | 3691 } |
3741 OS::Print("\"];\n"); | 3692 os_ << "\"];\n"; |
3742 Visit(node); | 3693 Visit(node); |
3743 OS::Print("}\n"); | 3694 os_ << "}" << endl; |
3744 } | 3695 } |
3745 | 3696 |
3746 | 3697 |
3747 void DotPrinter::Visit(RegExpNode* node) { | 3698 void DotPrinter::Visit(RegExpNode* node) { |
3748 if (node->info()->visited) return; | 3699 if (node->info()->visited) return; |
3749 node->info()->visited = true; | 3700 node->info()->visited = true; |
3750 node->Accept(this); | 3701 node->Accept(this); |
3751 } | 3702 } |
3752 | 3703 |
3753 | 3704 |
3754 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { | 3705 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { |
3755 OS::Print(" n%p -> n%p [style=dotted];\n", from, on_failure); | 3706 os_ << " n" << from << " -> n" << on_failure << " [style=dotted];\n"; |
3756 Visit(on_failure); | 3707 Visit(on_failure); |
3757 } | 3708 } |
3758 | 3709 |
3759 | 3710 |
3760 class AttributePrinter : public ValueObject { | 3711 class TableEntryBodyPrinter { |
3761 public: | 3712 public: |
3762 AttributePrinter() : first_(true) {} | 3713 TableEntryBodyPrinter(OStream& os, ChoiceNode* choice) // NOLINT |
| 3714 : os_(os), |
| 3715 choice_(choice) {} |
| 3716 void Call(uc16 from, DispatchTable::Entry entry) { |
| 3717 OutSet* out_set = entry.out_set(); |
| 3718 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { |
| 3719 if (out_set->Get(i)) { |
| 3720 os_ << " n" << choice() << ":s" << from << "o" << i << " -> n" |
| 3721 << choice()->alternatives()->at(i).node() << ";\n"; |
| 3722 } |
| 3723 } |
| 3724 } |
| 3725 private: |
| 3726 ChoiceNode* choice() { return choice_; } |
| 3727 OStream& os_; |
| 3728 ChoiceNode* choice_; |
| 3729 }; |
| 3730 |
| 3731 |
| 3732 class TableEntryHeaderPrinter { |
| 3733 public: |
| 3734 explicit TableEntryHeaderPrinter(OStream& os) // NOLINT |
| 3735 : first_(true), |
| 3736 os_(os) {} |
| 3737 void Call(uc16 from, DispatchTable::Entry entry) { |
| 3738 if (first_) { |
| 3739 first_ = false; |
| 3740 } else { |
| 3741 os_ << "|"; |
| 3742 } |
| 3743 os_ << "{\\" << AsUC16(from) << "-\\" << AsUC16(entry.to()) << "|{"; |
| 3744 OutSet* out_set = entry.out_set(); |
| 3745 int priority = 0; |
| 3746 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { |
| 3747 if (out_set->Get(i)) { |
| 3748 if (priority > 0) os_ << "|"; |
| 3749 os_ << "<s" << from << "o" << i << "> " << priority; |
| 3750 priority++; |
| 3751 } |
| 3752 } |
| 3753 os_ << "}}"; |
| 3754 } |
| 3755 |
| 3756 private: |
| 3757 bool first_; |
| 3758 OStream& os_; |
| 3759 }; |
| 3760 |
| 3761 |
| 3762 class AttributePrinter { |
| 3763 public: |
| 3764 explicit AttributePrinter(OStream& os) // NOLINT |
| 3765 : os_(os), |
| 3766 first_(true) {} |
3763 void PrintSeparator() { | 3767 void PrintSeparator() { |
3764 if (first_) { | 3768 if (first_) { |
3765 first_ = false; | 3769 first_ = false; |
3766 } else { | 3770 } else { |
3767 OS::Print("|"); | 3771 os_ << "|"; |
3768 } | 3772 } |
3769 } | 3773 } |
3770 void PrintBit(const char* name, bool value) { | 3774 void PrintBit(const char* name, bool value) { |
3771 if (!value) return; | 3775 if (!value) return; |
3772 PrintSeparator(); | 3776 PrintSeparator(); |
3773 OS::Print("{%s}", name); | 3777 os_ << "{" << name << "}"; |
3774 } | 3778 } |
3775 void PrintPositive(const char* name, intptr_t value) { | 3779 void PrintPositive(const char* name, int value) { |
3776 if (value < 0) return; | 3780 if (value < 0) return; |
3777 PrintSeparator(); | 3781 PrintSeparator(); |
3778 OS::Print("{%s|%" Pd "}", name, value); | 3782 os_ << "{" << name << "|" << value << "}"; |
3779 } | 3783 } |
3780 | 3784 |
3781 private: | 3785 private: |
| 3786 OStream& os_; |
3782 bool first_; | 3787 bool first_; |
3783 }; | 3788 }; |
3784 | 3789 |
3785 | 3790 |
3786 void DotPrinter::PrintAttributes(RegExpNode* that) { | 3791 void DotPrinter::PrintAttributes(RegExpNode* that) { |
3787 OS::Print(" a%p [shape=Mrecord, color=grey, fontcolor=grey, " | 3792 os_ << " a" << that << " [shape=Mrecord, color=grey, fontcolor=grey, " |
3788 "margin=0.1, fontsize=10, label=\"{", that); | 3793 << "margin=0.1, fontsize=10, label=\"{"; |
3789 AttributePrinter printer; | 3794 AttributePrinter printer(os_); |
3790 NodeInfo* info = that->info(); | 3795 NodeInfo* info = that->info(); |
3791 printer.PrintBit("NI", info->follows_newline_interest); | 3796 printer.PrintBit("NI", info->follows_newline_interest); |
3792 printer.PrintBit("WI", info->follows_word_interest); | 3797 printer.PrintBit("WI", info->follows_word_interest); |
3793 printer.PrintBit("SI", info->follows_start_interest); | 3798 printer.PrintBit("SI", info->follows_start_interest); |
3794 BlockLabel* label = that->label(); | 3799 Label* label = that->label(); |
3795 if (label->IsBound()) | 3800 if (label->is_bound()) |
3796 printer.PrintPositive("@", label->Position()); | 3801 printer.PrintPositive("@", label->pos()); |
3797 OS::Print("}\"];\n" | 3802 os_ << "}\"];\n" |
3798 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", | 3803 << " a" << that << " -> n" << that |
3799 that, that); | 3804 << " [style=dashed, color=grey, arrowhead=none];\n"; |
3800 } | 3805 } |
3801 | 3806 |
3802 | 3807 |
| 3808 static const bool kPrintDispatchTable = false; |
3803 void DotPrinter::VisitChoice(ChoiceNode* that) { | 3809 void DotPrinter::VisitChoice(ChoiceNode* that) { |
3804 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that); | 3810 if (kPrintDispatchTable) { |
3805 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 3811 os_ << " n" << that << " [shape=Mrecord, label=\""; |
3806 GuardedAlternative alt = that->alternatives()->At(i); | 3812 TableEntryHeaderPrinter header_printer(os_); |
3807 OS::Print(" n%p -> n%p", that, alt.node()); | 3813 that->GetTable(ignore_case_)->ForEach(&header_printer); |
3808 } | 3814 os_ << "\"]\n"; |
3809 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 3815 PrintAttributes(that); |
3810 GuardedAlternative alt = that->alternatives()->At(i); | 3816 TableEntryBodyPrinter body_printer(os_, that); |
| 3817 that->GetTable(ignore_case_)->ForEach(&body_printer); |
| 3818 } else { |
| 3819 os_ << " n" << that << " [shape=Mrecord, label=\"?\"];\n"; |
| 3820 for (int i = 0; i < that->alternatives()->length(); i++) { |
| 3821 GuardedAlternative alt = that->alternatives()->at(i); |
| 3822 os_ << " n" << that << " -> n" << alt.node(); |
| 3823 } |
| 3824 } |
| 3825 for (int i = 0; i < that->alternatives()->length(); i++) { |
| 3826 GuardedAlternative alt = that->alternatives()->at(i); |
3811 alt.node()->Accept(this); | 3827 alt.node()->Accept(this); |
3812 } | 3828 } |
3813 } | 3829 } |
3814 | 3830 |
3815 | 3831 |
3816 void DotPrinter::VisitText(TextNode* that) { | 3832 void DotPrinter::VisitText(TextNode* that) { |
3817 OS::Print(" n%p [label=\"", that); | 3833 Zone* zone = that->zone(); |
3818 for (intptr_t i = 0; i < that->elements()->length(); i++) { | 3834 os_ << " n" << that << " [label=\""; |
3819 if (i > 0) OS::Print(" "); | 3835 for (int i = 0; i < that->elements()->length(); i++) { |
3820 TextElement elm = that->elements()->At(i); | 3836 if (i > 0) os_ << " "; |
| 3837 TextElement elm = that->elements()->at(i); |
3821 switch (elm.text_type()) { | 3838 switch (elm.text_type()) { |
3822 case TextElement::ATOM: { | 3839 case TextElement::ATOM: { |
3823 ZoneGrowableArray<uint16_t>* data = elm.atom()->data(); | 3840 Vector<const uc16> data = elm.atom()->data(); |
3824 for (intptr_t i = 0; i < data->length(); i++) { | 3841 for (int i = 0; i < data.length(); i++) { |
3825 OS::Print("%c", static_cast<char>(data->At(i))); | 3842 os_ << static_cast<char>(data[i]); |
3826 } | 3843 } |
3827 break; | 3844 break; |
3828 } | 3845 } |
3829 case TextElement::CHAR_CLASS: { | 3846 case TextElement::CHAR_CLASS: { |
3830 RegExpCharacterClass* node = elm.char_class(); | 3847 RegExpCharacterClass* node = elm.char_class(); |
3831 OS::Print("["); | 3848 os_ << "["; |
3832 if (node->is_negated()) OS::Print("^"); | 3849 if (node->is_negated()) os_ << "^"; |
3833 for (intptr_t j = 0; j < node->ranges()->length(); j++) { | 3850 for (int j = 0; j < node->ranges(zone)->length(); j++) { |
3834 CharacterRange range = node->ranges()->At(j); | 3851 CharacterRange range = node->ranges(zone)->at(j); |
3835 PrintUtf16(range.from()); | 3852 os_ << AsUC16(range.from()) << "-" << AsUC16(range.to()); |
3836 OS::Print("-"); | |
3837 PrintUtf16(range.to()); | |
3838 } | 3853 } |
3839 OS::Print("]"); | 3854 os_ << "]"; |
3840 break; | 3855 break; |
3841 } | 3856 } |
3842 default: | 3857 default: |
3843 UNREACHABLE(); | 3858 UNREACHABLE(); |
3844 } | 3859 } |
3845 } | 3860 } |
3846 OS::Print("\", shape=box, peripheries=2];\n"); | 3861 os_ << "\", shape=box, peripheries=2];\n"; |
3847 PrintAttributes(that); | 3862 PrintAttributes(that); |
3848 OS::Print(" n%p -> n%p;\n", that, that->on_success()); | 3863 os_ << " n" << that << " -> n" << that->on_success() << ";\n"; |
3849 Visit(that->on_success()); | 3864 Visit(that->on_success()); |
3850 } | 3865 } |
3851 | 3866 |
3852 | 3867 |
3853 void DotPrinter::VisitBackReference(BackReferenceNode* that) { | 3868 void DotPrinter::VisitBackReference(BackReferenceNode* that) { |
3854 OS::Print(" n%p [label=\"$%" Pd "..$%" Pd "\", shape=doubleoctagon];\n", | 3869 os_ << " n" << that << " [label=\"$" << that->start_register() << "..$" |
3855 that, that->start_register(), that->end_register()); | 3870 << that->end_register() << "\", shape=doubleoctagon];\n"; |
3856 PrintAttributes(that); | 3871 PrintAttributes(that); |
3857 OS::Print(" n%p -> n%p;\n", that, that->on_success()); | 3872 os_ << " n" << that << " -> n" << that->on_success() << ";\n"; |
3858 Visit(that->on_success()); | 3873 Visit(that->on_success()); |
3859 } | 3874 } |
3860 | 3875 |
3861 | 3876 |
3862 void DotPrinter::VisitEnd(EndNode* that) { | 3877 void DotPrinter::VisitEnd(EndNode* that) { |
3863 OS::Print(" n%p [style=bold, shape=point];\n", that); | 3878 os_ << " n" << that << " [style=bold, shape=point];\n"; |
3864 PrintAttributes(that); | 3879 PrintAttributes(that); |
3865 } | 3880 } |
3866 | 3881 |
3867 | 3882 |
3868 void DotPrinter::VisitAssertion(AssertionNode* that) { | 3883 void DotPrinter::VisitAssertion(AssertionNode* that) { |
3869 OS::Print(" n%p [", that); | 3884 os_ << " n" << that << " ["; |
3870 switch (that->assertion_type()) { | 3885 switch (that->assertion_type()) { |
3871 case AssertionNode::AT_END: | 3886 case AssertionNode::AT_END: |
3872 OS::Print("label=\"$\", shape=septagon"); | 3887 os_ << "label=\"$\", shape=septagon"; |
3873 break; | 3888 break; |
3874 case AssertionNode::AT_START: | 3889 case AssertionNode::AT_START: |
3875 OS::Print("label=\"^\", shape=septagon"); | 3890 os_ << "label=\"^\", shape=septagon"; |
3876 break; | 3891 break; |
3877 case AssertionNode::AT_BOUNDARY: | 3892 case AssertionNode::AT_BOUNDARY: |
3878 OS::Print("label=\"\\b\", shape=septagon"); | 3893 os_ << "label=\"\\b\", shape=septagon"; |
3879 break; | 3894 break; |
3880 case AssertionNode::AT_NON_BOUNDARY: | 3895 case AssertionNode::AT_NON_BOUNDARY: |
3881 OS::Print("label=\"\\B\", shape=septagon"); | 3896 os_ << "label=\"\\B\", shape=septagon"; |
3882 break; | 3897 break; |
3883 case AssertionNode::AFTER_NEWLINE: | 3898 case AssertionNode::AFTER_NEWLINE: |
3884 OS::Print("label=\"(?<=\\n)\", shape=septagon"); | 3899 os_ << "label=\"(?<=\\n)\", shape=septagon"; |
3885 break; | 3900 break; |
3886 } | 3901 } |
3887 OS::Print("];\n"); | 3902 os_ << "];\n"; |
3888 PrintAttributes(that); | 3903 PrintAttributes(that); |
3889 RegExpNode* successor = that->on_success(); | 3904 RegExpNode* successor = that->on_success(); |
3890 OS::Print(" n%p -> n%p;\n", that, successor); | 3905 os_ << " n" << that << " -> n" << successor << ";\n"; |
3891 Visit(successor); | 3906 Visit(successor); |
3892 } | 3907 } |
3893 | 3908 |
3894 | 3909 |
3895 void DotPrinter::VisitAction(ActionNode* that) { | 3910 void DotPrinter::VisitAction(ActionNode* that) { |
3896 OS::Print(" n%p [", that); | 3911 os_ << " n" << that << " ["; |
3897 switch (that->action_type_) { | 3912 switch (that->action_type_) { |
3898 case ActionNode::SET_REGISTER: | 3913 case ActionNode::SET_REGISTER: |
3899 OS::Print("label=\"$%" Pd ":=%" Pd "\", shape=octagon", | 3914 os_ << "label=\"$" << that->data_.u_store_register.reg |
3900 that->data_.u_store_register.reg, | 3915 << ":=" << that->data_.u_store_register.value << "\", shape=octagon"; |
3901 that->data_.u_store_register.value); | |
3902 break; | 3916 break; |
3903 case ActionNode::INCREMENT_REGISTER: | 3917 case ActionNode::INCREMENT_REGISTER: |
3904 OS::Print("label=\"$%" Pd "++\", shape=octagon", | 3918 os_ << "label=\"$" << that->data_.u_increment_register.reg |
3905 that->data_.u_increment_register.reg); | 3919 << "++\", shape=octagon"; |
3906 break; | 3920 break; |
3907 case ActionNode::STORE_POSITION: | 3921 case ActionNode::STORE_POSITION: |
3908 OS::Print("label=\"$%" Pd ":=$pos\", shape=octagon", | 3922 os_ << "label=\"$" << that->data_.u_position_register.reg |
3909 that->data_.u_position_register.reg); | 3923 << ":=$pos\", shape=octagon"; |
3910 break; | 3924 break; |
3911 case ActionNode::BEGIN_SUBMATCH: | 3925 case ActionNode::BEGIN_SUBMATCH: |
3912 OS::Print("label=\"$%" Pd ":=$pos,begin\", shape=septagon", | 3926 os_ << "label=\"$" << that->data_.u_submatch.current_position_register |
3913 that->data_.u_submatch.current_position_register); | 3927 << ":=$pos,begin\", shape=septagon"; |
3914 break; | 3928 break; |
3915 case ActionNode::POSITIVE_SUBMATCH_SUCCESS: | 3929 case ActionNode::POSITIVE_SUBMATCH_SUCCESS: |
3916 OS::Print("label=\"escape\", shape=septagon"); | 3930 os_ << "label=\"escape\", shape=septagon"; |
3917 break; | 3931 break; |
3918 case ActionNode::EMPTY_MATCH_CHECK: | 3932 case ActionNode::EMPTY_MATCH_CHECK: |
3919 OS::Print("label=\"$%" Pd "=$pos?,$%" Pd "<%" Pd "?\", shape=septagon", | 3933 os_ << "label=\"$" << that->data_.u_empty_match_check.start_register |
3920 that->data_.u_empty_match_check.start_register, | 3934 << "=$pos?,$" << that->data_.u_empty_match_check.repetition_register |
3921 that->data_.u_empty_match_check.repetition_register, | 3935 << "<" << that->data_.u_empty_match_check.repetition_limit |
3922 that->data_.u_empty_match_check.repetition_limit); | 3936 << "?\", shape=septagon"; |
3923 break; | 3937 break; |
3924 case ActionNode::CLEAR_CAPTURES: { | 3938 case ActionNode::CLEAR_CAPTURES: { |
3925 OS::Print("label=\"clear $%" Pd " to $%" Pd "\", shape=septagon", | 3939 os_ << "label=\"clear $" << that->data_.u_clear_captures.range_from |
3926 that->data_.u_clear_captures.range_from, | 3940 << " to $" << that->data_.u_clear_captures.range_to |
3927 that->data_.u_clear_captures.range_to); | 3941 << "\", shape=septagon"; |
3928 break; | 3942 break; |
3929 } | 3943 } |
3930 } | 3944 } |
3931 OS::Print("];\n"); | 3945 os_ << "];\n"; |
3932 PrintAttributes(that); | 3946 PrintAttributes(that); |
3933 RegExpNode* successor = that->on_success(); | 3947 RegExpNode* successor = that->on_success(); |
3934 OS::Print(" n%p -> n%p;\n", that, successor); | 3948 os_ << " n" << that << " -> n" << successor << ";\n"; |
3935 Visit(successor); | 3949 Visit(successor); |
3936 } | 3950 } |
3937 | 3951 |
3938 | 3952 |
| 3953 class DispatchTableDumper { |
| 3954 public: |
| 3955 explicit DispatchTableDumper(OStream& os) : os_(os) {} |
| 3956 void Call(uc16 key, DispatchTable::Entry entry); |
| 3957 private: |
| 3958 OStream& os_; |
| 3959 }; |
| 3960 |
| 3961 |
| 3962 void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) { |
| 3963 os_ << "[" << AsUC16(key) << "-" << AsUC16(entry.to()) << "]: {"; |
| 3964 OutSet* set = entry.out_set(); |
| 3965 bool first = true; |
| 3966 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { |
| 3967 if (set->Get(i)) { |
| 3968 if (first) { |
| 3969 first = false; |
| 3970 } else { |
| 3971 os_ << ", "; |
| 3972 } |
| 3973 os_ << i; |
| 3974 } |
| 3975 } |
| 3976 os_ << "}\n"; |
| 3977 } |
| 3978 |
| 3979 |
| 3980 void DispatchTable::Dump() { |
| 3981 OFStream os(stderr); |
| 3982 DispatchTableDumper dumper(os); |
| 3983 tree()->ForEach(&dumper); |
| 3984 } |
| 3985 |
| 3986 |
3939 void RegExpEngine::DotPrint(const char* label, | 3987 void RegExpEngine::DotPrint(const char* label, |
3940 RegExpNode* node, | 3988 RegExpNode* node, |
3941 bool ignore_case) { | 3989 bool ignore_case) { |
3942 DotPrinter printer(ignore_case); | 3990 OFStream os(stdout); |
| 3991 DotPrinter printer(os, ignore_case); |
3943 printer.PrintNode(label, node); | 3992 printer.PrintNode(label, node); |
3944 } | 3993 } |
3945 | 3994 |
3946 | 3995 |
3947 #endif // DEBUG | 3996 #endif // DEBUG |
3948 | 3997 |
3949 | 3998 |
3950 // ------------------------------------------------------------------- | 3999 // ------------------------------------------------------------------- |
3951 // Tree to graph conversion | 4000 // Tree to graph conversion |
3952 | 4001 |
3953 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, | 4002 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, |
3954 RegExpNode* on_success) { | 4003 RegExpNode* on_success) { |
3955 ZoneGrowableArray<TextElement>* elms = | 4004 ZoneList<TextElement>* elms = |
3956 new(CI) ZoneGrowableArray<TextElement>(1); | 4005 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone()); |
3957 elms->Add(TextElement::Atom(this)); | 4006 elms->Add(TextElement::Atom(this), compiler->zone()); |
3958 return new(CI) TextNode(elms, on_success); | 4007 return new(compiler->zone()) TextNode(elms, on_success); |
3959 } | 4008 } |
3960 | 4009 |
3961 | 4010 |
3962 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, | 4011 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
3963 RegExpNode* on_success) { | 4012 RegExpNode* on_success) { |
3964 ZoneGrowableArray<TextElement>* elms = | 4013 return new(compiler->zone()) TextNode(elements(), on_success); |
3965 new(CI) ZoneGrowableArray<TextElement>(1); | |
3966 for (intptr_t i = 0; i < elements()->length(); i++) { | |
3967 elms->Add(elements()->At(i)); | |
3968 } | |
3969 return new(CI) TextNode(elms, on_success); | |
3970 } | 4014 } |
3971 | 4015 |
3972 | 4016 |
3973 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges, | 4017 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, |
3974 const intptr_t* special_class, | 4018 const int* special_class, |
3975 intptr_t length) { | 4019 int length) { |
3976 length--; // Remove final 0x10000. | 4020 length--; // Remove final 0x10000. |
3977 ASSERT(special_class[length] == 0x10000); | 4021 DCHECK(special_class[length] == 0x10000); |
3978 ASSERT(ranges->length() != 0); | 4022 DCHECK(ranges->length() != 0); |
3979 ASSERT(length != 0); | 4023 DCHECK(length != 0); |
3980 ASSERT(special_class[0] != 0); | 4024 DCHECK(special_class[0] != 0); |
3981 if (ranges->length() != (length >> 1) + 1) { | 4025 if (ranges->length() != (length >> 1) + 1) { |
3982 return false; | 4026 return false; |
3983 } | 4027 } |
3984 CharacterRange range = ranges->At(0); | 4028 CharacterRange range = ranges->at(0); |
3985 if (range.from() != 0) { | 4029 if (range.from() != 0) { |
3986 return false; | 4030 return false; |
3987 } | 4031 } |
3988 for (intptr_t i = 0; i < length; i += 2) { | 4032 for (int i = 0; i < length; i += 2) { |
3989 if (special_class[i] != (range.to() + 1)) { | 4033 if (special_class[i] != (range.to() + 1)) { |
3990 return false; | 4034 return false; |
3991 } | 4035 } |
3992 range = ranges->At((i >> 1) + 1); | 4036 range = ranges->at((i >> 1) + 1); |
3993 if (special_class[i+1] != range.from()) { | 4037 if (special_class[i+1] != range.from()) { |
3994 return false; | 4038 return false; |
3995 } | 4039 } |
3996 } | 4040 } |
3997 if (range.to() != 0xffff) { | 4041 if (range.to() != 0xffff) { |
3998 return false; | 4042 return false; |
3999 } | 4043 } |
4000 return true; | 4044 return true; |
4001 } | 4045 } |
4002 | 4046 |
4003 | 4047 |
4004 static bool CompareRanges(ZoneGrowableArray<CharacterRange>* ranges, | 4048 static bool CompareRanges(ZoneList<CharacterRange>* ranges, |
4005 const intptr_t* special_class, | 4049 const int* special_class, |
4006 intptr_t length) { | 4050 int length) { |
4007 length--; // Remove final 0x10000. | 4051 length--; // Remove final 0x10000. |
4008 ASSERT(special_class[length] == 0x10000); | 4052 DCHECK(special_class[length] == 0x10000); |
4009 if (ranges->length() * 2 != length) { | 4053 if (ranges->length() * 2 != length) { |
4010 return false; | 4054 return false; |
4011 } | 4055 } |
4012 for (intptr_t i = 0; i < length; i += 2) { | 4056 for (int i = 0; i < length; i += 2) { |
4013 CharacterRange range = ranges->At(i >> 1); | 4057 CharacterRange range = ranges->at(i >> 1); |
4014 if (range.from() != special_class[i] || | 4058 if (range.from() != special_class[i] || |
4015 range.to() != special_class[i + 1] - 1) { | 4059 range.to() != special_class[i + 1] - 1) { |
4016 return false; | 4060 return false; |
4017 } | 4061 } |
4018 } | 4062 } |
4019 return true; | 4063 return true; |
4020 } | 4064 } |
4021 | 4065 |
4022 | 4066 |
4023 bool RegExpCharacterClass::is_standard() { | 4067 bool RegExpCharacterClass::is_standard(Zone* zone) { |
4024 // TODO(lrn): Remove need for this function, by not throwing away information | 4068 // TODO(lrn): Remove need for this function, by not throwing away information |
4025 // along the way. | 4069 // along the way. |
4026 if (is_negated_) { | 4070 if (is_negated_) { |
4027 return false; | 4071 return false; |
4028 } | 4072 } |
4029 if (set_.is_standard()) { | 4073 if (set_.is_standard()) { |
4030 return true; | 4074 return true; |
4031 } | 4075 } |
4032 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { | 4076 if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) { |
4033 set_.set_standard_set_type('s'); | 4077 set_.set_standard_set_type('s'); |
4034 return true; | 4078 return true; |
4035 } | 4079 } |
4036 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { | 4080 if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) { |
4037 set_.set_standard_set_type('S'); | 4081 set_.set_standard_set_type('S'); |
4038 return true; | 4082 return true; |
4039 } | 4083 } |
4040 if (CompareInverseRanges(set_.ranges(), | 4084 if (CompareInverseRanges(set_.ranges(zone), |
4041 kLineTerminatorRanges, | 4085 kLineTerminatorRanges, |
4042 kLineTerminatorRangeCount)) { | 4086 kLineTerminatorRangeCount)) { |
4043 set_.set_standard_set_type('.'); | 4087 set_.set_standard_set_type('.'); |
4044 return true; | 4088 return true; |
4045 } | 4089 } |
4046 if (CompareRanges(set_.ranges(), | 4090 if (CompareRanges(set_.ranges(zone), |
4047 kLineTerminatorRanges, | 4091 kLineTerminatorRanges, |
4048 kLineTerminatorRangeCount)) { | 4092 kLineTerminatorRangeCount)) { |
4049 set_.set_standard_set_type('n'); | 4093 set_.set_standard_set_type('n'); |
4050 return true; | 4094 return true; |
4051 } | 4095 } |
4052 if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { | 4096 if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { |
4053 set_.set_standard_set_type('w'); | 4097 set_.set_standard_set_type('w'); |
4054 return true; | 4098 return true; |
4055 } | 4099 } |
4056 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { | 4100 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { |
4057 set_.set_standard_set_type('W'); | 4101 set_.set_standard_set_type('W'); |
4058 return true; | 4102 return true; |
4059 } | 4103 } |
4060 return false; | 4104 return false; |
4061 } | 4105 } |
4062 | 4106 |
4063 | 4107 |
4064 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 4108 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
4065 RegExpNode* on_success) { | 4109 RegExpNode* on_success) { |
4066 return new(CI) TextNode(this, on_success); | 4110 return new(compiler->zone()) TextNode(this, on_success); |
4067 } | 4111 } |
4068 | 4112 |
4069 | 4113 |
4070 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 4114 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
4071 RegExpNode* on_success) { | 4115 RegExpNode* on_success) { |
4072 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); | 4116 ZoneList<RegExpTree*>* alternatives = this->alternatives(); |
4073 intptr_t length = alternatives->length(); | 4117 int length = alternatives->length(); |
4074 ChoiceNode* result = | 4118 ChoiceNode* result = |
4075 new(CI) ChoiceNode(length, CI); | 4119 new(compiler->zone()) ChoiceNode(length, compiler->zone()); |
4076 for (intptr_t i = 0; i < length; i++) { | 4120 for (int i = 0; i < length; i++) { |
4077 GuardedAlternative alternative(alternatives->At(i)->ToNode(compiler, | 4121 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, |
4078 on_success)); | 4122 on_success)); |
4079 result->AddAlternative(alternative); | 4123 result->AddAlternative(alternative); |
4080 } | 4124 } |
4081 return result; | 4125 return result; |
4082 } | 4126 } |
4083 | 4127 |
4084 | 4128 |
4085 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, | 4129 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, |
4086 RegExpNode* on_success) { | 4130 RegExpNode* on_success) { |
4087 return ToNode(min(), | 4131 return ToNode(min(), |
4088 max(), | 4132 max(), |
4089 is_greedy(), | 4133 is_greedy(), |
4090 body(), | 4134 body(), |
4091 compiler, | 4135 compiler, |
4092 on_success); | 4136 on_success); |
4093 } | 4137 } |
4094 | 4138 |
4095 | 4139 |
4096 // Scoped object to keep track of how much we unroll quantifier loops in the | 4140 // Scoped object to keep track of how much we unroll quantifier loops in the |
4097 // regexp graph generator. | 4141 // regexp graph generator. |
4098 class RegExpExpansionLimiter : public ValueObject { | 4142 class RegExpExpansionLimiter { |
4099 public: | 4143 public: |
4100 static const intptr_t kMaxExpansionFactor = 6; | 4144 static const int kMaxExpansionFactor = 6; |
4101 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) | 4145 RegExpExpansionLimiter(RegExpCompiler* compiler, int factor) |
4102 : compiler_(compiler), | 4146 : compiler_(compiler), |
4103 saved_expansion_factor_(compiler->current_expansion_factor()), | 4147 saved_expansion_factor_(compiler->current_expansion_factor()), |
4104 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { | 4148 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { |
4105 ASSERT(factor > 0); | 4149 DCHECK(factor > 0); |
4106 if (ok_to_expand_) { | 4150 if (ok_to_expand_) { |
4107 if (factor > kMaxExpansionFactor) { | 4151 if (factor > kMaxExpansionFactor) { |
4108 // Avoid integer overflow of the current expansion factor. | 4152 // Avoid integer overflow of the current expansion factor. |
4109 ok_to_expand_ = false; | 4153 ok_to_expand_ = false; |
4110 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1); | 4154 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1); |
4111 } else { | 4155 } else { |
4112 intptr_t new_factor = saved_expansion_factor_ * factor; | 4156 int new_factor = saved_expansion_factor_ * factor; |
4113 ok_to_expand_ = (new_factor <= kMaxExpansionFactor); | 4157 ok_to_expand_ = (new_factor <= kMaxExpansionFactor); |
4114 compiler->set_current_expansion_factor(new_factor); | 4158 compiler->set_current_expansion_factor(new_factor); |
4115 } | 4159 } |
4116 } | 4160 } |
4117 } | 4161 } |
4118 | 4162 |
4119 ~RegExpExpansionLimiter() { | 4163 ~RegExpExpansionLimiter() { |
4120 compiler_->set_current_expansion_factor(saved_expansion_factor_); | 4164 compiler_->set_current_expansion_factor(saved_expansion_factor_); |
4121 } | 4165 } |
4122 | 4166 |
4123 bool ok_to_expand() { return ok_to_expand_; } | 4167 bool ok_to_expand() { return ok_to_expand_; } |
4124 | 4168 |
4125 private: | 4169 private: |
4126 RegExpCompiler* compiler_; | 4170 RegExpCompiler* compiler_; |
4127 intptr_t saved_expansion_factor_; | 4171 int saved_expansion_factor_; |
4128 bool ok_to_expand_; | 4172 bool ok_to_expand_; |
4129 | 4173 |
4130 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); | 4174 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); |
4131 }; | 4175 }; |
4132 | 4176 |
4133 | 4177 |
4134 RegExpNode* RegExpQuantifier::ToNode(intptr_t min, | 4178 RegExpNode* RegExpQuantifier::ToNode(int min, |
4135 intptr_t max, | 4179 int max, |
4136 bool is_greedy, | 4180 bool is_greedy, |
4137 RegExpTree* body, | 4181 RegExpTree* body, |
4138 RegExpCompiler* compiler, | 4182 RegExpCompiler* compiler, |
4139 RegExpNode* on_success, | 4183 RegExpNode* on_success, |
4140 bool not_at_start) { | 4184 bool not_at_start) { |
4141 // x{f, t} becomes this: | 4185 // x{f, t} becomes this: |
4142 // | 4186 // |
4143 // (r++)<-. | 4187 // (r++)<-. |
4144 // | ` | 4188 // | ` |
4145 // | (x) | 4189 // | (x) |
4146 // v ^ | 4190 // v ^ |
4147 // (r=0)-->(?)---/ [if r < t] | 4191 // (r=0)-->(?)---/ [if r < t] |
4148 // | | 4192 // | |
4149 // [if r >= f] \----> ... | 4193 // [if r >= f] \----> ... |
4150 // | 4194 // |
4151 | 4195 |
4152 // 15.10.2.5 RepeatMatcher algorithm. | 4196 // 15.10.2.5 RepeatMatcher algorithm. |
4153 // The parser has already eliminated the case where max is 0. In the case | 4197 // The parser has already eliminated the case where max is 0. In the case |
4154 // where max_match is zero the parser has removed the quantifier if min was | 4198 // where max_match is zero the parser has removed the quantifier if min was |
4155 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. | 4199 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. |
4156 | 4200 |
4157 // If we know that we cannot match zero length then things are a little | 4201 // If we know that we cannot match zero length then things are a little |
4158 // simpler since we don't need to make the special zero length match check | 4202 // simpler since we don't need to make the special zero length match check |
4159 // from step 2.1. If the min and max are small we can unroll a little in | 4203 // from step 2.1. If the min and max are small we can unroll a little in |
4160 // this case. | 4204 // this case. |
4161 // Unroll (foo)+ and (foo){3,} | 4205 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,} |
4162 static const intptr_t kMaxUnrolledMinMatches = 3; | 4206 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3} |
4163 // Unroll (foo)? and (foo){x,3} | |
4164 static const intptr_t kMaxUnrolledMaxMatches = 3; | |
4165 if (max == 0) return on_success; // This can happen due to recursion. | 4207 if (max == 0) return on_success; // This can happen due to recursion. |
4166 bool body_can_be_empty = (body->min_match() == 0); | 4208 bool body_can_be_empty = (body->min_match() == 0); |
4167 intptr_t body_start_reg = RegExpCompiler::kNoRegister; | 4209 int body_start_reg = RegExpCompiler::kNoRegister; |
4168 Interval capture_registers = body->CaptureRegisters(); | 4210 Interval capture_registers = body->CaptureRegisters(); |
4169 bool needs_capture_clearing = !capture_registers.is_empty(); | 4211 bool needs_capture_clearing = !capture_registers.is_empty(); |
4170 Isolate* isolate = compiler->isolate(); | 4212 Zone* zone = compiler->zone(); |
4171 | 4213 |
4172 if (body_can_be_empty) { | 4214 if (body_can_be_empty) { |
4173 body_start_reg = compiler->AllocateRegister(); | 4215 body_start_reg = compiler->AllocateRegister(); |
4174 } else if (kRegexpOptimization && !needs_capture_clearing) { | 4216 } else if (FLAG_regexp_optimization && !needs_capture_clearing) { |
4175 // Only unroll if there are no captures and the body can't be | 4217 // Only unroll if there are no captures and the body can't be |
4176 // empty. | 4218 // empty. |
4177 { | 4219 { |
4178 RegExpExpansionLimiter limiter( | 4220 RegExpExpansionLimiter limiter( |
4179 compiler, min + ((max != min) ? 1 : 0)); | 4221 compiler, min + ((max != min) ? 1 : 0)); |
4180 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { | 4222 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { |
4181 intptr_t new_max = (max == kInfinity) ? max : max - min; | 4223 int new_max = (max == kInfinity) ? max : max - min; |
4182 // Recurse once to get the loop or optional matches after the fixed | 4224 // Recurse once to get the loop or optional matches after the fixed |
4183 // ones. | 4225 // ones. |
4184 RegExpNode* answer = ToNode( | 4226 RegExpNode* answer = ToNode( |
4185 0, new_max, is_greedy, body, compiler, on_success, true); | 4227 0, new_max, is_greedy, body, compiler, on_success, true); |
4186 // Unroll the forced matches from 0 to min. This can cause chains of | 4228 // Unroll the forced matches from 0 to min. This can cause chains of |
4187 // TextNodes (which the parser does not generate). These should be | 4229 // TextNodes (which the parser does not generate). These should be |
4188 // combined if it turns out they hinder good code generation. | 4230 // combined if it turns out they hinder good code generation. |
4189 for (intptr_t i = 0; i < min; i++) { | 4231 for (int i = 0; i < min; i++) { |
4190 answer = body->ToNode(compiler, answer); | 4232 answer = body->ToNode(compiler, answer); |
4191 } | 4233 } |
4192 return answer; | 4234 return answer; |
4193 } | 4235 } |
4194 } | 4236 } |
4195 if (max <= kMaxUnrolledMaxMatches && min == 0) { | 4237 if (max <= kMaxUnrolledMaxMatches && min == 0) { |
4196 ASSERT(max > 0); // Due to the 'if' above. | 4238 DCHECK(max > 0); // Due to the 'if' above. |
4197 RegExpExpansionLimiter limiter(compiler, max); | 4239 RegExpExpansionLimiter limiter(compiler, max); |
4198 if (limiter.ok_to_expand()) { | 4240 if (limiter.ok_to_expand()) { |
4199 // Unroll the optional matches up to max. | 4241 // Unroll the optional matches up to max. |
4200 RegExpNode* answer = on_success; | 4242 RegExpNode* answer = on_success; |
4201 for (intptr_t i = 0; i < max; i++) { | 4243 for (int i = 0; i < max; i++) { |
4202 ChoiceNode* alternation = new(isolate) ChoiceNode(2, isolate); | 4244 ChoiceNode* alternation = new(zone) ChoiceNode(2, zone); |
4203 if (is_greedy) { | 4245 if (is_greedy) { |
4204 alternation->AddAlternative( | 4246 alternation->AddAlternative( |
4205 GuardedAlternative(body->ToNode(compiler, answer))); | 4247 GuardedAlternative(body->ToNode(compiler, answer))); |
4206 alternation->AddAlternative(GuardedAlternative(on_success)); | 4248 alternation->AddAlternative(GuardedAlternative(on_success)); |
4207 } else { | 4249 } else { |
4208 alternation->AddAlternative(GuardedAlternative(on_success)); | 4250 alternation->AddAlternative(GuardedAlternative(on_success)); |
4209 alternation->AddAlternative( | 4251 alternation->AddAlternative( |
4210 GuardedAlternative(body->ToNode(compiler, answer))); | 4252 GuardedAlternative(body->ToNode(compiler, answer))); |
4211 } | 4253 } |
4212 answer = alternation; | 4254 answer = alternation; |
4213 if (not_at_start) alternation->set_not_at_start(); | 4255 if (not_at_start) alternation->set_not_at_start(); |
4214 } | 4256 } |
4215 return answer; | 4257 return answer; |
4216 } | 4258 } |
4217 } | 4259 } |
4218 } | 4260 } |
4219 bool has_min = min > 0; | 4261 bool has_min = min > 0; |
4220 bool has_max = max < RegExpTree::kInfinity; | 4262 bool has_max = max < RegExpTree::kInfinity; |
4221 bool needs_counter = has_min || has_max; | 4263 bool needs_counter = has_min || has_max; |
4222 intptr_t reg_ctr = needs_counter | 4264 int reg_ctr = needs_counter |
4223 ? compiler->AllocateRegister() | 4265 ? compiler->AllocateRegister() |
4224 : RegExpCompiler::kNoRegister; | 4266 : RegExpCompiler::kNoRegister; |
4225 LoopChoiceNode* center = new(isolate) LoopChoiceNode(body->min_match() == 0, | 4267 LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0, |
4226 isolate); | 4268 zone); |
4227 if (not_at_start) center->set_not_at_start(); | 4269 if (not_at_start) center->set_not_at_start(); |
4228 RegExpNode* loop_return = needs_counter | 4270 RegExpNode* loop_return = needs_counter |
4229 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) | 4271 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) |
4230 : static_cast<RegExpNode*>(center); | 4272 : static_cast<RegExpNode*>(center); |
4231 if (body_can_be_empty) { | 4273 if (body_can_be_empty) { |
4232 // If the body can be empty we need to check if it was and then | 4274 // If the body can be empty we need to check if it was and then |
4233 // backtrack. | 4275 // backtrack. |
4234 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, | 4276 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, |
4235 reg_ctr, | 4277 reg_ctr, |
4236 min, | 4278 min, |
4237 loop_return); | 4279 loop_return); |
4238 } | 4280 } |
4239 RegExpNode* body_node = body->ToNode(compiler, loop_return); | 4281 RegExpNode* body_node = body->ToNode(compiler, loop_return); |
4240 if (body_can_be_empty) { | 4282 if (body_can_be_empty) { |
4241 // If the body can be empty we need to store the start position | 4283 // If the body can be empty we need to store the start position |
4242 // so we can bail out if it was empty. | 4284 // so we can bail out if it was empty. |
4243 body_node = ActionNode::StorePosition(body_start_reg, false, body_node); | 4285 body_node = ActionNode::StorePosition(body_start_reg, false, body_node); |
4244 } | 4286 } |
4245 if (needs_capture_clearing) { | 4287 if (needs_capture_clearing) { |
4246 // Before entering the body of this loop we need to clear captures. | 4288 // Before entering the body of this loop we need to clear captures. |
4247 body_node = ActionNode::ClearCaptures(capture_registers, body_node); | 4289 body_node = ActionNode::ClearCaptures(capture_registers, body_node); |
4248 } | 4290 } |
4249 GuardedAlternative body_alt(body_node); | 4291 GuardedAlternative body_alt(body_node); |
4250 if (has_max) { | 4292 if (has_max) { |
4251 Guard* body_guard = | 4293 Guard* body_guard = |
4252 new(isolate) Guard(reg_ctr, Guard::LT, max); | 4294 new(zone) Guard(reg_ctr, Guard::LT, max); |
4253 body_alt.AddGuard(body_guard, isolate); | 4295 body_alt.AddGuard(body_guard, zone); |
4254 } | 4296 } |
4255 GuardedAlternative rest_alt(on_success); | 4297 GuardedAlternative rest_alt(on_success); |
4256 if (has_min) { | 4298 if (has_min) { |
4257 Guard* rest_guard = new(isolate) Guard(reg_ctr, Guard::GEQ, min); | 4299 Guard* rest_guard = new(compiler->zone()) Guard(reg_ctr, Guard::GEQ, min); |
4258 rest_alt.AddGuard(rest_guard, isolate); | 4300 rest_alt.AddGuard(rest_guard, zone); |
4259 } | 4301 } |
4260 if (is_greedy) { | 4302 if (is_greedy) { |
4261 center->AddLoopAlternative(body_alt); | 4303 center->AddLoopAlternative(body_alt); |
4262 center->AddContinueAlternative(rest_alt); | 4304 center->AddContinueAlternative(rest_alt); |
4263 } else { | 4305 } else { |
4264 center->AddContinueAlternative(rest_alt); | 4306 center->AddContinueAlternative(rest_alt); |
4265 center->AddLoopAlternative(body_alt); | 4307 center->AddLoopAlternative(body_alt); |
4266 } | 4308 } |
4267 if (needs_counter) { | 4309 if (needs_counter) { |
4268 return ActionNode::SetRegister(reg_ctr, 0, center); | 4310 return ActionNode::SetRegister(reg_ctr, 0, center); |
4269 } else { | 4311 } else { |
4270 return center; | 4312 return center; |
4271 } | 4313 } |
4272 } | 4314 } |
4273 | 4315 |
4274 | 4316 |
4275 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, | 4317 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, |
4276 RegExpNode* on_success) { | 4318 RegExpNode* on_success) { |
| 4319 NodeInfo info; |
| 4320 Zone* zone = compiler->zone(); |
| 4321 |
4277 switch (assertion_type()) { | 4322 switch (assertion_type()) { |
4278 case START_OF_LINE: | 4323 case START_OF_LINE: |
4279 return AssertionNode::AfterNewline(on_success); | 4324 return AssertionNode::AfterNewline(on_success); |
4280 case START_OF_INPUT: | 4325 case START_OF_INPUT: |
4281 return AssertionNode::AtStart(on_success); | 4326 return AssertionNode::AtStart(on_success); |
4282 case BOUNDARY: | 4327 case BOUNDARY: |
4283 return AssertionNode::AtBoundary(on_success); | 4328 return AssertionNode::AtBoundary(on_success); |
4284 case NON_BOUNDARY: | 4329 case NON_BOUNDARY: |
4285 return AssertionNode::AtNonBoundary(on_success); | 4330 return AssertionNode::AtNonBoundary(on_success); |
4286 case END_OF_INPUT: | 4331 case END_OF_INPUT: |
4287 return AssertionNode::AtEnd(on_success); | 4332 return AssertionNode::AtEnd(on_success); |
4288 case END_OF_LINE: { | 4333 case END_OF_LINE: { |
4289 // Compile $ in multiline regexps as an alternation with a positive | 4334 // Compile $ in multiline regexps as an alternation with a positive |
4290 // lookahead in one side and an end-of-input on the other side. | 4335 // lookahead in one side and an end-of-input on the other side. |
4291 // We need two registers for the lookahead. | 4336 // We need two registers for the lookahead. |
4292 intptr_t stack_pointer_register = compiler->AllocateRegister(); | 4337 int stack_pointer_register = compiler->AllocateRegister(); |
4293 intptr_t position_register = compiler->AllocateRegister(); | 4338 int position_register = compiler->AllocateRegister(); |
4294 // The ChoiceNode to distinguish between a newline and end-of-input. | 4339 // The ChoiceNode to distinguish between a newline and end-of-input. |
4295 ChoiceNode* result = new ChoiceNode(2, on_success->isolate()); | 4340 ChoiceNode* result = new(zone) ChoiceNode(2, zone); |
4296 // Create a newline atom. | 4341 // Create a newline atom. |
4297 ZoneGrowableArray<CharacterRange>* newline_ranges = | 4342 ZoneList<CharacterRange>* newline_ranges = |
4298 new ZoneGrowableArray<CharacterRange>(3); | 4343 new(zone) ZoneList<CharacterRange>(3, zone); |
4299 CharacterRange::AddClassEscape('n', newline_ranges); | 4344 CharacterRange::AddClassEscape('n', newline_ranges, zone); |
4300 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n'); | 4345 RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n'); |
4301 TextNode* newline_matcher = new TextNode( | 4346 TextNode* newline_matcher = new(zone) TextNode( |
4302 newline_atom, | 4347 newline_atom, |
4303 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, | 4348 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, |
4304 position_register, | 4349 position_register, |
4305 0, // No captures inside. | 4350 0, // No captures inside. |
4306 -1, // Ignored if no captures. | 4351 -1, // Ignored if no captures. |
4307 on_success)); | 4352 on_success)); |
4308 // Create an end-of-input matcher. | 4353 // Create an end-of-input matcher. |
4309 RegExpNode* end_of_line = ActionNode::BeginSubmatch( | 4354 RegExpNode* end_of_line = ActionNode::BeginSubmatch( |
4310 stack_pointer_register, | 4355 stack_pointer_register, |
4311 position_register, | 4356 position_register, |
4312 newline_matcher); | 4357 newline_matcher); |
4313 // Add the two alternatives to the ChoiceNode. | 4358 // Add the two alternatives to the ChoiceNode. |
4314 GuardedAlternative eol_alternative(end_of_line); | 4359 GuardedAlternative eol_alternative(end_of_line); |
4315 result->AddAlternative(eol_alternative); | 4360 result->AddAlternative(eol_alternative); |
4316 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); | 4361 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); |
4317 result->AddAlternative(end_alternative); | 4362 result->AddAlternative(end_alternative); |
4318 return result; | 4363 return result; |
4319 } | 4364 } |
4320 default: | 4365 default: |
4321 UNREACHABLE(); | 4366 UNREACHABLE(); |
4322 } | 4367 } |
4323 return on_success; | 4368 return on_success; |
4324 } | 4369 } |
4325 | 4370 |
4326 | 4371 |
4327 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, | 4372 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, |
4328 RegExpNode* on_success) { | 4373 RegExpNode* on_success) { |
4329 return new(CI) | 4374 return new(compiler->zone()) |
4330 BackReferenceNode(RegExpCapture::StartRegister(index()), | 4375 BackReferenceNode(RegExpCapture::StartRegister(index()), |
4331 RegExpCapture::EndRegister(index()), | 4376 RegExpCapture::EndRegister(index()), |
4332 on_success); | 4377 on_success); |
4333 } | 4378 } |
4334 | 4379 |
4335 | 4380 |
4336 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, | 4381 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, |
4337 RegExpNode* on_success) { | 4382 RegExpNode* on_success) { |
4338 return on_success; | 4383 return on_success; |
4339 } | 4384 } |
4340 | 4385 |
4341 | 4386 |
4342 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, | 4387 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, |
4343 RegExpNode* on_success) { | 4388 RegExpNode* on_success) { |
4344 intptr_t stack_pointer_register = compiler->AllocateRegister(); | 4389 int stack_pointer_register = compiler->AllocateRegister(); |
4345 intptr_t position_register = compiler->AllocateRegister(); | 4390 int position_register = compiler->AllocateRegister(); |
4346 | 4391 |
4347 const intptr_t registers_per_capture = 2; | 4392 const int registers_per_capture = 2; |
4348 const intptr_t register_of_first_capture = 2; | 4393 const int register_of_first_capture = 2; |
4349 intptr_t register_count = capture_count_ * registers_per_capture; | 4394 int register_count = capture_count_ * registers_per_capture; |
4350 intptr_t register_start = | 4395 int register_start = |
4351 register_of_first_capture + capture_from_ * registers_per_capture; | 4396 register_of_first_capture + capture_from_ * registers_per_capture; |
4352 | 4397 |
4353 RegExpNode* success; | 4398 RegExpNode* success; |
4354 if (is_positive()) { | 4399 if (is_positive()) { |
4355 RegExpNode* node = ActionNode::BeginSubmatch( | 4400 RegExpNode* node = ActionNode::BeginSubmatch( |
4356 stack_pointer_register, | 4401 stack_pointer_register, |
4357 position_register, | 4402 position_register, |
4358 body()->ToNode( | 4403 body()->ToNode( |
4359 compiler, | 4404 compiler, |
4360 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, | 4405 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, |
4361 position_register, | 4406 position_register, |
4362 register_count, | 4407 register_count, |
4363 register_start, | 4408 register_start, |
4364 on_success))); | 4409 on_success))); |
4365 return node; | 4410 return node; |
4366 } else { | 4411 } else { |
4367 // We use a ChoiceNode for a negative lookahead because it has most of | 4412 // We use a ChoiceNode for a negative lookahead because it has most of |
4368 // the characteristics we need. It has the body of the lookahead as its | 4413 // the characteristics we need. It has the body of the lookahead as its |
4369 // first alternative and the expression after the lookahead of the second | 4414 // first alternative and the expression after the lookahead of the second |
4370 // alternative. If the first alternative succeeds then the | 4415 // alternative. If the first alternative succeeds then the |
4371 // NegativeSubmatchSuccess will unwind the stack including everything the | 4416 // NegativeSubmatchSuccess will unwind the stack including everything the |
4372 // choice node set up and backtrack. If the first alternative fails then | 4417 // choice node set up and backtrack. If the first alternative fails then |
4373 // the second alternative is tried, which is exactly the desired result | 4418 // the second alternative is tried, which is exactly the desired result |
4374 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special | 4419 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special |
4375 // ChoiceNode that knows to ignore the first exit when calculating quick | 4420 // ChoiceNode that knows to ignore the first exit when calculating quick |
4376 // checks. | 4421 // checks. |
| 4422 Zone* zone = compiler->zone(); |
4377 | 4423 |
4378 GuardedAlternative body_alt( | 4424 GuardedAlternative body_alt( |
4379 body()->ToNode( | 4425 body()->ToNode( |
4380 compiler, | 4426 compiler, |
4381 success = new(CI) NegativeSubmatchSuccess(stack_pointer_register, | 4427 success = new(zone) NegativeSubmatchSuccess(stack_pointer_register, |
4382 position_register, | 4428 position_register, |
4383 register_count, | 4429 register_count, |
4384 register_start, | 4430 register_start, |
4385 CI))); | 4431 zone))); |
4386 ChoiceNode* choice_node = | 4432 ChoiceNode* choice_node = |
4387 new(CI) NegativeLookaheadChoiceNode(body_alt, | 4433 new(zone) NegativeLookaheadChoiceNode(body_alt, |
4388 GuardedAlternative(on_success), | 4434 GuardedAlternative(on_success), |
4389 CI); | 4435 zone); |
4390 return ActionNode::BeginSubmatch(stack_pointer_register, | 4436 return ActionNode::BeginSubmatch(stack_pointer_register, |
4391 position_register, | 4437 position_register, |
4392 choice_node); | 4438 choice_node); |
4393 } | 4439 } |
4394 } | 4440 } |
4395 | 4441 |
4396 | 4442 |
4397 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, | 4443 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, |
4398 RegExpNode* on_success) { | 4444 RegExpNode* on_success) { |
4399 return ToNode(body(), index(), compiler, on_success); | 4445 return ToNode(body(), index(), compiler, on_success); |
4400 } | 4446 } |
4401 | 4447 |
4402 | 4448 |
4403 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, | 4449 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, |
4404 intptr_t index, | 4450 int index, |
4405 RegExpCompiler* compiler, | 4451 RegExpCompiler* compiler, |
4406 RegExpNode* on_success) { | 4452 RegExpNode* on_success) { |
4407 intptr_t start_reg = RegExpCapture::StartRegister(index); | 4453 int start_reg = RegExpCapture::StartRegister(index); |
4408 intptr_t end_reg = RegExpCapture::EndRegister(index); | 4454 int end_reg = RegExpCapture::EndRegister(index); |
4409 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); | 4455 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); |
4410 RegExpNode* body_node = body->ToNode(compiler, store_end); | 4456 RegExpNode* body_node = body->ToNode(compiler, store_end); |
4411 return ActionNode::StorePosition(start_reg, true, body_node); | 4457 return ActionNode::StorePosition(start_reg, true, body_node); |
4412 } | 4458 } |
4413 | 4459 |
4414 | 4460 |
4415 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, | 4461 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, |
4416 RegExpNode* on_success) { | 4462 RegExpNode* on_success) { |
4417 ZoneGrowableArray<RegExpTree*>* children = nodes(); | 4463 ZoneList<RegExpTree*>* children = nodes(); |
4418 RegExpNode* current = on_success; | 4464 RegExpNode* current = on_success; |
4419 for (intptr_t i = children->length() - 1; i >= 0; i--) { | 4465 for (int i = children->length() - 1; i >= 0; i--) { |
4420 current = children->At(i)->ToNode(compiler, current); | 4466 current = children->at(i)->ToNode(compiler, current); |
4421 } | 4467 } |
4422 return current; | 4468 return current; |
4423 } | 4469 } |
4424 | 4470 |
4425 | 4471 |
4426 static void AddClass(const intptr_t* elmv, | 4472 static void AddClass(const int* elmv, |
4427 intptr_t elmc, | 4473 int elmc, |
4428 ZoneGrowableArray<CharacterRange>* ranges) { | 4474 ZoneList<CharacterRange>* ranges, |
| 4475 Zone* zone) { |
4429 elmc--; | 4476 elmc--; |
4430 ASSERT(elmv[elmc] == 0x10000); | 4477 DCHECK(elmv[elmc] == 0x10000); |
4431 for (intptr_t i = 0; i < elmc; i += 2) { | 4478 for (int i = 0; i < elmc; i += 2) { |
4432 ASSERT(elmv[i] < elmv[i + 1]); | 4479 DCHECK(elmv[i] < elmv[i + 1]); |
4433 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); | 4480 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone); |
4434 } | 4481 } |
4435 } | 4482 } |
4436 | 4483 |
4437 | 4484 |
4438 static void AddClassNegated(const intptr_t *elmv, | 4485 static void AddClassNegated(const int *elmv, |
4439 intptr_t elmc, | 4486 int elmc, |
4440 ZoneGrowableArray<CharacterRange>* ranges) { | 4487 ZoneList<CharacterRange>* ranges, |
| 4488 Zone* zone) { |
4441 elmc--; | 4489 elmc--; |
4442 ASSERT(elmv[elmc] == 0x10000); | 4490 DCHECK(elmv[elmc] == 0x10000); |
4443 ASSERT(elmv[0] != 0x0000); | 4491 DCHECK(elmv[0] != 0x0000); |
4444 ASSERT(elmv[elmc-1] != Utf16::kMaxCodeUnit); | 4492 DCHECK(elmv[elmc-1] != String::kMaxUtf16CodeUnit); |
4445 uint16_t last = 0x0000; | 4493 uc16 last = 0x0000; |
4446 for (intptr_t i = 0; i < elmc; i += 2) { | 4494 for (int i = 0; i < elmc; i += 2) { |
4447 ASSERT(last <= elmv[i] - 1); | 4495 DCHECK(last <= elmv[i] - 1); |
4448 ASSERT(elmv[i] < elmv[i + 1]); | 4496 DCHECK(elmv[i] < elmv[i + 1]); |
4449 ranges->Add(CharacterRange(last, elmv[i] - 1)); | 4497 ranges->Add(CharacterRange(last, elmv[i] - 1), zone); |
4450 last = elmv[i + 1]; | 4498 last = elmv[i + 1]; |
4451 } | 4499 } |
4452 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit)); | 4500 ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit), zone); |
4453 } | 4501 } |
4454 | 4502 |
4455 | 4503 |
4456 void CharacterRange::AddClassEscape(uint16_t type, | 4504 void CharacterRange::AddClassEscape(uc16 type, |
4457 ZoneGrowableArray<CharacterRange>* ranges) { | 4505 ZoneList<CharacterRange>* ranges, |
| 4506 Zone* zone) { |
4458 switch (type) { | 4507 switch (type) { |
4459 case 's': | 4508 case 's': |
4460 AddClass(kSpaceRanges, kSpaceRangeCount, ranges); | 4509 AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone); |
4461 break; | 4510 break; |
4462 case 'S': | 4511 case 'S': |
4463 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges); | 4512 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone); |
4464 break; | 4513 break; |
4465 case 'w': | 4514 case 'w': |
4466 AddClass(kWordRanges, kWordRangeCount, ranges); | 4515 AddClass(kWordRanges, kWordRangeCount, ranges, zone); |
4467 break; | 4516 break; |
4468 case 'W': | 4517 case 'W': |
4469 AddClassNegated(kWordRanges, kWordRangeCount, ranges); | 4518 AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone); |
4470 break; | 4519 break; |
4471 case 'd': | 4520 case 'd': |
4472 AddClass(kDigitRanges, kDigitRangeCount, ranges); | 4521 AddClass(kDigitRanges, kDigitRangeCount, ranges, zone); |
4473 break; | 4522 break; |
4474 case 'D': | 4523 case 'D': |
4475 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges); | 4524 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone); |
4476 break; | 4525 break; |
4477 case '.': | 4526 case '.': |
4478 AddClassNegated(kLineTerminatorRanges, | 4527 AddClassNegated(kLineTerminatorRanges, |
4479 kLineTerminatorRangeCount, | 4528 kLineTerminatorRangeCount, |
4480 ranges); | 4529 ranges, |
| 4530 zone); |
4481 break; | 4531 break; |
4482 // This is not a character range as defined by the spec but a | 4532 // This is not a character range as defined by the spec but a |
4483 // convenient shorthand for a character class that matches any | 4533 // convenient shorthand for a character class that matches any |
4484 // character. | 4534 // character. |
4485 case '*': | 4535 case '*': |
4486 ranges->Add(CharacterRange::Everything()); | 4536 ranges->Add(CharacterRange::Everything(), zone); |
4487 break; | 4537 break; |
4488 // This is the set of characters matched by the $ and ^ symbols | 4538 // This is the set of characters matched by the $ and ^ symbols |
4489 // in multiline mode. | 4539 // in multiline mode. |
4490 case 'n': | 4540 case 'n': |
4491 AddClass(kLineTerminatorRanges, | 4541 AddClass(kLineTerminatorRanges, |
4492 kLineTerminatorRangeCount, | 4542 kLineTerminatorRangeCount, |
4493 ranges); | 4543 ranges, |
| 4544 zone); |
4494 break; | 4545 break; |
4495 default: | 4546 default: |
4496 UNREACHABLE(); | 4547 UNREACHABLE(); |
4497 } | 4548 } |
4498 } | 4549 } |
4499 | 4550 |
4500 | 4551 |
4501 void CharacterRange::AddCaseEquivalents( | 4552 Vector<const int> CharacterRange::GetWordBounds() { |
4502 ZoneGrowableArray<CharacterRange>* ranges, | 4553 return Vector<const int>(kWordRanges, kWordRangeCount - 1); |
4503 bool is_one_byte, | 4554 } |
4504 Isolate* isolate) { | 4555 |
4505 uint16_t bottom = from(); | 4556 |
4506 uint16_t top = to(); | 4557 class CharacterRangeSplitter { |
| 4558 public: |
| 4559 CharacterRangeSplitter(ZoneList<CharacterRange>** included, |
| 4560 ZoneList<CharacterRange>** excluded, |
| 4561 Zone* zone) |
| 4562 : included_(included), |
| 4563 excluded_(excluded), |
| 4564 zone_(zone) { } |
| 4565 void Call(uc16 from, DispatchTable::Entry entry); |
| 4566 |
| 4567 static const int kInBase = 0; |
| 4568 static const int kInOverlay = 1; |
| 4569 |
| 4570 private: |
| 4571 ZoneList<CharacterRange>** included_; |
| 4572 ZoneList<CharacterRange>** excluded_; |
| 4573 Zone* zone_; |
| 4574 }; |
| 4575 |
| 4576 |
| 4577 void CharacterRangeSplitter::Call(uc16 from, DispatchTable::Entry entry) { |
| 4578 if (!entry.out_set()->Get(kInBase)) return; |
| 4579 ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay) |
| 4580 ? included_ |
| 4581 : excluded_; |
| 4582 if (*target == NULL) *target = new(zone_) ZoneList<CharacterRange>(2, zone_); |
| 4583 (*target)->Add(CharacterRange(entry.from(), entry.to()), zone_); |
| 4584 } |
| 4585 |
| 4586 |
| 4587 void CharacterRange::Split(ZoneList<CharacterRange>* base, |
| 4588 Vector<const int> overlay, |
| 4589 ZoneList<CharacterRange>** included, |
| 4590 ZoneList<CharacterRange>** excluded, |
| 4591 Zone* zone) { |
| 4592 DCHECK_EQ(NULL, *included); |
| 4593 DCHECK_EQ(NULL, *excluded); |
| 4594 DispatchTable table(zone); |
| 4595 for (int i = 0; i < base->length(); i++) |
| 4596 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase, zone); |
| 4597 for (int i = 0; i < overlay.length(); i += 2) { |
| 4598 table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1), |
| 4599 CharacterRangeSplitter::kInOverlay, zone); |
| 4600 } |
| 4601 CharacterRangeSplitter callback(included, excluded, zone); |
| 4602 table.ForEach(&callback); |
| 4603 } |
| 4604 |
| 4605 |
| 4606 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, |
| 4607 bool is_one_byte, Zone* zone) { |
| 4608 Isolate* isolate = zone->isolate(); |
| 4609 uc16 bottom = from(); |
| 4610 uc16 top = to(); |
4507 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { | 4611 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { |
4508 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; | 4612 if (bottom > String::kMaxOneByteCharCode) return; |
4509 if (top > Symbols::kMaxOneCharCodeSymbol) { | 4613 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode; |
4510 top = Symbols::kMaxOneCharCodeSymbol; | |
4511 } | |
4512 } | 4614 } |
4513 | 4615 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
4514 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; | |
4515 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange; | |
4516 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | |
4517 if (top == bottom) { | 4616 if (top == bottom) { |
4518 // If this is a singleton we just expand the one character. | 4617 // If this is a singleton we just expand the one character. |
4519 intptr_t length = jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLINT | 4618 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); |
4520 for (intptr_t i = 0; i < length; i++) { | 4619 for (int i = 0; i < length; i++) { |
4521 uint32_t chr = chars[i]; | 4620 uc32 chr = chars[i]; |
4522 if (chr != bottom) { | 4621 if (chr != bottom) { |
4523 ranges->Add(CharacterRange::Singleton(chars[i])); | 4622 ranges->Add(CharacterRange::Singleton(chars[i]), zone); |
4524 } | 4623 } |
4525 } | 4624 } |
4526 } else { | 4625 } else { |
4527 // If this is a range we expand the characters block by block, | 4626 // If this is a range we expand the characters block by block, |
4528 // expanding contiguous subranges (blocks) one at a time. | 4627 // expanding contiguous subranges (blocks) one at a time. |
4529 // The approach is as follows. For a given start character we | 4628 // The approach is as follows. For a given start character we |
4530 // look up the remainder of the block that contains it (represented | 4629 // look up the remainder of the block that contains it (represented |
4531 // by the end point), for instance we find 'z' if the character | 4630 // by the end point), for instance we find 'z' if the character |
4532 // is 'c'. A block is characterized by the property | 4631 // is 'c'. A block is characterized by the property |
4533 // that all characters uncanonicalize in the same way, except that | 4632 // that all characters uncanonicalize in the same way, except that |
4534 // each entry in the result is incremented by the distance from the first | 4633 // each entry in the result is incremented by the distance from the first |
4535 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and | 4634 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and |
4536 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. | 4635 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. |
4537 // Once we've found the end point we look up its uncanonicalization | 4636 // Once we've found the end point we look up its uncanonicalization |
4538 // and produce a range for each element. For instance for [c-f] | 4637 // and produce a range for each element. For instance for [c-f] |
4539 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only | 4638 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only |
4540 // add a range if it is not already contained in the input, so [c-f] | 4639 // add a range if it is not already contained in the input, so [c-f] |
4541 // will be skipped but [C-F] will be added. If this range is not | 4640 // will be skipped but [C-F] will be added. If this range is not |
4542 // completely contained in a block we do this for all the blocks | 4641 // completely contained in a block we do this for all the blocks |
4543 // covered by the range (handling characters that is not in a block | 4642 // covered by the range (handling characters that is not in a block |
4544 // as a "singleton block"). | 4643 // as a "singleton block"). |
4545 int32_t range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4644 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
4546 intptr_t pos = bottom; | 4645 int pos = bottom; |
4547 while (pos <= top) { | 4646 while (pos <= top) { |
4548 intptr_t length = jsregexp_canonrange.get(pos, '\0', range); | 4647 int length = isolate->jsregexp_canonrange()->get(pos, '\0', range); |
4549 uint16_t block_end; | 4648 uc16 block_end; |
4550 if (length == 0) { | 4649 if (length == 0) { |
4551 block_end = pos; | 4650 block_end = pos; |
4552 } else { | 4651 } else { |
4553 ASSERT(length == 1); | 4652 DCHECK_EQ(1, length); |
4554 block_end = range[0]; | 4653 block_end = range[0]; |
4555 } | 4654 } |
4556 intptr_t end = (block_end > top) ? top : block_end; | 4655 int end = (block_end > top) ? top : block_end; |
4557 length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT | 4656 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range); |
4558 for (intptr_t i = 0; i < length; i++) { | 4657 for (int i = 0; i < length; i++) { |
4559 uint32_t c = range[i]; | 4658 uc32 c = range[i]; |
4560 uint16_t range_from = c - (block_end - pos); | 4659 uc16 range_from = c - (block_end - pos); |
4561 uint16_t range_to = c - (block_end - end); | 4660 uc16 range_to = c - (block_end - end); |
4562 if (!(bottom <= range_from && range_to <= top)) { | 4661 if (!(bottom <= range_from && range_to <= top)) { |
4563 ranges->Add(CharacterRange(range_from, range_to)); | 4662 ranges->Add(CharacterRange(range_from, range_to), zone); |
4564 } | 4663 } |
4565 } | 4664 } |
4566 pos = end + 1; | 4665 pos = end + 1; |
4567 } | 4666 } |
4568 } | 4667 } |
4569 } | 4668 } |
4570 | 4669 |
4571 | 4670 |
4572 bool CharacterRange::IsCanonical(ZoneGrowableArray<CharacterRange>* ranges) { | 4671 bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) { |
4573 ASSERT(ranges != NULL); | 4672 DCHECK_NOT_NULL(ranges); |
4574 intptr_t n = ranges->length(); | 4673 int n = ranges->length(); |
4575 if (n <= 1) return true; | 4674 if (n <= 1) return true; |
4576 intptr_t max = ranges->At(0).to(); | 4675 int max = ranges->at(0).to(); |
4577 for (intptr_t i = 1; i < n; i++) { | 4676 for (int i = 1; i < n; i++) { |
4578 CharacterRange next_range = ranges->At(i); | 4677 CharacterRange next_range = ranges->at(i); |
4579 if (next_range.from() <= max + 1) return false; | 4678 if (next_range.from() <= max + 1) return false; |
4580 max = next_range.to(); | 4679 max = next_range.to(); |
4581 } | 4680 } |
4582 return true; | 4681 return true; |
4583 } | 4682 } |
4584 | 4683 |
4585 | 4684 |
4586 ZoneGrowableArray<CharacterRange>* CharacterSet::ranges() { | 4685 ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) { |
4587 if (ranges_ == NULL) { | 4686 if (ranges_ == NULL) { |
4588 ranges_ = new ZoneGrowableArray<CharacterRange>(2); | 4687 ranges_ = new(zone) ZoneList<CharacterRange>(2, zone); |
4589 CharacterRange::AddClassEscape(standard_set_type_, ranges_); | 4688 CharacterRange::AddClassEscape(standard_set_type_, ranges_, zone); |
4590 } | 4689 } |
4591 return ranges_; | 4690 return ranges_; |
4592 } | 4691 } |
4593 | 4692 |
4594 | 4693 |
4595 // Move a number of elements in a zone array to another position | 4694 // Move a number of elements in a zonelist to another position |
4596 // in the same array. Handles overlapping source and target areas. | 4695 // in the same list. Handles overlapping source and target areas. |
4597 static void MoveRanges(ZoneGrowableArray<CharacterRange>* list, | 4696 static void MoveRanges(ZoneList<CharacterRange>* list, |
4598 intptr_t from, | 4697 int from, |
4599 intptr_t to, | 4698 int to, |
4600 intptr_t count) { | 4699 int count) { |
4601 // Ranges are potentially overlapping. | 4700 // Ranges are potentially overlapping. |
4602 if (from < to) { | 4701 if (from < to) { |
4603 for (intptr_t i = count - 1; i >= 0; i--) { | 4702 for (int i = count - 1; i >= 0; i--) { |
4604 (*list)[to + i] = list->At(from + i); | 4703 list->at(to + i) = list->at(from + i); |
4605 } | 4704 } |
4606 } else { | 4705 } else { |
4607 for (intptr_t i = 0; i < count; i++) { | 4706 for (int i = 0; i < count; i++) { |
4608 (*list)[to + i] = list->At(from + i); | 4707 list->at(to + i) = list->at(from + i); |
4609 } | 4708 } |
4610 } | 4709 } |
4611 } | 4710 } |
4612 | 4711 |
4613 | 4712 |
4614 static intptr_t InsertRangeInCanonicalList( | 4713 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, |
4615 ZoneGrowableArray<CharacterRange>* list, | 4714 int count, |
4616 intptr_t count, | 4715 CharacterRange insert) { |
4617 CharacterRange insert) { | |
4618 // Inserts a range into list[0..count[, which must be sorted | 4716 // Inserts a range into list[0..count[, which must be sorted |
4619 // by from value and non-overlapping and non-adjacent, using at most | 4717 // by from value and non-overlapping and non-adjacent, using at most |
4620 // list[0..count] for the result. Returns the number of resulting | 4718 // list[0..count] for the result. Returns the number of resulting |
4621 // canonicalized ranges. Inserting a range may collapse existing ranges into | 4719 // canonicalized ranges. Inserting a range may collapse existing ranges into |
4622 // fewer ranges, so the return value can be anything in the range 1..count+1. | 4720 // fewer ranges, so the return value can be anything in the range 1..count+1. |
4623 uint16_t from = insert.from(); | 4721 uc16 from = insert.from(); |
4624 uint16_t to = insert.to(); | 4722 uc16 to = insert.to(); |
4625 intptr_t start_pos = 0; | 4723 int start_pos = 0; |
4626 intptr_t end_pos = count; | 4724 int end_pos = count; |
4627 for (intptr_t i = count - 1; i >= 0; i--) { | 4725 for (int i = count - 1; i >= 0; i--) { |
4628 CharacterRange current = list->At(i); | 4726 CharacterRange current = list->at(i); |
4629 if (current.from() > to + 1) { | 4727 if (current.from() > to + 1) { |
4630 end_pos = i; | 4728 end_pos = i; |
4631 } else if (current.to() + 1 < from) { | 4729 } else if (current.to() + 1 < from) { |
4632 start_pos = i + 1; | 4730 start_pos = i + 1; |
4633 break; | 4731 break; |
4634 } | 4732 } |
4635 } | 4733 } |
4636 | 4734 |
4637 // Inserted range overlaps, or is adjacent to, ranges at positions | 4735 // Inserted range overlaps, or is adjacent to, ranges at positions |
4638 // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are | 4736 // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are |
4639 // not affected by the insertion. | 4737 // not affected by the insertion. |
4640 // If start_pos == end_pos, the range must be inserted before start_pos. | 4738 // If start_pos == end_pos, the range must be inserted before start_pos. |
4641 // if start_pos < end_pos, the entire range from start_pos to end_pos | 4739 // if start_pos < end_pos, the entire range from start_pos to end_pos |
4642 // must be merged with the insert range. | 4740 // must be merged with the insert range. |
4643 | 4741 |
4644 if (start_pos == end_pos) { | 4742 if (start_pos == end_pos) { |
4645 // Insert between existing ranges at position start_pos. | 4743 // Insert between existing ranges at position start_pos. |
4646 if (start_pos < count) { | 4744 if (start_pos < count) { |
4647 MoveRanges(list, start_pos, start_pos + 1, count - start_pos); | 4745 MoveRanges(list, start_pos, start_pos + 1, count - start_pos); |
4648 } | 4746 } |
4649 (*list)[start_pos] = insert; | 4747 list->at(start_pos) = insert; |
4650 return count + 1; | 4748 return count + 1; |
4651 } | 4749 } |
4652 if (start_pos + 1 == end_pos) { | 4750 if (start_pos + 1 == end_pos) { |
4653 // Replace single existing range at position start_pos. | 4751 // Replace single existing range at position start_pos. |
4654 CharacterRange to_replace = list->At(start_pos); | 4752 CharacterRange to_replace = list->at(start_pos); |
4655 intptr_t new_from = Utils::Minimum(to_replace.from(), from); | 4753 int new_from = Min(to_replace.from(), from); |
4656 intptr_t new_to = Utils::Maximum(to_replace.to(), to); | 4754 int new_to = Max(to_replace.to(), to); |
4657 (*list)[start_pos] = CharacterRange(new_from, new_to); | 4755 list->at(start_pos) = CharacterRange(new_from, new_to); |
4658 return count; | 4756 return count; |
4659 } | 4757 } |
4660 // Replace a number of existing ranges from start_pos to end_pos - 1. | 4758 // Replace a number of existing ranges from start_pos to end_pos - 1. |
4661 // Move the remaining ranges down. | 4759 // Move the remaining ranges down. |
4662 | 4760 |
4663 intptr_t new_from = Utils::Minimum(list->At(start_pos).from(), from); | 4761 int new_from = Min(list->at(start_pos).from(), from); |
4664 intptr_t new_to = Utils::Maximum(list->At(end_pos - 1).to(), to); | 4762 int new_to = Max(list->at(end_pos - 1).to(), to); |
4665 if (end_pos < count) { | 4763 if (end_pos < count) { |
4666 MoveRanges(list, end_pos, start_pos + 1, count - end_pos); | 4764 MoveRanges(list, end_pos, start_pos + 1, count - end_pos); |
4667 } | 4765 } |
4668 (*list)[start_pos] = CharacterRange(new_from, new_to); | 4766 list->at(start_pos) = CharacterRange(new_from, new_to); |
4669 return count - (end_pos - start_pos) + 1; | 4767 return count - (end_pos - start_pos) + 1; |
4670 } | 4768 } |
4671 | 4769 |
4672 | 4770 |
4673 void CharacterSet::Canonicalize() { | 4771 void CharacterSet::Canonicalize() { |
4674 // Special/default classes are always considered canonical. The result | 4772 // Special/default classes are always considered canonical. The result |
4675 // of calling ranges() will be sorted. | 4773 // of calling ranges() will be sorted. |
4676 if (ranges_ == NULL) return; | 4774 if (ranges_ == NULL) return; |
4677 CharacterRange::Canonicalize(ranges_); | 4775 CharacterRange::Canonicalize(ranges_); |
4678 } | 4776 } |
4679 | 4777 |
4680 | 4778 |
4681 void CharacterRange::Canonicalize( | 4779 void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) { |
4682 ZoneGrowableArray<CharacterRange>* character_ranges) { | |
4683 if (character_ranges->length() <= 1) return; | 4780 if (character_ranges->length() <= 1) return; |
4684 // Check whether ranges are already canonical (increasing, non-overlapping, | 4781 // Check whether ranges are already canonical (increasing, non-overlapping, |
4685 // non-adjacent). | 4782 // non-adjacent). |
4686 intptr_t n = character_ranges->length(); | 4783 int n = character_ranges->length(); |
4687 intptr_t max = character_ranges->At(0).to(); | 4784 int max = character_ranges->at(0).to(); |
4688 intptr_t i = 1; | 4785 int i = 1; |
4689 while (i < n) { | 4786 while (i < n) { |
4690 CharacterRange current = character_ranges->At(i); | 4787 CharacterRange current = character_ranges->at(i); |
4691 if (current.from() <= max + 1) { | 4788 if (current.from() <= max + 1) { |
4692 break; | 4789 break; |
4693 } | 4790 } |
4694 max = current.to(); | 4791 max = current.to(); |
4695 i++; | 4792 i++; |
4696 } | 4793 } |
4697 // Canonical until the i'th range. If that's all of them, we are done. | 4794 // Canonical until the i'th range. If that's all of them, we are done. |
4698 if (i == n) return; | 4795 if (i == n) return; |
4699 | 4796 |
4700 // The ranges at index i and forward are not canonicalized. Make them so by | 4797 // The ranges at index i and forward are not canonicalized. Make them so by |
4701 // doing the equivalent of insertion sort (inserting each into the previous | 4798 // doing the equivalent of insertion sort (inserting each into the previous |
4702 // list, in order). | 4799 // list, in order). |
4703 // Notice that inserting a range can reduce the number of ranges in the | 4800 // Notice that inserting a range can reduce the number of ranges in the |
4704 // result due to combining of adjacent and overlapping ranges. | 4801 // result due to combining of adjacent and overlapping ranges. |
4705 intptr_t read = i; // Range to insert. | 4802 int read = i; // Range to insert. |
4706 intptr_t num_canonical = i; // Length of canonicalized part of list. | 4803 int num_canonical = i; // Length of canonicalized part of list. |
4707 do { | 4804 do { |
4708 num_canonical = InsertRangeInCanonicalList(character_ranges, | 4805 num_canonical = InsertRangeInCanonicalList(character_ranges, |
4709 num_canonical, | 4806 num_canonical, |
4710 character_ranges->At(read)); | 4807 character_ranges->at(read)); |
4711 read++; | 4808 read++; |
4712 } while (read < n); | 4809 } while (read < n); |
4713 character_ranges->TruncateTo(num_canonical); | 4810 character_ranges->Rewind(num_canonical); |
4714 | 4811 |
4715 ASSERT(CharacterRange::IsCanonical(character_ranges)); | 4812 DCHECK(CharacterRange::IsCanonical(character_ranges)); |
4716 } | 4813 } |
4717 | 4814 |
4718 | 4815 |
4719 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges, | 4816 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges, |
4720 ZoneGrowableArray<CharacterRange>* negated_ranges) { | 4817 ZoneList<CharacterRange>* negated_ranges, |
4721 ASSERT(CharacterRange::IsCanonical(ranges)); | 4818 Zone* zone) { |
4722 ASSERT(negated_ranges->length() == 0); | 4819 DCHECK(CharacterRange::IsCanonical(ranges)); |
4723 intptr_t range_count = ranges->length(); | 4820 DCHECK_EQ(0, negated_ranges->length()); |
4724 uint16_t from = 0; | 4821 int range_count = ranges->length(); |
4725 intptr_t i = 0; | 4822 uc16 from = 0; |
4726 if (range_count > 0 && ranges->At(0).from() == 0) { | 4823 int i = 0; |
4727 from = ranges->At(0).to(); | 4824 if (range_count > 0 && ranges->at(0).from() == 0) { |
| 4825 from = ranges->at(0).to(); |
4728 i = 1; | 4826 i = 1; |
4729 } | 4827 } |
4730 while (i < range_count) { | 4828 while (i < range_count) { |
4731 CharacterRange range = ranges->At(i); | 4829 CharacterRange range = ranges->at(i); |
4732 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1)); | 4830 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone); |
4733 from = range.to(); | 4831 from = range.to(); |
4734 i++; | 4832 i++; |
4735 } | 4833 } |
4736 if (from < Utf16::kMaxCodeUnit) { | 4834 if (from < String::kMaxUtf16CodeUnit) { |
4737 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit)); | 4835 negated_ranges->Add(CharacterRange(from + 1, String::kMaxUtf16CodeUnit), |
| 4836 zone); |
4738 } | 4837 } |
4739 } | 4838 } |
4740 | 4839 |
4741 | 4840 |
4742 // ------------------------------------------------------------------- | 4841 // ------------------------------------------------------------------- |
4743 // Splay tree | 4842 // Splay tree |
4744 | 4843 |
4745 | 4844 |
4746 // Workaround for the fact that ZoneGrowableArray does not have contains(). | 4845 OutSet* OutSet::Extend(unsigned value, Zone* zone) { |
4747 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, | 4846 if (Get(value)) |
4748 unsigned value) { | 4847 return this; |
4749 for (intptr_t i = 0; i < array->length(); i++) { | 4848 if (successors(zone) != NULL) { |
4750 if (array->At(i) == value) { | 4849 for (int i = 0; i < successors(zone)->length(); i++) { |
4751 return true; | 4850 OutSet* successor = successors(zone)->at(i); |
| 4851 if (successor->Get(value)) |
| 4852 return successor; |
4752 } | 4853 } |
| 4854 } else { |
| 4855 successors_ = new(zone) ZoneList<OutSet*>(2, zone); |
4753 } | 4856 } |
4754 return false; | 4857 OutSet* result = new(zone) OutSet(first_, remaining_); |
| 4858 result->Set(value, zone); |
| 4859 successors(zone)->Add(result, zone); |
| 4860 return result; |
4755 } | 4861 } |
4756 | 4862 |
4757 | 4863 |
4758 void OutSet::Set(unsigned value, Isolate* isolate) { | 4864 void OutSet::Set(unsigned value, Zone *zone) { |
4759 if (value < kFirstLimit) { | 4865 if (value < kFirstLimit) { |
4760 first_ |= (1 << value); | 4866 first_ |= (1 << value); |
4761 } else { | 4867 } else { |
4762 if (remaining_ == NULL) | 4868 if (remaining_ == NULL) |
4763 remaining_ = new(isolate) ZoneGrowableArray<unsigned>(1); | 4869 remaining_ = new(zone) ZoneList<unsigned>(1, zone); |
4764 | 4870 if (remaining_->is_empty() || !remaining_->Contains(value)) |
4765 bool remaining_contains_value = ArrayContains(remaining_, value); | 4871 remaining_->Add(value, zone); |
4766 if (remaining_->is_empty() || !remaining_contains_value) { | |
4767 remaining_->Add(value); | |
4768 } | |
4769 } | 4872 } |
4770 } | 4873 } |
4771 | 4874 |
4772 | 4875 |
4773 bool OutSet::Get(unsigned value) const { | 4876 bool OutSet::Get(unsigned value) const { |
4774 if (value < kFirstLimit) { | 4877 if (value < kFirstLimit) { |
4775 return (first_ & (1 << value)) != 0; | 4878 return (first_ & (1 << value)) != 0; |
4776 } else if (remaining_ == NULL) { | 4879 } else if (remaining_ == NULL) { |
4777 return false; | 4880 return false; |
4778 } else { | 4881 } else { |
4779 return ArrayContains(remaining_, value); | 4882 return remaining_->Contains(value); |
4780 } | 4883 } |
4781 } | 4884 } |
4782 | 4885 |
4783 | 4886 |
| 4887 const uc16 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar; |
| 4888 |
| 4889 |
| 4890 void DispatchTable::AddRange(CharacterRange full_range, int value, |
| 4891 Zone* zone) { |
| 4892 CharacterRange current = full_range; |
| 4893 if (tree()->is_empty()) { |
| 4894 // If this is the first range we just insert into the table. |
| 4895 ZoneSplayTree<Config>::Locator loc; |
| 4896 DCHECK_RESULT(tree()->Insert(current.from(), &loc)); |
| 4897 loc.set_value(Entry(current.from(), current.to(), |
| 4898 empty()->Extend(value, zone))); |
| 4899 return; |
| 4900 } |
| 4901 // First see if there is a range to the left of this one that |
| 4902 // overlaps. |
| 4903 ZoneSplayTree<Config>::Locator loc; |
| 4904 if (tree()->FindGreatestLessThan(current.from(), &loc)) { |
| 4905 Entry* entry = &loc.value(); |
| 4906 // If we've found a range that overlaps with this one, and it |
| 4907 // starts strictly to the left of this one, we have to fix it |
| 4908 // because the following code only handles ranges that start on |
| 4909 // or after the start point of the range we're adding. |
| 4910 if (entry->from() < current.from() && entry->to() >= current.from()) { |
| 4911 // Snap the overlapping range in half around the start point of |
| 4912 // the range we're adding. |
| 4913 CharacterRange left(entry->from(), current.from() - 1); |
| 4914 CharacterRange right(current.from(), entry->to()); |
| 4915 // The left part of the overlapping range doesn't overlap. |
| 4916 // Truncate the whole entry to be just the left part. |
| 4917 entry->set_to(left.to()); |
| 4918 // The right part is the one that overlaps. We add this part |
| 4919 // to the map and let the next step deal with merging it with |
| 4920 // the range we're adding. |
| 4921 ZoneSplayTree<Config>::Locator loc; |
| 4922 DCHECK_RESULT(tree()->Insert(right.from(), &loc)); |
| 4923 loc.set_value(Entry(right.from(), |
| 4924 right.to(), |
| 4925 entry->out_set())); |
| 4926 } |
| 4927 } |
| 4928 while (current.is_valid()) { |
| 4929 if (tree()->FindLeastGreaterThan(current.from(), &loc) && |
| 4930 (loc.value().from() <= current.to()) && |
| 4931 (loc.value().to() >= current.from())) { |
| 4932 Entry* entry = &loc.value(); |
| 4933 // We have overlap. If there is space between the start point of |
| 4934 // the range we're adding and where the overlapping range starts |
| 4935 // then we have to add a range covering just that space. |
| 4936 if (current.from() < entry->from()) { |
| 4937 ZoneSplayTree<Config>::Locator ins; |
| 4938 DCHECK_RESULT(tree()->Insert(current.from(), &ins)); |
| 4939 ins.set_value(Entry(current.from(), |
| 4940 entry->from() - 1, |
| 4941 empty()->Extend(value, zone))); |
| 4942 current.set_from(entry->from()); |
| 4943 } |
| 4944 DCHECK_EQ(current.from(), entry->from()); |
| 4945 // If the overlapping range extends beyond the one we want to add |
| 4946 // we have to snap the right part off and add it separately. |
| 4947 if (entry->to() > current.to()) { |
| 4948 ZoneSplayTree<Config>::Locator ins; |
| 4949 DCHECK_RESULT(tree()->Insert(current.to() + 1, &ins)); |
| 4950 ins.set_value(Entry(current.to() + 1, |
| 4951 entry->to(), |
| 4952 entry->out_set())); |
| 4953 entry->set_to(current.to()); |
| 4954 } |
| 4955 DCHECK(entry->to() <= current.to()); |
| 4956 // The overlapping range is now completely contained by the range |
| 4957 // we're adding so we can just update it and move the start point |
| 4958 // of the range we're adding just past it. |
| 4959 entry->AddValue(value, zone); |
| 4960 // Bail out if the last interval ended at 0xFFFF since otherwise |
| 4961 // adding 1 will wrap around to 0. |
| 4962 if (entry->to() == String::kMaxUtf16CodeUnit) |
| 4963 break; |
| 4964 DCHECK(entry->to() + 1 > current.from()); |
| 4965 current.set_from(entry->to() + 1); |
| 4966 } else { |
| 4967 // There is no overlap so we can just add the range |
| 4968 ZoneSplayTree<Config>::Locator ins; |
| 4969 DCHECK_RESULT(tree()->Insert(current.from(), &ins)); |
| 4970 ins.set_value(Entry(current.from(), |
| 4971 current.to(), |
| 4972 empty()->Extend(value, zone))); |
| 4973 break; |
| 4974 } |
| 4975 } |
| 4976 } |
| 4977 |
| 4978 |
| 4979 OutSet* DispatchTable::Get(uc16 value) { |
| 4980 ZoneSplayTree<Config>::Locator loc; |
| 4981 if (!tree()->FindGreatestLessThan(value, &loc)) |
| 4982 return empty(); |
| 4983 Entry* entry = &loc.value(); |
| 4984 if (value <= entry->to()) |
| 4985 return entry->out_set(); |
| 4986 else |
| 4987 return empty(); |
| 4988 } |
| 4989 |
| 4990 |
4784 // ------------------------------------------------------------------- | 4991 // ------------------------------------------------------------------- |
4785 // Analysis | 4992 // Analysis |
4786 | 4993 |
4787 | 4994 |
4788 void Analysis::EnsureAnalyzed(RegExpNode* that) { | 4995 void Analysis::EnsureAnalyzed(RegExpNode* that) { |
| 4996 StackLimitCheck check(that->zone()->isolate()); |
| 4997 if (check.HasOverflowed()) { |
| 4998 fail("Stack overflow"); |
| 4999 return; |
| 5000 } |
4789 if (that->info()->been_analyzed || that->info()->being_analyzed) | 5001 if (that->info()->been_analyzed || that->info()->being_analyzed) |
4790 return; | 5002 return; |
4791 that->info()->being_analyzed = true; | 5003 that->info()->being_analyzed = true; |
4792 that->Accept(this); | 5004 that->Accept(this); |
4793 that->info()->being_analyzed = false; | 5005 that->info()->being_analyzed = false; |
4794 that->info()->been_analyzed = true; | 5006 that->info()->been_analyzed = true; |
4795 } | 5007 } |
4796 | 5008 |
4797 | 5009 |
4798 void Analysis::VisitEnd(EndNode* that) { | 5010 void Analysis::VisitEnd(EndNode* that) { |
4799 // nothing to do | 5011 // nothing to do |
4800 } | 5012 } |
4801 | 5013 |
4802 | 5014 |
4803 void TextNode::CalculateOffsets() { | 5015 void TextNode::CalculateOffsets() { |
4804 intptr_t element_count = elements()->length(); | 5016 int element_count = elements()->length(); |
4805 // Set up the offsets of the elements relative to the start. This is a fixed | 5017 // Set up the offsets of the elements relative to the start. This is a fixed |
4806 // quantity since a TextNode can only contain fixed-width things. | 5018 // quantity since a TextNode can only contain fixed-width things. |
4807 intptr_t cp_offset = 0; | 5019 int cp_offset = 0; |
4808 for (intptr_t i = 0; i < element_count; i++) { | 5020 for (int i = 0; i < element_count; i++) { |
4809 TextElement& elm = (*elements())[i]; | 5021 TextElement& elm = elements()->at(i); |
4810 elm.set_cp_offset(cp_offset); | 5022 elm.set_cp_offset(cp_offset); |
4811 cp_offset += elm.length(); | 5023 cp_offset += elm.length(); |
4812 } | 5024 } |
4813 } | 5025 } |
4814 | 5026 |
4815 | 5027 |
4816 void Analysis::VisitText(TextNode* that) { | 5028 void Analysis::VisitText(TextNode* that) { |
4817 if (ignore_case_) { | 5029 if (ignore_case_) { |
4818 that->MakeCaseIndependent(is_one_byte_); | 5030 that->MakeCaseIndependent(is_one_byte_); |
4819 } | 5031 } |
(...skipping 10 matching lines...) Expand all Loading... |
4830 if (!has_failed()) { | 5042 if (!has_failed()) { |
4831 // If the next node is interested in what it follows then this node | 5043 // If the next node is interested in what it follows then this node |
4832 // has to be interested too so it can pass the information on. | 5044 // has to be interested too so it can pass the information on. |
4833 that->info()->AddFromFollowing(target->info()); | 5045 that->info()->AddFromFollowing(target->info()); |
4834 } | 5046 } |
4835 } | 5047 } |
4836 | 5048 |
4837 | 5049 |
4838 void Analysis::VisitChoice(ChoiceNode* that) { | 5050 void Analysis::VisitChoice(ChoiceNode* that) { |
4839 NodeInfo* info = that->info(); | 5051 NodeInfo* info = that->info(); |
4840 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 5052 for (int i = 0; i < that->alternatives()->length(); i++) { |
4841 RegExpNode* node = (*that->alternatives())[i].node(); | 5053 RegExpNode* node = that->alternatives()->at(i).node(); |
4842 EnsureAnalyzed(node); | 5054 EnsureAnalyzed(node); |
4843 if (has_failed()) return; | 5055 if (has_failed()) return; |
4844 // Anything the following nodes need to know has to be known by | 5056 // Anything the following nodes need to know has to be known by |
4845 // this node also, so it can pass it on. | 5057 // this node also, so it can pass it on. |
4846 info->AddFromFollowing(node->info()); | 5058 info->AddFromFollowing(node->info()); |
4847 } | 5059 } |
4848 } | 5060 } |
4849 | 5061 |
4850 | 5062 |
4851 void Analysis::VisitLoopChoice(LoopChoiceNode* that) { | 5063 void Analysis::VisitLoopChoice(LoopChoiceNode* that) { |
4852 NodeInfo* info = that->info(); | 5064 NodeInfo* info = that->info(); |
4853 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 5065 for (int i = 0; i < that->alternatives()->length(); i++) { |
4854 RegExpNode* node = (*that->alternatives())[i].node(); | 5066 RegExpNode* node = that->alternatives()->at(i).node(); |
4855 if (node != that->loop_node()) { | 5067 if (node != that->loop_node()) { |
4856 EnsureAnalyzed(node); | 5068 EnsureAnalyzed(node); |
4857 if (has_failed()) return; | 5069 if (has_failed()) return; |
4858 info->AddFromFollowing(node->info()); | 5070 info->AddFromFollowing(node->info()); |
4859 } | 5071 } |
4860 } | 5072 } |
4861 // Check the loop last since it may need the value of this node | 5073 // Check the loop last since it may need the value of this node |
4862 // to get a correct result. | 5074 // to get a correct result. |
4863 EnsureAnalyzed(that->loop_node()); | 5075 EnsureAnalyzed(that->loop_node()); |
4864 if (!has_failed()) { | 5076 if (!has_failed()) { |
4865 info->AddFromFollowing(that->loop_node()->info()); | 5077 info->AddFromFollowing(that->loop_node()->info()); |
4866 } | 5078 } |
4867 } | 5079 } |
4868 | 5080 |
4869 | 5081 |
4870 void Analysis::VisitBackReference(BackReferenceNode* that) { | 5082 void Analysis::VisitBackReference(BackReferenceNode* that) { |
4871 EnsureAnalyzed(that->on_success()); | 5083 EnsureAnalyzed(that->on_success()); |
4872 } | 5084 } |
4873 | 5085 |
4874 | 5086 |
4875 void Analysis::VisitAssertion(AssertionNode* that) { | 5087 void Analysis::VisitAssertion(AssertionNode* that) { |
4876 EnsureAnalyzed(that->on_success()); | 5088 EnsureAnalyzed(that->on_success()); |
4877 } | 5089 } |
4878 | 5090 |
4879 | 5091 |
4880 void BackReferenceNode::FillInBMInfo(intptr_t offset, | 5092 void BackReferenceNode::FillInBMInfo(int offset, |
4881 intptr_t budget, | 5093 int budget, |
4882 BoyerMooreLookahead* bm, | 5094 BoyerMooreLookahead* bm, |
4883 bool not_at_start) { | 5095 bool not_at_start) { |
4884 // Working out the set of characters that a backreference can match is too | 5096 // Working out the set of characters that a backreference can match is too |
4885 // hard, so we just say that any character can match. | 5097 // hard, so we just say that any character can match. |
4886 bm->SetRest(offset); | 5098 bm->SetRest(offset); |
4887 SaveBMInfo(bm, not_at_start, offset); | 5099 SaveBMInfo(bm, not_at_start, offset); |
4888 } | 5100 } |
4889 | 5101 |
4890 | 5102 |
4891 COMPILE_ASSERT(BoyerMoorePositionInfo::kMapSize == | 5103 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == |
4892 RegExpMacroAssembler::kTableSize); | 5104 RegExpMacroAssembler::kTableSize); |
4893 | 5105 |
4894 | 5106 |
4895 void ChoiceNode::FillInBMInfo(intptr_t offset, | 5107 void ChoiceNode::FillInBMInfo(int offset, |
4896 intptr_t budget, | 5108 int budget, |
4897 BoyerMooreLookahead* bm, | 5109 BoyerMooreLookahead* bm, |
4898 bool not_at_start) { | 5110 bool not_at_start) { |
4899 ZoneGrowableArray<GuardedAlternative>* alts = alternatives(); | 5111 ZoneList<GuardedAlternative>* alts = alternatives(); |
4900 budget = (budget - 1) / alts->length(); | 5112 budget = (budget - 1) / alts->length(); |
4901 for (intptr_t i = 0; i < alts->length(); i++) { | 5113 for (int i = 0; i < alts->length(); i++) { |
4902 GuardedAlternative& alt = (*alts)[i]; | 5114 GuardedAlternative& alt = alts->at(i); |
4903 if (alt.guards() != NULL && alt.guards()->length() != 0) { | 5115 if (alt.guards() != NULL && alt.guards()->length() != 0) { |
4904 bm->SetRest(offset); // Give up trying to fill in info. | 5116 bm->SetRest(offset); // Give up trying to fill in info. |
4905 SaveBMInfo(bm, not_at_start, offset); | 5117 SaveBMInfo(bm, not_at_start, offset); |
4906 return; | 5118 return; |
4907 } | 5119 } |
4908 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start); | 5120 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start); |
4909 } | 5121 } |
4910 SaveBMInfo(bm, not_at_start, offset); | 5122 SaveBMInfo(bm, not_at_start, offset); |
4911 } | 5123 } |
4912 | 5124 |
4913 | 5125 |
4914 void TextNode::FillInBMInfo(intptr_t initial_offset, | 5126 void TextNode::FillInBMInfo(int initial_offset, |
4915 intptr_t budget, | 5127 int budget, |
4916 BoyerMooreLookahead* bm, | 5128 BoyerMooreLookahead* bm, |
4917 bool not_at_start) { | 5129 bool not_at_start) { |
4918 if (initial_offset >= bm->length()) return; | 5130 if (initial_offset >= bm->length()) return; |
4919 intptr_t offset = initial_offset; | 5131 int offset = initial_offset; |
4920 intptr_t max_char = bm->max_char(); | 5132 int max_char = bm->max_char(); |
4921 for (intptr_t i = 0; i < elements()->length(); i++) { | 5133 for (int i = 0; i < elements()->length(); i++) { |
4922 if (offset >= bm->length()) { | 5134 if (offset >= bm->length()) { |
4923 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5135 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
4924 return; | 5136 return; |
4925 } | 5137 } |
4926 TextElement text = elements()->At(i); | 5138 TextElement text = elements()->at(i); |
4927 if (text.text_type() == TextElement::ATOM) { | 5139 if (text.text_type() == TextElement::ATOM) { |
4928 RegExpAtom* atom = text.atom(); | 5140 RegExpAtom* atom = text.atom(); |
4929 for (intptr_t j = 0; j < atom->length(); j++, offset++) { | 5141 for (int j = 0; j < atom->length(); j++, offset++) { |
4930 if (offset >= bm->length()) { | 5142 if (offset >= bm->length()) { |
4931 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5143 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
4932 return; | 5144 return; |
4933 } | 5145 } |
4934 uint16_t character = atom->data()->At(j); | 5146 uc16 character = atom->data()[j]; |
4935 if (bm->compiler()->ignore_case()) { | 5147 if (bm->compiler()->ignore_case()) { |
4936 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 5148 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
4937 intptr_t length = GetCaseIndependentLetters( | 5149 int length = GetCaseIndependentLetters( |
| 5150 Isolate::Current(), |
4938 character, | 5151 character, |
4939 bm->max_char() == Symbols::kMaxOneCharCodeSymbol, | 5152 bm->max_char() == String::kMaxOneByteCharCode, |
4940 chars); | 5153 chars); |
4941 for (intptr_t j = 0; j < length; j++) { | 5154 for (int j = 0; j < length; j++) { |
4942 bm->Set(offset, chars[j]); | 5155 bm->Set(offset, chars[j]); |
4943 } | 5156 } |
4944 } else { | 5157 } else { |
4945 if (character <= max_char) bm->Set(offset, character); | 5158 if (character <= max_char) bm->Set(offset, character); |
4946 } | 5159 } |
4947 } | 5160 } |
4948 } else { | 5161 } else { |
4949 ASSERT(text.text_type() == TextElement::CHAR_CLASS); | 5162 DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type()); |
4950 RegExpCharacterClass* char_class = text.char_class(); | 5163 RegExpCharacterClass* char_class = text.char_class(); |
4951 ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges(); | 5164 ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); |
4952 if (char_class->is_negated()) { | 5165 if (char_class->is_negated()) { |
4953 bm->SetAll(offset); | 5166 bm->SetAll(offset); |
4954 } else { | 5167 } else { |
4955 for (intptr_t k = 0; k < ranges->length(); k++) { | 5168 for (int k = 0; k < ranges->length(); k++) { |
4956 CharacterRange& range = (*ranges)[k]; | 5169 CharacterRange& range = ranges->at(k); |
4957 if (range.from() > max_char) continue; | 5170 if (range.from() > max_char) continue; |
4958 intptr_t to = Utils::Minimum(max_char, | 5171 int to = Min(max_char, static_cast<int>(range.to())); |
4959 static_cast<intptr_t>(range.to())); | |
4960 bm->SetInterval(offset, Interval(range.from(), to)); | 5172 bm->SetInterval(offset, Interval(range.from(), to)); |
4961 } | 5173 } |
4962 } | 5174 } |
4963 offset++; | 5175 offset++; |
4964 } | 5176 } |
4965 } | 5177 } |
4966 if (offset >= bm->length()) { | 5178 if (offset >= bm->length()) { |
4967 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5179 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
4968 return; | 5180 return; |
4969 } | 5181 } |
4970 on_success()->FillInBMInfo(offset, | 5182 on_success()->FillInBMInfo(offset, |
4971 budget - 1, | 5183 budget - 1, |
4972 bm, | 5184 bm, |
4973 true); // Not at start after a text node. | 5185 true); // Not at start after a text node. |
4974 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5186 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
4975 } | 5187 } |
4976 | 5188 |
4977 | 5189 |
| 5190 // ------------------------------------------------------------------- |
| 5191 // Dispatch table construction |
| 5192 |
| 5193 |
| 5194 void DispatchTableConstructor::VisitEnd(EndNode* that) { |
| 5195 AddRange(CharacterRange::Everything()); |
| 5196 } |
| 5197 |
| 5198 |
| 5199 void DispatchTableConstructor::BuildTable(ChoiceNode* node) { |
| 5200 node->set_being_calculated(true); |
| 5201 ZoneList<GuardedAlternative>* alternatives = node->alternatives(); |
| 5202 for (int i = 0; i < alternatives->length(); i++) { |
| 5203 set_choice_index(i); |
| 5204 alternatives->at(i).node()->Accept(this); |
| 5205 } |
| 5206 node->set_being_calculated(false); |
| 5207 } |
| 5208 |
| 5209 |
| 5210 class AddDispatchRange { |
| 5211 public: |
| 5212 explicit AddDispatchRange(DispatchTableConstructor* constructor) |
| 5213 : constructor_(constructor) { } |
| 5214 void Call(uc32 from, DispatchTable::Entry entry); |
| 5215 private: |
| 5216 DispatchTableConstructor* constructor_; |
| 5217 }; |
| 5218 |
| 5219 |
| 5220 void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) { |
| 5221 CharacterRange range(from, entry.to()); |
| 5222 constructor_->AddRange(range); |
| 5223 } |
| 5224 |
| 5225 |
| 5226 void DispatchTableConstructor::VisitChoice(ChoiceNode* node) { |
| 5227 if (node->being_calculated()) |
| 5228 return; |
| 5229 DispatchTable* table = node->GetTable(ignore_case_); |
| 5230 AddDispatchRange adder(this); |
| 5231 table->ForEach(&adder); |
| 5232 } |
| 5233 |
| 5234 |
| 5235 void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) { |
| 5236 // TODO(160): Find the node that we refer back to and propagate its start |
| 5237 // set back to here. For now we just accept anything. |
| 5238 AddRange(CharacterRange::Everything()); |
| 5239 } |
| 5240 |
| 5241 |
| 5242 void DispatchTableConstructor::VisitAssertion(AssertionNode* that) { |
| 5243 RegExpNode* target = that->on_success(); |
| 5244 target->Accept(this); |
| 5245 } |
| 5246 |
| 5247 |
| 5248 static int CompareRangeByFrom(const CharacterRange* a, |
| 5249 const CharacterRange* b) { |
| 5250 return Compare<uc16>(a->from(), b->from()); |
| 5251 } |
| 5252 |
| 5253 |
| 5254 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) { |
| 5255 ranges->Sort(CompareRangeByFrom); |
| 5256 uc16 last = 0; |
| 5257 for (int i = 0; i < ranges->length(); i++) { |
| 5258 CharacterRange range = ranges->at(i); |
| 5259 if (last < range.from()) |
| 5260 AddRange(CharacterRange(last, range.from() - 1)); |
| 5261 if (range.to() >= last) { |
| 5262 if (range.to() == String::kMaxUtf16CodeUnit) { |
| 5263 return; |
| 5264 } else { |
| 5265 last = range.to() + 1; |
| 5266 } |
| 5267 } |
| 5268 } |
| 5269 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit)); |
| 5270 } |
| 5271 |
| 5272 |
| 5273 void DispatchTableConstructor::VisitText(TextNode* that) { |
| 5274 TextElement elm = that->elements()->at(0); |
| 5275 switch (elm.text_type()) { |
| 5276 case TextElement::ATOM: { |
| 5277 uc16 c = elm.atom()->data()[0]; |
| 5278 AddRange(CharacterRange(c, c)); |
| 5279 break; |
| 5280 } |
| 5281 case TextElement::CHAR_CLASS: { |
| 5282 RegExpCharacterClass* tree = elm.char_class(); |
| 5283 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone()); |
| 5284 if (tree->is_negated()) { |
| 5285 AddInverse(ranges); |
| 5286 } else { |
| 5287 for (int i = 0; i < ranges->length(); i++) |
| 5288 AddRange(ranges->at(i)); |
| 5289 } |
| 5290 break; |
| 5291 } |
| 5292 default: { |
| 5293 UNIMPLEMENTED(); |
| 5294 } |
| 5295 } |
| 5296 } |
| 5297 |
| 5298 |
| 5299 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
| 5300 RegExpNode* target = that->on_success(); |
| 5301 target->Accept(this); |
| 5302 } |
| 5303 |
| 5304 |
4978 RegExpEngine::CompilationResult RegExpEngine::Compile( | 5305 RegExpEngine::CompilationResult RegExpEngine::Compile( |
4979 RegExpCompileData* data, | 5306 RegExpCompileData* data, bool ignore_case, bool is_global, |
4980 const ParsedFunction* parsed_function, | 5307 bool is_multiline, bool is_sticky, Handle<String> pattern, |
4981 const ZoneGrowableArray<const ICData*>& ic_data_array) { | 5308 Handle<String> sample_subject, bool is_one_byte, Zone* zone) { |
4982 Isolate* isolate = Isolate::Current(); | 5309 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
| 5310 return IrregexpRegExpTooBig(zone->isolate()); |
| 5311 } |
| 5312 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone); |
4983 | 5313 |
4984 const Function& function = parsed_function->function(); | 5314 // Sample some characters from the middle of the string. |
4985 const intptr_t specialization_cid = function.regexp_cid(); | 5315 static const int kSampleSize = 128; |
4986 const bool is_one_byte = (specialization_cid == kOneByteStringCid || | |
4987 specialization_cid == kExternalOneByteStringCid); | |
4988 JSRegExp& regexp = JSRegExp::Handle(isolate, function.regexp()); | |
4989 const String& pattern = String::Handle(isolate, regexp.pattern()); | |
4990 | 5316 |
4991 ASSERT(!regexp.IsNull()); | 5317 sample_subject = String::Flatten(sample_subject); |
4992 ASSERT(!pattern.IsNull()); | 5318 int chars_sampled = 0; |
4993 | 5319 int half_way = (sample_subject->length() - kSampleSize) / 2; |
4994 const bool ignore_case = regexp.is_ignore_case(); | 5320 for (int i = Max(0, half_way); |
4995 const bool is_global = regexp.is_global(); | 5321 i < sample_subject->length() && chars_sampled < kSampleSize; |
4996 | 5322 i++, chars_sampled++) { |
4997 RegExpCompiler compiler(data->capture_count, ignore_case, specialization_cid); | 5323 compiler.frequency_collator()->CountCharacter(sample_subject->Get(i)); |
4998 | 5324 } |
4999 // TODO(zerny): Frequency sampling is currently disabled because of several | |
5000 // issues. We do not want to store subject strings in the regexp object since | |
5001 // they might be long and we should not prevent their garbage collection. | |
5002 // Passing them to this function explicitly does not help, since we must | |
5003 // generate exactly the same IR for both the unoptimizing and optimizing | |
5004 // pipelines (otherwise it gets confused when i.e. deopt id's differ). | |
5005 // An option would be to store sampling results in the regexp object, but | |
5006 // I'm not sure the performance gains are relevant enough. | |
5007 | 5325 |
5008 // Wrap the body of the regexp in capture #0. | 5326 // Wrap the body of the regexp in capture #0. |
5009 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, | 5327 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, |
5010 0, | 5328 0, |
5011 &compiler, | 5329 &compiler, |
5012 compiler.accept()); | 5330 compiler.accept()); |
5013 | |
5014 RegExpNode* node = captured_body; | 5331 RegExpNode* node = captured_body; |
5015 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 5332 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); |
5016 bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 5333 bool is_start_anchored = data->tree->IsAnchoredAtStart(); |
5017 intptr_t max_length = data->tree->max_match(); | 5334 int max_length = data->tree->max_match(); |
5018 if (!is_start_anchored) { | 5335 if (!is_start_anchored && !is_sticky) { |
5019 // Add a .*? at the beginning, outside the body capture, unless | 5336 // Add a .*? at the beginning, outside the body capture, unless |
5020 // this expression is anchored at the beginning. | 5337 // this expression is anchored at the beginning or sticky. |
5021 RegExpNode* loop_node = | 5338 RegExpNode* loop_node = |
5022 RegExpQuantifier::ToNode(0, | 5339 RegExpQuantifier::ToNode(0, |
5023 RegExpTree::kInfinity, | 5340 RegExpTree::kInfinity, |
5024 false, | 5341 false, |
5025 new(isolate) RegExpCharacterClass('*'), | 5342 new(zone) RegExpCharacterClass('*'), |
5026 &compiler, | 5343 &compiler, |
5027 captured_body, | 5344 captured_body, |
5028 data->contains_anchor); | 5345 data->contains_anchor); |
5029 | 5346 |
5030 if (data->contains_anchor) { | 5347 if (data->contains_anchor) { |
5031 // Unroll loop once, to take care of the case that might start | 5348 // Unroll loop once, to take care of the case that might start |
5032 // at the start of input. | 5349 // at the start of input. |
5033 ChoiceNode* first_step_node = new(isolate) ChoiceNode(2, isolate); | 5350 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); |
5034 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 5351 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
5035 first_step_node->AddAlternative(GuardedAlternative( | 5352 first_step_node->AddAlternative(GuardedAlternative( |
5036 new(isolate) TextNode( | 5353 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node))); |
5037 new(isolate) RegExpCharacterClass('*'), loop_node))); | |
5038 node = first_step_node; | 5354 node = first_step_node; |
5039 } else { | 5355 } else { |
5040 node = loop_node; | 5356 node = loop_node; |
5041 } | 5357 } |
5042 } | 5358 } |
5043 if (is_one_byte) { | 5359 if (is_one_byte) { |
5044 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 5360 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
5045 // Do it again to propagate the new nodes to places where they were not | 5361 // Do it again to propagate the new nodes to places where they were not |
5046 // put because they had not been calculated yet. | 5362 // put because they had not been calculated yet. |
5047 if (node != NULL) { | 5363 if (node != NULL) { |
5048 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 5364 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
5049 } | 5365 } |
5050 } | 5366 } |
5051 | 5367 |
5052 if (node == NULL) node = new(isolate) EndNode(EndNode::BACKTRACK, isolate); | 5368 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); |
5053 data->node = node; | 5369 data->node = node; |
5054 Analysis analysis(ignore_case, is_one_byte); | 5370 Analysis analysis(ignore_case, is_one_byte); |
5055 analysis.EnsureAnalyzed(node); | 5371 analysis.EnsureAnalyzed(node); |
5056 if (analysis.has_failed()) { | 5372 if (analysis.has_failed()) { |
5057 const char* error_message = analysis.error_message(); | 5373 const char* error_message = analysis.error_message(); |
5058 return CompilationResult(error_message); | 5374 return CompilationResult(zone->isolate(), error_message); |
5059 } | 5375 } |
5060 | 5376 |
| 5377 // Create the correct assembler for the architecture. |
| 5378 #ifndef V8_INTERPRETED_REGEXP |
5061 // Native regexp implementation. | 5379 // Native regexp implementation. |
5062 | 5380 |
5063 IRRegExpMacroAssembler* macro_assembler = | 5381 NativeRegExpMacroAssembler::Mode mode = |
5064 new(isolate) IRRegExpMacroAssembler(specialization_cid, | 5382 is_one_byte ? NativeRegExpMacroAssembler::LATIN1 |
5065 data->capture_count, | 5383 : NativeRegExpMacroAssembler::UC16; |
5066 parsed_function, | 5384 |
5067 ic_data_array, | 5385 #if V8_TARGET_ARCH_IA32 |
5068 isolate); | 5386 RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2, |
| 5387 zone); |
| 5388 #elif V8_TARGET_ARCH_X64 |
| 5389 RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2, |
| 5390 zone); |
| 5391 #elif V8_TARGET_ARCH_ARM |
| 5392 RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2, |
| 5393 zone); |
| 5394 #elif V8_TARGET_ARCH_ARM64 |
| 5395 RegExpMacroAssemblerARM64 macro_assembler(mode, (data->capture_count + 1) * 2, |
| 5396 zone); |
| 5397 #elif V8_TARGET_ARCH_MIPS |
| 5398 RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2, |
| 5399 zone); |
| 5400 #elif V8_TARGET_ARCH_MIPS64 |
| 5401 RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2, |
| 5402 zone); |
| 5403 #elif V8_TARGET_ARCH_X87 |
| 5404 RegExpMacroAssemblerX87 macro_assembler(mode, (data->capture_count + 1) * 2, |
| 5405 zone); |
| 5406 #else |
| 5407 #error "Unsupported architecture" |
| 5408 #endif |
| 5409 |
| 5410 #else // V8_INTERPRETED_REGEXP |
| 5411 // Interpreted regexp implementation. |
| 5412 EmbeddedVector<byte, 1024> codes; |
| 5413 RegExpMacroAssemblerIrregexp macro_assembler(codes, zone); |
| 5414 #endif // V8_INTERPRETED_REGEXP |
5069 | 5415 |
5070 // Inserted here, instead of in Assembler, because it depends on information | 5416 // Inserted here, instead of in Assembler, because it depends on information |
5071 // in the AST that isn't replicated in the Node structure. | 5417 // in the AST that isn't replicated in the Node structure. |
5072 static const intptr_t kMaxBacksearchLimit = 1024; | 5418 static const int kMaxBacksearchLimit = 1024; |
5073 if (is_end_anchored && | 5419 if (is_end_anchored && |
5074 !is_start_anchored && | 5420 !is_start_anchored && |
5075 max_length < kMaxBacksearchLimit) { | 5421 max_length < kMaxBacksearchLimit) { |
5076 macro_assembler->SetCurrentPositionFromEnd(max_length); | 5422 macro_assembler.SetCurrentPositionFromEnd(max_length); |
5077 } | 5423 } |
5078 | 5424 |
5079 if (is_global) { | 5425 if (is_global) { |
5080 macro_assembler->set_global_mode( | 5426 macro_assembler.set_global_mode( |
5081 (data->tree->min_match() > 0) | 5427 (data->tree->min_match() > 0) |
5082 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK | 5428 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK |
5083 : RegExpMacroAssembler::GLOBAL); | 5429 : RegExpMacroAssembler::GLOBAL); |
5084 } | 5430 } |
5085 | 5431 |
5086 RegExpEngine::CompilationResult result = | 5432 return compiler.Assemble(&macro_assembler, |
5087 compiler.Assemble(macro_assembler, | 5433 node, |
5088 node, | 5434 data->capture_count, |
5089 data->capture_count, | 5435 pattern); |
5090 pattern); | |
5091 | |
5092 if (FLAG_trace_irregexp) { | |
5093 macro_assembler->PrintBlocks(); | |
5094 } | |
5095 | |
5096 return result; | |
5097 } | 5436 } |
5098 | 5437 |
5099 | |
5100 static void CreateSpecializedFunction(Isolate* isolate, | |
5101 const JSRegExp& regexp, | |
5102 intptr_t specialization_cid, | |
5103 const Object& owner) { | |
5104 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; | |
5105 | |
5106 Function& fn = Function::Handle(isolate, Function::New( | |
5107 Symbols::IrregExp(), | |
5108 RawFunction::kIrregexpFunction, | |
5109 true, // Static. | |
5110 false, // Not const. | |
5111 false, // Not abstract. | |
5112 false, // Not external. | |
5113 false, // Not native. | |
5114 owner, | |
5115 0)); // No token position. | |
5116 | |
5117 // TODO(zerny): Share these arrays between all irregexp functions. | |
5118 fn.set_num_fixed_parameters(kParamCount); | |
5119 fn.set_parameter_types(Array::Handle(isolate, Array::New(kParamCount, | |
5120 Heap::kOld))); | |
5121 fn.set_parameter_names(Array::Handle(isolate, Array::New(kParamCount, | |
5122 Heap::kOld))); | |
5123 fn.SetParameterTypeAt(0, Type::Handle(isolate, Type::DynamicType())); | |
5124 fn.SetParameterNameAt(0, Symbols::string_param()); | |
5125 fn.SetParameterTypeAt(1, Type::Handle(isolate, Type::DynamicType())); | |
5126 fn.SetParameterNameAt(1, Symbols::start_index_param()); | |
5127 fn.set_result_type(Type::Handle(isolate, Type::ArrayType())); | |
5128 | |
5129 // Cache the result. | |
5130 regexp.set_function(specialization_cid, fn); | |
5131 | |
5132 fn.set_regexp(regexp); | |
5133 fn.set_regexp_cid(specialization_cid); | |
5134 | |
5135 // The function is compiled lazily during the first call. | |
5136 } | |
5137 | |
5138 | |
5139 RawJSRegExp* RegExpEngine::CreateJSRegExp(Isolate* isolate, | |
5140 const String& pattern, | |
5141 bool multi_line, | |
5142 bool ignore_case) { | |
5143 const JSRegExp& regexp = JSRegExp::Handle(JSRegExp::New(0)); | |
5144 | |
5145 regexp.set_pattern(pattern); | |
5146 | |
5147 if (multi_line) { | |
5148 regexp.set_is_multi_line(); | |
5149 } | |
5150 if (ignore_case) { | |
5151 regexp.set_is_ignore_case(); | |
5152 } | |
5153 | |
5154 // TODO(zerny): We might want to use normal string searching algorithms | |
5155 // for simple patterns. | |
5156 regexp.set_is_complex(); | |
5157 regexp.set_is_global(); // All dart regexps are global. | |
5158 | |
5159 const Library& lib = Library::Handle(isolate, Library::CoreLibrary()); | |
5160 const Class& owner = Class::Handle( | |
5161 isolate, lib.LookupClass(Symbols::RegExp())); | |
5162 | |
5163 CreateSpecializedFunction(isolate, regexp, kOneByteStringCid, owner); | |
5164 CreateSpecializedFunction(isolate, regexp, kTwoByteStringCid, owner); | |
5165 CreateSpecializedFunction(isolate, regexp, kExternalOneByteStringCid, owner); | |
5166 CreateSpecializedFunction(isolate, regexp, kExternalTwoByteStringCid, owner); | |
5167 | |
5168 return regexp.raw(); | |
5169 } | |
5170 | |
5171 | |
5172 } // namespace dart | 5438 } // namespace dart |
OLD | NEW |