OLD | NEW |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/regexp.h" | 5 #include "vm/regexp.h" |
6 | 6 |
7 // SNIP | 7 #include "vm/dart_entry.h" |
| 8 #include "vm/regexp_assembler.h" |
| 9 #include "vm/regexp_ast.h" |
| 10 #include "vm/unibrow-inl.h" |
| 11 #include "vm/unicode.h" |
| 12 #include "vm/symbols.h" |
| 13 |
| 14 #define I (isolate()) |
| 15 #define CI (compiler->isolate()) |
8 | 16 |
9 namespace dart { | 17 namespace dart { |
10 | 18 |
11 // SNIP | 19 DECLARE_FLAG(bool, trace_irregexp); |
| 20 |
| 21 // Default to generating optimized regexp code. |
| 22 static const bool kRegexpOptimization = true; |
| 23 |
| 24 // More makes code generation slower, less makes V8 benchmark score lower. |
| 25 static const intptr_t kMaxLookaheadForBoyerMoore = 8; |
| 26 |
| 27 ContainedInLattice AddRange(ContainedInLattice containment, |
| 28 const intptr_t* ranges, |
| 29 intptr_t ranges_length, |
| 30 Interval new_range) { |
| 31 ASSERT((ranges_length & 1) == 1); |
| 32 ASSERT(ranges[ranges_length - 1] == Utf16::kMaxCodeUnit + 1); |
| 33 if (containment == kLatticeUnknown) return containment; |
| 34 bool inside = false; |
| 35 intptr_t last = 0; |
| 36 for (intptr_t i = 0; i < ranges_length; |
| 37 inside = !inside, last = ranges[i], i++) { |
| 38 // Consider the range from last to ranges[i]. |
| 39 // We haven't got to the new range yet. |
| 40 if (ranges[i] <= new_range.from()) continue; |
| 41 // New range is wholly inside last-ranges[i]. Note that new_range.to() is |
| 42 // inclusive, but the values in ranges are not. |
| 43 if (last <= new_range.from() && new_range.to() < ranges[i]) { |
| 44 return Combine(containment, inside ? kLatticeIn : kLatticeOut); |
| 45 } |
| 46 return kLatticeUnknown; |
| 47 } |
| 48 return containment; |
| 49 } |
12 | 50 |
13 // ------------------------------------------------------------------- | 51 // ------------------------------------------------------------------- |
14 // Implementation of the Irregexp regular expression engine. | 52 // Implementation of the Irregexp regular expression engine. |
15 // | 53 // |
16 // The Irregexp regular expression engine is intended to be a complete | 54 // The Irregexp regular expression engine is intended to be a complete |
17 // implementation of ECMAScript regular expressions. It generates either | 55 // implementation of ECMAScript regular expressions. It generates |
18 // bytecodes or native code. | 56 // IR code that is subsequently compiled to native code. |
19 | 57 |
20 // The Irregexp regexp engine is structured in three steps. | 58 // The Irregexp regexp engine is structured in three steps. |
21 // 1) The parser generates an abstract syntax tree. See ast.cc. | 59 // 1) The parser generates an abstract syntax tree. See regexp_ast.cc. |
22 // 2) From the AST a node network is created. The nodes are all | 60 // 2) From the AST a node network is created. The nodes are all |
23 // subclasses of RegExpNode. The nodes represent states when | 61 // subclasses of RegExpNode. The nodes represent states when |
24 // executing a regular expression. Several optimizations are | 62 // executing a regular expression. Several optimizations are |
25 // performed on the node network. | 63 // performed on the node network. |
26 // 3) From the nodes we generate either byte codes or native code | 64 // 3) From the nodes we generate IR instructions that can actually |
27 // that can actually execute the regular expression (perform | 65 // execute the regular expression (perform the search). The |
28 // the search). The code generation step is described in more | 66 // code generation step is described in more detail below. |
29 // detail below. | |
30 | 67 |
31 // Code generation. | 68 // Code generation. |
32 // | 69 // |
33 // The nodes are divided into four main categories. | 70 // The nodes are divided into four main categories. |
34 // * Choice nodes | 71 // * Choice nodes |
35 // These represent places where the regular expression can | 72 // These represent places where the regular expression can |
36 // match in more than one way. For example on entry to an | 73 // match in more than one way. For example on entry to an |
37 // alternation (foo|bar) or a repetition (*, +, ? or {}). | 74 // alternation (foo|bar) or a repetition (*, +, ? or {}). |
38 // * Action nodes | 75 // * Action nodes |
39 // These represent places where some action should be | 76 // These represent places where some action should be |
40 // performed. Examples include recording the current position | 77 // performed. Examples include recording the current position |
41 // in the input string to a register (in order to implement | 78 // in the input string to a register (in order to implement |
42 // captures) or other actions on registers, for example in order | 79 // captures) or other actions on registers, for example in order |
43 // to implement the counters needed for {} repetitions. | 80 // to implement the counters needed for {} repetitions. |
44 // * Matching nodes | 81 // * Matching nodes |
45 // These attempt to match some element part of the input string. | 82 // These attempt to match some element part of the input string. |
46 // Examples of elements include character classes, plain strings | 83 // Examples of elements include character classes, plain strings |
47 // or back references. | 84 // or back references. |
48 // * End nodes | 85 // * End nodes |
49 // These are used to implement the actions required on finding | 86 // These are used to implement the actions required on finding |
50 // a successful match or failing to find a match. | 87 // a successful match or failing to find a match. |
51 // | 88 // |
52 // The code generated (whether as byte codes or native code) maintains | 89 // The code generated maintains some state as it runs. This consists of the |
53 // some state as it runs. This consists of the following elements: | 90 // following elements: |
54 // | 91 // |
55 // * The capture registers. Used for string captures. | 92 // * The capture registers. Used for string captures. |
56 // * Other registers. Used for counters etc. | 93 // * Other registers. Used for counters etc. |
57 // * The current position. | 94 // * The current position. |
58 // * The stack of backtracking information. Used when a matching node | 95 // * The stack of backtracking information. Used when a matching node |
59 // fails to find a match and needs to try an alternative. | 96 // fails to find a match and needs to try an alternative. |
60 // | 97 // |
61 // Conceptual regular expression execution model: | 98 // Conceptual regular expression execution model: |
62 // | 99 // |
63 // There is a simple conceptual model of regular expression execution | 100 // There is a simple conceptual model of regular expression execution |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
153 // to that trace. The code generator therefore has the ability to generate | 190 // to that trace. The code generator therefore has the ability to generate |
154 // code for each node several times. In order to limit the size of the | 191 // code for each node several times. In order to limit the size of the |
155 // generated code there is an arbitrary limit on how many specialized sets of | 192 // generated code there is an arbitrary limit on how many specialized sets of |
156 // code may be generated for a given node. If the limit is reached, the | 193 // code may be generated for a given node. If the limit is reached, the |
157 // trace is flushed and a generic version of the code for a node is emitted. | 194 // trace is flushed and a generic version of the code for a node is emitted. |
158 // This is subsequently used for that node. The code emitted for non-generic | 195 // This is subsequently used for that node. The code emitted for non-generic |
159 // trace is not recorded in the node and so it cannot currently be reused in | 196 // trace is not recorded in the node and so it cannot currently be reused in |
160 // the event that code generation is requested for an identical trace. | 197 // the event that code generation is requested for an identical trace. |
161 | 198 |
162 | 199 |
163 void RegExpTree::AppendToText(RegExpText* text, Zone* zone) { | 200 void RegExpTree::AppendToText(RegExpText* text) { |
164 UNREACHABLE(); | 201 UNREACHABLE(); |
165 } | 202 } |
166 | 203 |
167 | 204 |
168 void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) { | 205 void RegExpAtom::AppendToText(RegExpText* text) { |
169 text->AddElement(TextElement::Atom(this), zone); | 206 text->AddElement(TextElement::Atom(this)); |
170 } | 207 } |
171 | 208 |
172 | 209 |
173 void RegExpCharacterClass::AppendToText(RegExpText* text, Zone* zone) { | 210 void RegExpCharacterClass::AppendToText(RegExpText* text) { |
174 text->AddElement(TextElement::CharClass(this), zone); | 211 text->AddElement(TextElement::CharClass(this)); |
175 } | 212 } |
176 | 213 |
177 | 214 |
178 void RegExpText::AppendToText(RegExpText* text, Zone* zone) { | 215 void RegExpText::AppendToText(RegExpText* text) { |
179 for (int i = 0; i < elements()->length(); i++) | 216 for (intptr_t i = 0; i < elements()->length(); i++) |
180 text->AddElement(elements()->at(i), zone); | 217 text->AddElement((*elements())[i]); |
181 } | 218 } |
182 | 219 |
183 | 220 |
184 TextElement TextElement::Atom(RegExpAtom* atom) { | 221 TextElement TextElement::Atom(RegExpAtom* atom) { |
185 return TextElement(ATOM, atom); | 222 return TextElement(ATOM, atom); |
186 } | 223 } |
187 | 224 |
188 | 225 |
189 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) { | 226 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) { |
190 return TextElement(CHAR_CLASS, char_class); | 227 return TextElement(CHAR_CLASS, char_class); |
191 } | 228 } |
192 | 229 |
193 | 230 |
194 int TextElement::length() const { | 231 intptr_t TextElement::length() const { |
195 switch (text_type()) { | 232 switch (text_type()) { |
196 case ATOM: | 233 case ATOM: |
197 return atom()->length(); | 234 return atom()->length(); |
198 | 235 |
199 case CHAR_CLASS: | 236 case CHAR_CLASS: |
200 return 1; | 237 return 1; |
201 } | 238 } |
202 UNREACHABLE(); | 239 UNREACHABLE(); |
203 return 0; | 240 return 0; |
204 } | 241 } |
205 | 242 |
206 | 243 |
207 DispatchTable* ChoiceNode::GetTable(bool ignore_case) { | 244 class FrequencyCollator : public ValueObject { |
208 if (table_ == NULL) { | |
209 table_ = new(zone()) DispatchTable(zone()); | |
210 DispatchTableConstructor cons(table_, ignore_case, zone()); | |
211 cons.BuildTable(this); | |
212 } | |
213 return table_; | |
214 } | |
215 | |
216 | |
217 class FrequencyCollator { | |
218 public: | 245 public: |
219 FrequencyCollator() : total_samples_(0) { | 246 FrequencyCollator() : total_samples_(0) { |
220 for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) { | 247 for (intptr_t i = 0; i < RegExpMacroAssembler::kTableSize; i++) { |
221 frequencies_[i] = CharacterFrequency(i); | 248 frequencies_[i] = CharacterFrequency(i); |
222 } | 249 } |
223 } | 250 } |
224 | 251 |
225 void CountCharacter(int character) { | 252 void CountCharacter(intptr_t character) { |
226 int index = (character & RegExpMacroAssembler::kTableMask); | 253 intptr_t index = (character & RegExpMacroAssembler::kTableMask); |
227 frequencies_[index].Increment(); | 254 frequencies_[index].Increment(); |
228 total_samples_++; | 255 total_samples_++; |
229 } | 256 } |
230 | 257 |
231 // Does not measure in percent, but rather per-128 (the table size from the | 258 // Does not measure in percent, but rather per-128 (the table size from the |
232 // regexp macro assembler). | 259 // regexp macro assembler). |
233 int Frequency(int in_character) { | 260 intptr_t Frequency(intptr_t in_character) { |
234 DCHECK((in_character & RegExpMacroAssembler::kTableMask) == in_character); | 261 ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character); |
235 if (total_samples_ < 1) return 1; // Division by zero. | 262 if (total_samples_ < 1) return 1; // Division by zero. |
236 int freq_in_per128 = | 263 intptr_t freq_in_per128 = |
237 (frequencies_[in_character].counter() * 128) / total_samples_; | 264 (frequencies_[in_character].counter() * 128) / total_samples_; |
238 return freq_in_per128; | 265 return freq_in_per128; |
239 } | 266 } |
240 | 267 |
241 private: | 268 private: |
242 class CharacterFrequency { | 269 class CharacterFrequency { |
243 public: | 270 public: |
244 CharacterFrequency() : counter_(0), character_(-1) { } | 271 CharacterFrequency() : counter_(0), character_(-1) { } |
245 explicit CharacterFrequency(int character) | 272 explicit CharacterFrequency(intptr_t character) |
246 : counter_(0), character_(character) { } | 273 : counter_(0), character_(character) { } |
247 | 274 |
248 void Increment() { counter_++; } | 275 void Increment() { counter_++; } |
249 int counter() { return counter_; } | 276 intptr_t counter() { return counter_; } |
250 int character() { return character_; } | 277 intptr_t character() { return character_; } |
251 | 278 |
252 private: | 279 private: |
253 int counter_; | 280 intptr_t counter_; |
254 int character_; | 281 intptr_t character_; |
| 282 |
| 283 DISALLOW_ALLOCATION(); |
255 }; | 284 }; |
256 | 285 |
257 | 286 |
258 private: | 287 private: |
259 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; | 288 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; |
260 int total_samples_; | 289 intptr_t total_samples_; |
261 }; | 290 }; |
262 | 291 |
263 | 292 |
264 class RegExpCompiler { | 293 class RegExpCompiler : public ValueObject { |
265 public: | 294 public: |
266 RegExpCompiler(int capture_count, bool ignore_case, bool is_one_byte, | 295 RegExpCompiler(intptr_t capture_count, |
267 Zone* zone); | 296 bool ignore_case, |
| 297 intptr_t specialization_cid); |
268 | 298 |
269 int AllocateRegister() { | 299 intptr_t AllocateRegister() { |
270 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { | |
271 reg_exp_too_big_ = true; | |
272 return next_register_; | |
273 } | |
274 return next_register_++; | 300 return next_register_++; |
275 } | 301 } |
276 | 302 |
277 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, | 303 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler, |
278 RegExpNode* start, | 304 RegExpNode* start, |
279 int capture_count, | 305 intptr_t capture_count, |
280 Handle<String> pattern); | 306 const String& pattern); |
281 | 307 |
282 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 308 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
283 | 309 |
284 static const int kImplementationOffset = 0; | 310 static const intptr_t kImplementationOffset = 0; |
285 static const int kNumberOfRegistersOffset = 0; | 311 static const intptr_t kNumberOfRegistersOffset = 0; |
286 static const int kCodeOffset = 1; | 312 static const intptr_t kCodeOffset = 1; |
287 | 313 |
288 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } | 314 IRRegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
289 EndNode* accept() { return accept_; } | 315 EndNode* accept() { return accept_; } |
290 | 316 |
291 static const int kMaxRecursion = 100; | 317 static const intptr_t kMaxRecursion = 100; |
292 inline int recursion_depth() { return recursion_depth_; } | 318 inline intptr_t recursion_depth() { return recursion_depth_; } |
293 inline void IncrementRecursionDepth() { recursion_depth_++; } | 319 inline void IncrementRecursionDepth() { recursion_depth_++; } |
294 inline void DecrementRecursionDepth() { recursion_depth_--; } | 320 inline void DecrementRecursionDepth() { recursion_depth_--; } |
295 | 321 |
296 void SetRegExpTooBig() { reg_exp_too_big_ = true; } | 322 void SetRegExpTooBig() { reg_exp_too_big_ = true; } |
297 | 323 |
298 inline bool ignore_case() { return ignore_case_; } | 324 inline bool ignore_case() { return ignore_case_; } |
299 inline bool one_byte() { return one_byte_; } | 325 inline bool one_byte() const { |
| 326 return (specialization_cid_ == kOneByteStringCid || |
| 327 specialization_cid_ == kExternalOneByteStringCid); |
| 328 } |
| 329 inline intptr_t specialization_cid() { return specialization_cid_; } |
300 FrequencyCollator* frequency_collator() { return &frequency_collator_; } | 330 FrequencyCollator* frequency_collator() { return &frequency_collator_; } |
301 | 331 |
302 int current_expansion_factor() { return current_expansion_factor_; } | 332 intptr_t current_expansion_factor() { return current_expansion_factor_; } |
303 void set_current_expansion_factor(int value) { | 333 void set_current_expansion_factor(intptr_t value) { |
304 current_expansion_factor_ = value; | 334 current_expansion_factor_ = value; |
305 } | 335 } |
306 | 336 |
307 Zone* zone() const { return zone_; } | 337 Isolate* isolate() const { return isolate_; } |
308 | 338 |
309 static const int kNoRegister = -1; | 339 static const intptr_t kNoRegister = -1; |
310 | 340 |
311 private: | 341 private: |
312 EndNode* accept_; | 342 EndNode* accept_; |
313 int next_register_; | 343 intptr_t next_register_; |
314 List<RegExpNode*>* work_list_; | 344 ZoneGrowableArray<RegExpNode*>* work_list_; |
315 int recursion_depth_; | 345 intptr_t recursion_depth_; |
316 RegExpMacroAssembler* macro_assembler_; | 346 IRRegExpMacroAssembler* macro_assembler_; |
317 bool ignore_case_; | 347 bool ignore_case_; |
318 bool one_byte_; | 348 intptr_t specialization_cid_; |
319 bool reg_exp_too_big_; | 349 bool reg_exp_too_big_; |
320 int current_expansion_factor_; | 350 intptr_t current_expansion_factor_; |
321 FrequencyCollator frequency_collator_; | 351 FrequencyCollator frequency_collator_; |
322 Zone* zone_; | 352 Isolate* isolate_; |
323 }; | 353 }; |
324 | 354 |
325 | 355 |
326 class RecursionCheck { | 356 class RecursionCheck : public ValueObject { |
327 public: | 357 public: |
328 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 358 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
329 compiler->IncrementRecursionDepth(); | 359 compiler->IncrementRecursionDepth(); |
330 } | 360 } |
331 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 361 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
332 private: | 362 private: |
333 RegExpCompiler* compiler_; | 363 RegExpCompiler* compiler_; |
334 }; | 364 }; |
335 | 365 |
336 | 366 |
337 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) { | 367 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { |
338 return RegExpEngine::CompilationResult(isolate, "RegExp too big"); | 368 return RegExpEngine::CompilationResult("RegExp too big"); |
339 } | 369 } |
340 | 370 |
341 | 371 |
342 // Attempts to compile the regexp using an Irregexp code generator. Returns | 372 // Attempts to compile the regexp using an Irregexp code generator. Returns |
343 // a fixed array or a null handle depending on whether it succeeded. | 373 // a fixed array or a null handle depending on whether it succeeded. |
344 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, | 374 RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool ignore_case, |
345 bool one_byte, Zone* zone) | 375 intptr_t specialization_cid) |
346 : next_register_(2 * (capture_count + 1)), | 376 : next_register_(2 * (capture_count + 1)), |
347 work_list_(NULL), | 377 work_list_(NULL), |
348 recursion_depth_(0), | 378 recursion_depth_(0), |
349 ignore_case_(ignore_case), | 379 ignore_case_(ignore_case), |
350 one_byte_(one_byte), | 380 specialization_cid_(specialization_cid), |
351 reg_exp_too_big_(false), | 381 reg_exp_too_big_(false), |
352 current_expansion_factor_(1), | 382 current_expansion_factor_(1), |
353 frequency_collator_(), | 383 isolate_(Isolate::Current()) { |
354 zone_(zone) { | 384 accept_ = new(I) EndNode(EndNode::ACCEPT, I); |
355 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone); | |
356 DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); | |
357 } | 385 } |
358 | 386 |
359 | 387 |
360 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 388 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
361 RegExpMacroAssembler* macro_assembler, | 389 IRRegExpMacroAssembler* macro_assembler, |
362 RegExpNode* start, | 390 RegExpNode* start, |
363 int capture_count, | 391 intptr_t capture_count, |
364 Handle<String> pattern) { | 392 const String& pattern) { |
365 Heap* heap = pattern->GetHeap(); | 393 static const bool use_slow_safe_regexp_compiler = false; |
366 | |
367 bool use_slow_safe_regexp_compiler = false; | |
368 if (heap->total_regexp_code_generated() > | |
369 RegExpImpl::kRegWxpCompiledLimit && | |
370 heap->isolate()->memory_allocator()->SizeExecutable() > | |
371 RegExpImpl::kRegExpExecutableMemoryLimit) { | |
372 use_slow_safe_regexp_compiler = true; | |
373 } | |
374 | 394 |
375 macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler); | 395 macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler); |
| 396 macro_assembler_ = macro_assembler; |
376 | 397 |
377 #ifdef DEBUG | 398 ZoneGrowableArray<RegExpNode*> work_list(0); |
378 if (FLAG_trace_regexp_assembler) | |
379 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); | |
380 else | |
381 #endif | |
382 macro_assembler_ = macro_assembler; | |
383 | |
384 List <RegExpNode*> work_list(0); | |
385 work_list_ = &work_list; | 399 work_list_ = &work_list; |
386 Label fail; | 400 BlockLabel fail; |
387 macro_assembler_->PushBacktrack(&fail); | 401 macro_assembler_->PushBacktrack(&fail); |
388 Trace new_trace; | 402 Trace new_trace; |
389 start->Emit(this, &new_trace); | 403 start->Emit(this, &new_trace); |
390 macro_assembler_->Bind(&fail); | 404 macro_assembler_->BindBlock(&fail); |
391 macro_assembler_->Fail(); | 405 macro_assembler_->Fail(); |
392 while (!work_list.is_empty()) { | 406 while (!work_list.is_empty()) { |
393 work_list.RemoveLast()->Emit(this, &new_trace); | 407 work_list.RemoveLast()->Emit(this, &new_trace); |
394 } | 408 } |
395 if (reg_exp_too_big_) return IrregexpRegExpTooBig(zone_->isolate()); | 409 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); |
396 | 410 |
397 Handle<HeapObject> code = macro_assembler_->GetCode(pattern); | 411 macro_assembler->GenerateBacktrackBlock(); |
398 heap->IncreaseTotalRegexpCodeGenerated(code->Size()); | 412 |
399 work_list_ = NULL; | 413 return RegExpEngine::CompilationResult(macro_assembler->backtrack_goto(), |
400 #ifdef DEBUG | 414 macro_assembler->graph_entry(), |
401 if (FLAG_print_code) { | 415 macro_assembler->num_blocks(), |
402 CodeTracer::Scope trace_scope(heap->isolate()->GetCodeTracer()); | 416 macro_assembler->num_stack_locals()); |
403 OFStream os(trace_scope.file()); | |
404 Handle<Code>::cast(code)->Disassemble(pattern->ToCString().get(), os); | |
405 } | |
406 if (FLAG_trace_regexp_assembler) { | |
407 delete macro_assembler_; | |
408 } | |
409 #endif | |
410 return RegExpEngine::CompilationResult(*code, next_register_); | |
411 } | 417 } |
412 | 418 |
413 | 419 |
414 bool Trace::DeferredAction::Mentions(int that) { | 420 bool Trace::DeferredAction::Mentions(intptr_t that) { |
415 if (action_type() == ActionNode::CLEAR_CAPTURES) { | 421 if (action_type() == ActionNode::CLEAR_CAPTURES) { |
416 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); | 422 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); |
417 return range.Contains(that); | 423 return range.Contains(that); |
418 } else { | 424 } else { |
419 return reg() == that; | 425 return reg() == that; |
420 } | 426 } |
421 } | 427 } |
422 | 428 |
423 | 429 |
424 bool Trace::mentions_reg(int reg) { | 430 bool Trace::mentions_reg(intptr_t reg) { |
425 for (DeferredAction* action = actions_; | 431 for (DeferredAction* action = actions_; |
426 action != NULL; | 432 action != NULL; |
427 action = action->next()) { | 433 action = action->next()) { |
428 if (action->Mentions(reg)) | 434 if (action->Mentions(reg)) |
429 return true; | 435 return true; |
430 } | 436 } |
431 return false; | 437 return false; |
432 } | 438 } |
433 | 439 |
434 | 440 |
435 bool Trace::GetStoredPosition(int reg, int* cp_offset) { | 441 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) { |
436 DCHECK_EQ(0, *cp_offset); | 442 ASSERT(*cp_offset == 0); |
437 for (DeferredAction* action = actions_; | 443 for (DeferredAction* action = actions_; |
438 action != NULL; | 444 action != NULL; |
439 action = action->next()) { | 445 action = action->next()) { |
440 if (action->Mentions(reg)) { | 446 if (action->Mentions(reg)) { |
441 if (action->action_type() == ActionNode::STORE_POSITION) { | 447 if (action->action_type() == ActionNode::STORE_POSITION) { |
442 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); | 448 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); |
443 return true; | 449 return true; |
444 } else { | 450 } else { |
445 return false; | 451 return false; |
446 } | 452 } |
447 } | 453 } |
448 } | 454 } |
449 return false; | 455 return false; |
450 } | 456 } |
451 | 457 |
452 | 458 |
453 int Trace::FindAffectedRegisters(OutSet* affected_registers, | 459 // This is called as we come into a loop choice node and some other tricky |
454 Zone* zone) { | 460 // nodes. It normalizes the state of the code generator to ensure we can |
455 int max_register = RegExpCompiler::kNoRegister; | 461 // generate generic code. |
| 462 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, |
| 463 Isolate* isolate) { |
| 464 intptr_t max_register = RegExpCompiler::kNoRegister; |
456 for (DeferredAction* action = actions_; | 465 for (DeferredAction* action = actions_; |
457 action != NULL; | 466 action != NULL; |
458 action = action->next()) { | 467 action = action->next()) { |
459 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { | 468 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { |
460 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); | 469 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); |
461 for (int i = range.from(); i <= range.to(); i++) | 470 for (intptr_t i = range.from(); i <= range.to(); i++) |
462 affected_registers->Set(i, zone); | 471 affected_registers->Set(i, isolate); |
463 if (range.to() > max_register) max_register = range.to(); | 472 if (range.to() > max_register) max_register = range.to(); |
464 } else { | 473 } else { |
465 affected_registers->Set(action->reg(), zone); | 474 affected_registers->Set(action->reg(), isolate); |
466 if (action->reg() > max_register) max_register = action->reg(); | 475 if (action->reg() > max_register) max_register = action->reg(); |
467 } | 476 } |
468 } | 477 } |
469 return max_register; | 478 return max_register; |
470 } | 479 } |
471 | 480 |
472 | 481 |
473 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, | 482 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, |
474 int max_register, | 483 intptr_t max_register, |
475 const OutSet& registers_to_pop, | 484 const OutSet& registers_to_pop, |
476 const OutSet& registers_to_clear) { | 485 const OutSet& registers_to_clear) { |
477 for (int reg = max_register; reg >= 0; reg--) { | 486 for (intptr_t reg = max_register; reg >= 0; reg--) { |
478 if (registers_to_pop.Get(reg)) { | 487 if (registers_to_pop.Get(reg)) { |
479 assembler->PopRegister(reg); | 488 assembler->PopRegister(reg); |
480 } else if (registers_to_clear.Get(reg)) { | 489 } else if (registers_to_clear.Get(reg)) { |
481 int clear_to = reg; | 490 intptr_t clear_to = reg; |
482 while (reg > 0 && registers_to_clear.Get(reg - 1)) { | 491 while (reg > 0 && registers_to_clear.Get(reg - 1)) { |
483 reg--; | 492 reg--; |
484 } | 493 } |
485 assembler->ClearRegisters(reg, clear_to); | 494 assembler->ClearRegisters(reg, clear_to); |
486 } | 495 } |
487 } | 496 } |
488 } | 497 } |
489 | 498 |
490 | 499 |
491 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, | 500 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, |
492 int max_register, | 501 intptr_t max_register, |
493 const OutSet& affected_registers, | 502 const OutSet& affected_registers, |
494 OutSet* registers_to_pop, | 503 OutSet* registers_to_pop, |
495 OutSet* registers_to_clear, | 504 OutSet* registers_to_clear, |
496 Zone* zone) { | 505 Isolate* isolate) { |
497 // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1. | 506 for (intptr_t reg = 0; reg <= max_register; reg++) { |
498 const int push_limit = (assembler->stack_limit_slack() + 1) / 2; | |
499 | |
500 // Count pushes performed to force a stack limit check occasionally. | |
501 int pushes = 0; | |
502 | |
503 for (int reg = 0; reg <= max_register; reg++) { | |
504 if (!affected_registers.Get(reg)) { | 507 if (!affected_registers.Get(reg)) { |
505 continue; | 508 continue; |
506 } | 509 } |
507 | 510 |
508 // The chronologically first deferred action in the trace | 511 // The chronologically first deferred action in the trace |
509 // is used to infer the action needed to restore a register | 512 // is used to infer the action needed to restore a register |
510 // to its previous state (or not, if it's safe to ignore it). | 513 // to its previous state (or not, if it's safe to ignore it). |
511 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; | 514 enum DeferredActionUndoType { ACTION_IGNORE, ACTION_RESTORE, ACTION_CLEAR }; |
512 DeferredActionUndoType undo_action = IGNORE; | 515 DeferredActionUndoType undo_action = ACTION_IGNORE; |
513 | 516 |
514 int value = 0; | 517 intptr_t value = 0; |
515 bool absolute = false; | 518 bool absolute = false; |
516 bool clear = false; | 519 bool clear = false; |
517 int store_position = -1; | 520 intptr_t store_position = -1; |
518 // This is a little tricky because we are scanning the actions in reverse | 521 // This is a little tricky because we are scanning the actions in reverse |
519 // historical order (newest first). | 522 // historical order (newest first). |
520 for (DeferredAction* action = actions_; | 523 for (DeferredAction* action = actions_; |
521 action != NULL; | 524 action != NULL; |
522 action = action->next()) { | 525 action = action->next()) { |
523 if (action->Mentions(reg)) { | 526 if (action->Mentions(reg)) { |
524 switch (action->action_type()) { | 527 switch (action->action_type()) { |
525 case ActionNode::SET_REGISTER: { | 528 case ActionNode::SET_REGISTER: { |
526 Trace::DeferredSetRegister* psr = | 529 Trace::DeferredSetRegister* psr = |
527 static_cast<Trace::DeferredSetRegister*>(action); | 530 static_cast<Trace::DeferredSetRegister*>(action); |
528 if (!absolute) { | 531 if (!absolute) { |
529 value += psr->value(); | 532 value += psr->value(); |
530 absolute = true; | 533 absolute = true; |
531 } | 534 } |
532 // SET_REGISTER is currently only used for newly introduced loop | 535 // SET_REGISTER is currently only used for newly introduced loop |
533 // counters. They can have a significant previous value if they | 536 // counters. They can have a significant previous value if they |
534 // occur in a loop. TODO(lrn): Propagate this information, so | 537 // occur in a loop. TODO(lrn): Propagate this information, so we |
535 // we can set undo_action to IGNORE if we know there is no value to | 538 // can set undo_action to ACTION_IGNORE if we know there is no |
536 // restore. | 539 // value to restore. |
537 undo_action = RESTORE; | 540 undo_action = ACTION_RESTORE; |
538 DCHECK_EQ(store_position, -1); | 541 ASSERT(store_position == -1); |
539 DCHECK(!clear); | 542 ASSERT(!clear); |
540 break; | 543 break; |
541 } | 544 } |
542 case ActionNode::INCREMENT_REGISTER: | 545 case ActionNode::INCREMENT_REGISTER: |
543 if (!absolute) { | 546 if (!absolute) { |
544 value++; | 547 value++; |
545 } | 548 } |
546 DCHECK_EQ(store_position, -1); | 549 ASSERT(store_position == -1); |
547 DCHECK(!clear); | 550 ASSERT(!clear); |
548 undo_action = RESTORE; | 551 undo_action = ACTION_RESTORE; |
549 break; | 552 break; |
550 case ActionNode::STORE_POSITION: { | 553 case ActionNode::STORE_POSITION: { |
551 Trace::DeferredCapture* pc = | 554 Trace::DeferredCapture* pc = |
552 static_cast<Trace::DeferredCapture*>(action); | 555 static_cast<Trace::DeferredCapture*>(action); |
553 if (!clear && store_position == -1) { | 556 if (!clear && store_position == -1) { |
554 store_position = pc->cp_offset(); | 557 store_position = pc->cp_offset(); |
555 } | 558 } |
556 | 559 |
557 // For captures we know that stores and clears alternate. | 560 // For captures we know that stores and clears alternate. |
558 // Other registers are never cleared, and if they occur | 561 // Other registers are never cleared, and if they occur |
559 // inside a loop, they might be assigned more than once. | 562 // inside a loop, they might be assigned more than once. |
560 if (reg <= 1) { | 563 if (reg <= 1) { |
561 // Registers zero and one, aka "capture zero", are | 564 // Registers zero and one, aka "capture zero", are |
562 // always set correctly if we succeed. There is no | 565 // always set correctly if we succeed. There is no |
563 // need to undo a setting on backtrack, because we | 566 // need to undo a setting on backtrack, because we |
564 // will set it again or fail. | 567 // will set it again or fail. |
565 undo_action = IGNORE; | 568 undo_action = ACTION_IGNORE; |
566 } else { | 569 } else { |
567 undo_action = pc->is_capture() ? CLEAR : RESTORE; | 570 undo_action = pc->is_capture() ? ACTION_CLEAR : ACTION_RESTORE; |
568 } | 571 } |
569 DCHECK(!absolute); | 572 ASSERT(!absolute); |
570 DCHECK_EQ(value, 0); | 573 ASSERT(value == 0); |
571 break; | 574 break; |
572 } | 575 } |
573 case ActionNode::CLEAR_CAPTURES: { | 576 case ActionNode::CLEAR_CAPTURES: { |
574 // Since we're scanning in reverse order, if we've already | 577 // Since we're scanning in reverse order, if we've already |
575 // set the position we have to ignore historically earlier | 578 // set the position we have to ignore historically earlier |
576 // clearing operations. | 579 // clearing operations. |
577 if (store_position == -1) { | 580 if (store_position == -1) { |
578 clear = true; | 581 clear = true; |
579 } | 582 } |
580 undo_action = RESTORE; | 583 undo_action = ACTION_RESTORE; |
581 DCHECK(!absolute); | 584 ASSERT(!absolute); |
582 DCHECK_EQ(value, 0); | 585 ASSERT(value == 0); |
583 break; | 586 break; |
584 } | 587 } |
585 default: | 588 default: |
586 UNREACHABLE(); | 589 UNREACHABLE(); |
587 break; | 590 break; |
588 } | 591 } |
589 } | 592 } |
590 } | 593 } |
591 // Prepare for the undo-action (e.g., push if it's going to be popped). | 594 // Prepare for the undo-action (e.g., push if it's going to be popped). |
592 if (undo_action == RESTORE) { | 595 if (undo_action == ACTION_RESTORE) { |
593 pushes++; | 596 assembler->PushRegister(reg); |
594 RegExpMacroAssembler::StackCheckFlag stack_check = | 597 registers_to_pop->Set(reg, isolate); |
595 RegExpMacroAssembler::kNoStackLimitCheck; | 598 } else if (undo_action == ACTION_CLEAR) { |
596 if (pushes == push_limit) { | 599 registers_to_clear->Set(reg, isolate); |
597 stack_check = RegExpMacroAssembler::kCheckStackLimit; | |
598 pushes = 0; | |
599 } | |
600 | |
601 assembler->PushRegister(reg, stack_check); | |
602 registers_to_pop->Set(reg, zone); | |
603 } else if (undo_action == CLEAR) { | |
604 registers_to_clear->Set(reg, zone); | |
605 } | 600 } |
606 // Perform the chronologically last action (or accumulated increment) | 601 // Perform the chronologically last action (or accumulated increment) |
607 // for the register. | 602 // for the register. |
608 if (store_position != -1) { | 603 if (store_position != -1) { |
609 assembler->WriteCurrentPositionToRegister(reg, store_position); | 604 assembler->WriteCurrentPositionToRegister(reg, store_position); |
610 } else if (clear) { | 605 } else if (clear) { |
611 assembler->ClearRegisters(reg, reg); | 606 assembler->ClearRegisters(reg, reg); |
612 } else if (absolute) { | 607 } else if (absolute) { |
613 assembler->SetRegister(reg, value); | 608 assembler->SetRegister(reg, value); |
614 } else if (value != 0) { | 609 } else if (value != 0) { |
615 assembler->AdvanceRegister(reg, value); | 610 assembler->AdvanceRegister(reg, value); |
616 } | 611 } |
617 } | 612 } |
618 } | 613 } |
619 | 614 |
620 | 615 |
621 // This is called as we come into a loop choice node and some other tricky | 616 // This is called as we come into a loop choice node and some other tricky |
622 // nodes. It normalizes the state of the code generator to ensure we can | 617 // nodes. It normalizes the state of the code generator to ensure we can |
623 // generate generic code. | 618 // generate generic code. |
624 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { | 619 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { |
625 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 620 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
626 | 621 |
627 DCHECK(!is_trivial()); | 622 ASSERT(!is_trivial()); |
628 | 623 |
629 if (actions_ == NULL && backtrack() == NULL) { | 624 if (actions_ == NULL && backtrack() == NULL) { |
630 // Here we just have some deferred cp advances to fix and we are back to | 625 // Here we just have some deferred cp advances to fix and we are back to |
631 // a normal situation. We may also have to forget some information gained | 626 // a normal situation. We may also have to forget some information gained |
632 // through a quick check that was already performed. | 627 // through a quick check that was already performed. |
633 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); | 628 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); |
634 // Create a new trivial state and generate the node with that. | 629 // Create a new trivial state and generate the node with that. |
635 Trace new_state; | 630 Trace new_state; |
636 successor->Emit(compiler, &new_state); | 631 successor->Emit(compiler, &new_state); |
637 return; | 632 return; |
638 } | 633 } |
639 | 634 |
640 // Generate deferred actions here along with code to undo them again. | 635 // Generate deferred actions here along with code to undo them again. |
641 OutSet affected_registers; | 636 OutSet affected_registers; |
642 | 637 |
643 if (backtrack() != NULL) { | 638 if (backtrack() != NULL) { |
644 // Here we have a concrete backtrack location. These are set up by choice | 639 // Here we have a concrete backtrack location. These are set up by choice |
645 // nodes and so they indicate that we have a deferred save of the current | 640 // nodes and so they indicate that we have a deferred save of the current |
646 // position which we may need to emit here. | 641 // position which we may need to emit here. |
647 assembler->PushCurrentPosition(); | 642 assembler->PushCurrentPosition(); |
648 } | 643 } |
649 | 644 |
650 int max_register = FindAffectedRegisters(&affected_registers, | 645 intptr_t max_register = FindAffectedRegisters(&affected_registers, CI); |
651 compiler->zone()); | |
652 OutSet registers_to_pop; | 646 OutSet registers_to_pop; |
653 OutSet registers_to_clear; | 647 OutSet registers_to_clear; |
654 PerformDeferredActions(assembler, | 648 PerformDeferredActions(assembler, |
655 max_register, | 649 max_register, |
656 affected_registers, | 650 affected_registers, |
657 &registers_to_pop, | 651 &registers_to_pop, |
658 &registers_to_clear, | 652 &registers_to_clear, |
659 compiler->zone()); | 653 CI); |
660 if (cp_offset_ != 0) { | 654 if (cp_offset_ != 0) { |
661 assembler->AdvanceCurrentPosition(cp_offset_); | 655 assembler->AdvanceCurrentPosition(cp_offset_); |
662 } | 656 } |
663 | 657 |
664 // Create a new trivial state and generate the node with that. | 658 // Create a new trivial state and generate the node with that. |
665 Label undo; | 659 BlockLabel undo; |
666 assembler->PushBacktrack(&undo); | 660 assembler->PushBacktrack(&undo); |
667 Trace new_state; | 661 Trace new_state; |
668 successor->Emit(compiler, &new_state); | 662 successor->Emit(compiler, &new_state); |
669 | 663 |
670 // On backtrack we need to restore state. | 664 // On backtrack we need to restore state. |
671 assembler->Bind(&undo); | 665 assembler->BindBlock(&undo); |
672 RestoreAffectedRegisters(assembler, | 666 RestoreAffectedRegisters(assembler, |
673 max_register, | 667 max_register, |
674 registers_to_pop, | 668 registers_to_pop, |
675 registers_to_clear); | 669 registers_to_clear); |
676 if (backtrack() == NULL) { | 670 if (backtrack() == NULL) { |
677 assembler->Backtrack(); | 671 assembler->Backtrack(); |
678 } else { | 672 } else { |
679 assembler->PopCurrentPosition(); | 673 assembler->PopCurrentPosition(); |
680 assembler->GoTo(backtrack()); | 674 assembler->GoTo(backtrack()); |
681 } | 675 } |
682 } | 676 } |
683 | 677 |
684 | 678 |
685 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { | 679 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { |
686 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 680 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
687 | 681 |
688 // Omit flushing the trace. We discard the entire stack frame anyway. | 682 // Omit flushing the trace. We discard the entire stack frame anyway. |
689 | 683 |
690 if (!label()->is_bound()) { | 684 if (!label()->IsBound()) { |
691 // We are completely independent of the trace, since we ignore it, | 685 // We are completely independent of the trace, since we ignore it, |
692 // so this code can be used as the generic version. | 686 // so this code can be used as the generic version. |
693 assembler->Bind(label()); | 687 assembler->BindBlock(label()); |
694 } | 688 } |
695 | 689 |
696 // Throw away everything on the backtrack stack since the start | 690 // Throw away everything on the backtrack stack since the start |
697 // of the negative submatch and restore the character position. | 691 // of the negative submatch and restore the character position. |
698 assembler->ReadCurrentPositionFromRegister(current_position_register_); | 692 assembler->ReadCurrentPositionFromRegister(current_position_register_); |
699 assembler->ReadStackPointerFromRegister(stack_pointer_register_); | 693 assembler->ReadStackPointerFromRegister(stack_pointer_register_); |
700 if (clear_capture_count_ > 0) { | 694 if (clear_capture_count_ > 0) { |
701 // Clear any captures that might have been performed during the success | 695 // Clear any captures that might have been performed during the success |
702 // of the body of the negative look-ahead. | 696 // of the body of the negative look-ahead. |
703 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1; | 697 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1; |
704 assembler->ClearRegisters(clear_capture_start_, clear_capture_end); | 698 assembler->ClearRegisters(clear_capture_start_, clear_capture_end); |
705 } | 699 } |
706 // Now that we have unwound the stack we find at the top of the stack the | 700 // Now that we have unwound the stack we find at the top of the stack the |
707 // backtrack that the BeginSubmatch node got. | 701 // backtrack that the BeginSubmatch node got. |
708 assembler->Backtrack(); | 702 assembler->Backtrack(); |
709 } | 703 } |
710 | 704 |
711 | 705 |
712 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 706 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
713 if (!trace->is_trivial()) { | 707 if (!trace->is_trivial()) { |
714 trace->Flush(compiler, this); | 708 trace->Flush(compiler, this); |
715 return; | 709 return; |
716 } | 710 } |
717 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 711 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
718 if (!label()->is_bound()) { | 712 if (!label()->IsBound()) { |
719 assembler->Bind(label()); | 713 assembler->BindBlock(label()); |
720 } | 714 } |
721 switch (action_) { | 715 switch (action_) { |
722 case ACCEPT: | 716 case ACCEPT: |
723 assembler->Succeed(); | 717 assembler->Succeed(); |
724 return; | 718 return; |
725 case BACKTRACK: | 719 case BACKTRACK: |
726 assembler->GoTo(trace->backtrack()); | 720 assembler->GoTo(trace->backtrack()); |
727 return; | 721 return; |
728 case NEGATIVE_SUBMATCH_SUCCESS: | 722 case NEGATIVE_SUBMATCH_SUCCESS: |
729 // This case is handled in a different virtual method. | 723 // This case is handled in a different virtual method. |
730 UNREACHABLE(); | 724 UNREACHABLE(); |
731 } | 725 } |
732 UNIMPLEMENTED(); | 726 UNIMPLEMENTED(); |
733 } | 727 } |
734 | 728 |
735 | 729 |
736 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { | 730 void GuardedAlternative::AddGuard(Guard* guard, Isolate* isolate) { |
737 if (guards_ == NULL) | 731 if (guards_ == NULL) |
738 guards_ = new(zone) ZoneList<Guard*>(1, zone); | 732 guards_ = new(isolate) ZoneGrowableArray<Guard*>(1); |
739 guards_->Add(guard, zone); | 733 guards_->Add(guard); |
740 } | 734 } |
741 | 735 |
742 | 736 |
743 ActionNode* ActionNode::SetRegister(int reg, | 737 ActionNode* ActionNode::SetRegister(intptr_t reg, |
744 int val, | 738 intptr_t val, |
745 RegExpNode* on_success) { | 739 RegExpNode* on_success) { |
746 ActionNode* result = | 740 ActionNode* result = |
747 new(on_success->zone()) ActionNode(SET_REGISTER, on_success); | 741 new(on_success->isolate()) ActionNode(SET_REGISTER, on_success); |
748 result->data_.u_store_register.reg = reg; | 742 result->data_.u_store_register.reg = reg; |
749 result->data_.u_store_register.value = val; | 743 result->data_.u_store_register.value = val; |
750 return result; | 744 return result; |
751 } | 745 } |
752 | 746 |
753 | 747 |
754 ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) { | 748 ActionNode* ActionNode::IncrementRegister(intptr_t reg, |
| 749 RegExpNode* on_success) { |
755 ActionNode* result = | 750 ActionNode* result = |
756 new(on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success); | 751 new(on_success->isolate()) ActionNode(INCREMENT_REGISTER, on_success); |
757 result->data_.u_increment_register.reg = reg; | 752 result->data_.u_increment_register.reg = reg; |
758 return result; | 753 return result; |
759 } | 754 } |
760 | 755 |
761 | 756 |
762 ActionNode* ActionNode::StorePosition(int reg, | 757 ActionNode* ActionNode::StorePosition(intptr_t reg, |
763 bool is_capture, | 758 bool is_capture, |
764 RegExpNode* on_success) { | 759 RegExpNode* on_success) { |
765 ActionNode* result = | 760 ActionNode* result = |
766 new(on_success->zone()) ActionNode(STORE_POSITION, on_success); | 761 new(on_success->isolate()) ActionNode(STORE_POSITION, on_success); |
767 result->data_.u_position_register.reg = reg; | 762 result->data_.u_position_register.reg = reg; |
768 result->data_.u_position_register.is_capture = is_capture; | 763 result->data_.u_position_register.is_capture = is_capture; |
769 return result; | 764 return result; |
770 } | 765 } |
771 | 766 |
772 | 767 |
773 ActionNode* ActionNode::ClearCaptures(Interval range, | 768 ActionNode* ActionNode::ClearCaptures(Interval range, |
774 RegExpNode* on_success) { | 769 RegExpNode* on_success) { |
775 ActionNode* result = | 770 ActionNode* result = |
776 new(on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success); | 771 new(on_success->isolate()) ActionNode(CLEAR_CAPTURES, on_success); |
777 result->data_.u_clear_captures.range_from = range.from(); | 772 result->data_.u_clear_captures.range_from = range.from(); |
778 result->data_.u_clear_captures.range_to = range.to(); | 773 result->data_.u_clear_captures.range_to = range.to(); |
779 return result; | 774 return result; |
780 } | 775 } |
781 | 776 |
782 | 777 |
783 ActionNode* ActionNode::BeginSubmatch(int stack_reg, | 778 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg, |
784 int position_reg, | 779 intptr_t position_reg, |
785 RegExpNode* on_success) { | 780 RegExpNode* on_success) { |
786 ActionNode* result = | 781 ActionNode* result = |
787 new(on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success); | 782 new(on_success->isolate()) ActionNode(BEGIN_SUBMATCH, on_success); |
788 result->data_.u_submatch.stack_pointer_register = stack_reg; | 783 result->data_.u_submatch.stack_pointer_register = stack_reg; |
789 result->data_.u_submatch.current_position_register = position_reg; | 784 result->data_.u_submatch.current_position_register = position_reg; |
790 return result; | 785 return result; |
791 } | 786 } |
792 | 787 |
793 | 788 |
794 ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, | 789 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg, |
795 int position_reg, | 790 intptr_t position_reg, |
796 int clear_register_count, | 791 intptr_t clear_register_count, |
797 int clear_register_from, | 792 intptr_t clear_register_from, |
798 RegExpNode* on_success) { | 793 RegExpNode* on_success) { |
799 ActionNode* result = | 794 ActionNode* result = |
800 new(on_success->zone()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); | 795 new(on_success->isolate()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, |
| 796 on_success); |
801 result->data_.u_submatch.stack_pointer_register = stack_reg; | 797 result->data_.u_submatch.stack_pointer_register = stack_reg; |
802 result->data_.u_submatch.current_position_register = position_reg; | 798 result->data_.u_submatch.current_position_register = position_reg; |
803 result->data_.u_submatch.clear_register_count = clear_register_count; | 799 result->data_.u_submatch.clear_register_count = clear_register_count; |
804 result->data_.u_submatch.clear_register_from = clear_register_from; | 800 result->data_.u_submatch.clear_register_from = clear_register_from; |
805 return result; | 801 return result; |
806 } | 802 } |
807 | 803 |
808 | 804 |
809 ActionNode* ActionNode::EmptyMatchCheck(int start_register, | 805 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register, |
810 int repetition_register, | 806 intptr_t repetition_register, |
811 int repetition_limit, | 807 intptr_t repetition_limit, |
812 RegExpNode* on_success) { | 808 RegExpNode* on_success) { |
813 ActionNode* result = | 809 ActionNode* result = |
814 new(on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success); | 810 new(on_success->isolate()) ActionNode(EMPTY_MATCH_CHECK, on_success); |
815 result->data_.u_empty_match_check.start_register = start_register; | 811 result->data_.u_empty_match_check.start_register = start_register; |
816 result->data_.u_empty_match_check.repetition_register = repetition_register; | 812 result->data_.u_empty_match_check.repetition_register = repetition_register; |
817 result->data_.u_empty_match_check.repetition_limit = repetition_limit; | 813 result->data_.u_empty_match_check.repetition_limit = repetition_limit; |
818 return result; | 814 return result; |
819 } | 815 } |
820 | 816 |
821 | 817 |
822 #define DEFINE_ACCEPT(Type) \ | 818 #define DEFINE_ACCEPT(Type) \ |
823 void Type##Node::Accept(NodeVisitor* visitor) { \ | 819 void Type##Node::Accept(NodeVisitor* visitor) { \ |
824 visitor->Visit##Type(this); \ | 820 visitor->Visit##Type(this); \ |
825 } | 821 } |
826 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) | 822 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) |
827 #undef DEFINE_ACCEPT | 823 #undef DEFINE_ACCEPT |
828 | 824 |
829 | 825 |
830 void LoopChoiceNode::Accept(NodeVisitor* visitor) { | 826 void LoopChoiceNode::Accept(NodeVisitor* visitor) { |
831 visitor->VisitLoopChoice(this); | 827 visitor->VisitLoopChoice(this); |
832 } | 828 } |
833 | 829 |
834 | 830 |
835 // ------------------------------------------------------------------- | 831 // ------------------------------------------------------------------- |
836 // Emit code. | 832 // Emit code. |
837 | 833 |
838 | 834 |
839 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, | 835 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, |
840 Guard* guard, | 836 Guard* guard, |
841 Trace* trace) { | 837 Trace* trace) { |
842 switch (guard->op()) { | 838 switch (guard->op()) { |
843 case Guard::LT: | 839 case Guard::LT: |
844 DCHECK(!trace->mentions_reg(guard->reg())); | 840 ASSERT(!trace->mentions_reg(guard->reg())); |
845 macro_assembler->IfRegisterGE(guard->reg(), | 841 macro_assembler->IfRegisterGE(guard->reg(), |
846 guard->value(), | 842 guard->value(), |
847 trace->backtrack()); | 843 trace->backtrack()); |
848 break; | 844 break; |
849 case Guard::GEQ: | 845 case Guard::GEQ: |
850 DCHECK(!trace->mentions_reg(guard->reg())); | 846 ASSERT(!trace->mentions_reg(guard->reg())); |
851 macro_assembler->IfRegisterLT(guard->reg(), | 847 macro_assembler->IfRegisterLT(guard->reg(), |
852 guard->value(), | 848 guard->value(), |
853 trace->backtrack()); | 849 trace->backtrack()); |
854 break; | 850 break; |
855 } | 851 } |
856 } | 852 } |
857 | 853 |
858 | 854 |
859 // Returns the number of characters in the equivalence class, omitting those | 855 // Returns the number of characters in the equivalence class, omitting those |
860 // that cannot occur in the source string because it is ASCII. | 856 // that cannot occur in the source string because it is ASCII. |
861 static int GetCaseIndependentLetters(Isolate* isolate, uc16 character, | 857 static intptr_t GetCaseIndependentLetters(uint16_t character, |
862 bool one_byte_subject, | 858 bool one_byte_subject, |
863 unibrow::uchar* letters) { | 859 int32_t* letters) { |
864 int length = | 860 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; |
865 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); | 861 intptr_t length = jsregexp_uncanonicalize.get(character, '\0', letters); |
866 // Unibrow returns 0 or 1 for characters where case independence is | 862 // Unibrow returns 0 or 1 for characters where case independence is |
867 // trivial. | 863 // trivial. |
868 if (length == 0) { | 864 if (length == 0) { |
869 letters[0] = character; | 865 letters[0] = character; |
870 length = 1; | 866 length = 1; |
871 } | 867 } |
872 if (!one_byte_subject || character <= String::kMaxOneByteCharCode) { | 868 if (!one_byte_subject || character <= Symbols::kMaxOneCharCodeSymbol) { |
873 return length; | 869 return length; |
874 } | 870 } |
875 | 871 |
876 // The standard requires that non-ASCII characters cannot have ASCII | 872 // The standard requires that non-ASCII characters cannot have ASCII |
877 // character codes in their equivalence class. | 873 // character codes in their equivalence class. |
878 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore, | 874 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore, |
879 // is it? For example, \u00C5 is equivalent to \u212B. | 875 // is it? For example, \u00C5 is equivalent to \u212B. |
880 return 0; | 876 return 0; |
881 } | 877 } |
882 | 878 |
883 | 879 |
884 static inline bool EmitSimpleCharacter(Isolate* isolate, | 880 static inline bool EmitSimpleCharacter(Isolate* isolate, |
885 RegExpCompiler* compiler, | 881 RegExpCompiler* compiler, |
886 uc16 c, | 882 uint16_t c, |
887 Label* on_failure, | 883 BlockLabel* on_failure, |
888 int cp_offset, | 884 intptr_t cp_offset, |
889 bool check, | 885 bool check, |
890 bool preloaded) { | 886 bool preloaded) { |
891 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 887 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
892 bool bound_checked = false; | 888 bool bound_checked = false; |
893 if (!preloaded) { | 889 if (!preloaded) { |
894 assembler->LoadCurrentCharacter( | 890 assembler->LoadCurrentCharacter( |
895 cp_offset, | 891 cp_offset, |
896 on_failure, | 892 on_failure, |
897 check); | 893 check); |
898 bound_checked = true; | 894 bound_checked = true; |
899 } | 895 } |
900 assembler->CheckNotCharacter(c, on_failure); | 896 assembler->CheckNotCharacter(c, on_failure); |
901 return bound_checked; | 897 return bound_checked; |
902 } | 898 } |
903 | 899 |
904 | 900 |
905 // Only emits non-letters (things that don't have case). Only used for case | 901 // Only emits non-letters (things that don't have case). Only used for case |
906 // independent matches. | 902 // independent matches. |
907 static inline bool EmitAtomNonLetter(Isolate* isolate, | 903 static inline bool EmitAtomNonLetter(Isolate* isolate, |
908 RegExpCompiler* compiler, | 904 RegExpCompiler* compiler, |
909 uc16 c, | 905 uint16_t c, |
910 Label* on_failure, | 906 BlockLabel* on_failure, |
911 int cp_offset, | 907 intptr_t cp_offset, |
912 bool check, | 908 bool check, |
913 bool preloaded) { | 909 bool preloaded) { |
914 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 910 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
915 bool one_byte = compiler->one_byte(); | 911 bool one_byte = compiler->one_byte(); |
916 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 912 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
917 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); | 913 intptr_t length = GetCaseIndependentLetters(c, one_byte, chars); |
918 if (length < 1) { | 914 if (length < 1) { |
919 // This can't match. Must be a one-byte subject and a non-one-byte | 915 // This can't match. Must be a one-byte subject and a non-one-byte |
920 // character. We do not need to do anything since the one-byte pass | 916 // character. We do not need to do anything since the one-byte pass |
921 // already handled this. | 917 // already handled this. |
922 return false; // Bounds not checked. | 918 return false; // Bounds not checked. |
923 } | 919 } |
924 bool checked = false; | 920 bool checked = false; |
925 // We handle the length > 1 case in a later pass. | 921 // We handle the length > 1 case in a later pass. |
926 if (length == 1) { | 922 if (length == 1) { |
927 if (one_byte && c > String::kMaxOneByteCharCodeU) { | 923 if (one_byte && c > Symbols::kMaxOneCharCodeSymbol) { |
928 // Can't match - see above. | 924 // Can't match - see above. |
929 return false; // Bounds not checked. | 925 return false; // Bounds not checked. |
930 } | 926 } |
931 if (!preloaded) { | 927 if (!preloaded) { |
932 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 928 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
933 checked = check; | 929 checked = check; |
934 } | 930 } |
935 macro_assembler->CheckNotCharacter(c, on_failure); | 931 macro_assembler->CheckNotCharacter(c, on_failure); |
936 } | 932 } |
937 return checked; | 933 return checked; |
938 } | 934 } |
939 | 935 |
940 | 936 |
941 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, | 937 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
942 bool one_byte, uc16 c1, uc16 c2, | 938 bool one_byte, |
943 Label* on_failure) { | 939 uint16_t c1, |
944 uc16 char_mask; | 940 uint16_t c2, |
| 941 BlockLabel* on_failure) { |
| 942 uint16_t char_mask; |
945 if (one_byte) { | 943 if (one_byte) { |
946 char_mask = String::kMaxOneByteCharCode; | 944 char_mask = Symbols::kMaxOneCharCodeSymbol; |
947 } else { | 945 } else { |
948 char_mask = String::kMaxUtf16CodeUnit; | 946 char_mask = Utf16::kMaxCodeUnit; |
949 } | 947 } |
950 uc16 exor = c1 ^ c2; | 948 uint16_t exor = c1 ^ c2; |
951 // Check whether exor has only one bit set. | 949 // Check whether exor has only one bit set. |
952 if (((exor - 1) & exor) == 0) { | 950 if (((exor - 1) & exor) == 0) { |
953 // If c1 and c2 differ only by one bit. | 951 // If c1 and c2 differ only by one bit. |
954 // Ecma262UnCanonicalize always gives the highest number last. | 952 // Ecma262UnCanonicalize always gives the highest number last. |
955 DCHECK(c2 > c1); | 953 ASSERT(c2 > c1); |
956 uc16 mask = char_mask ^ exor; | 954 uint16_t mask = char_mask ^ exor; |
957 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure); | 955 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure); |
958 return true; | 956 return true; |
959 } | 957 } |
960 DCHECK(c2 > c1); | 958 ASSERT(c2 > c1); |
961 uc16 diff = c2 - c1; | 959 uint16_t diff = c2 - c1; |
962 if (((diff - 1) & diff) == 0 && c1 >= diff) { | 960 if (((diff - 1) & diff) == 0 && c1 >= diff) { |
963 // If the characters differ by 2^n but don't differ by one bit then | 961 // If the characters differ by 2^n but don't differ by one bit then |
964 // subtract the difference from the found character, then do the or | 962 // subtract the difference from the found character, then do the or |
965 // trick. We avoid the theoretical case where negative numbers are | 963 // trick. We avoid the theoretical case where negative numbers are |
966 // involved in order to simplify code generation. | 964 // involved in order to simplify code generation. |
967 uc16 mask = char_mask ^ diff; | 965 uint16_t mask = char_mask ^ diff; |
968 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, | 966 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, |
969 diff, | 967 diff, |
970 mask, | 968 mask, |
971 on_failure); | 969 on_failure); |
972 return true; | 970 return true; |
973 } | 971 } |
974 return false; | 972 return false; |
975 } | 973 } |
976 | 974 |
977 | 975 |
978 typedef bool EmitCharacterFunction(Isolate* isolate, | 976 typedef bool EmitCharacterFunction(Isolate* isolate, |
979 RegExpCompiler* compiler, | 977 RegExpCompiler* compiler, |
980 uc16 c, | 978 uint16_t c, |
981 Label* on_failure, | 979 BlockLabel* on_failure, |
982 int cp_offset, | 980 intptr_t cp_offset, |
983 bool check, | 981 bool check, |
984 bool preloaded); | 982 bool preloaded); |
985 | 983 |
986 // Only emits letters (things that have case). Only used for case independent | 984 // Only emits letters (things that have case). Only used for case independent |
987 // matches. | 985 // matches. |
988 static inline bool EmitAtomLetter(Isolate* isolate, | 986 static inline bool EmitAtomLetter(Isolate* isolate, |
989 RegExpCompiler* compiler, | 987 RegExpCompiler* compiler, |
990 uc16 c, | 988 uint16_t c, |
991 Label* on_failure, | 989 BlockLabel* on_failure, |
992 int cp_offset, | 990 intptr_t cp_offset, |
993 bool check, | 991 bool check, |
994 bool preloaded) { | 992 bool preloaded) { |
995 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 993 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
996 bool one_byte = compiler->one_byte(); | 994 bool one_byte = compiler->one_byte(); |
997 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 995 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
998 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); | 996 intptr_t length = GetCaseIndependentLetters(c, one_byte, chars); |
999 if (length <= 1) return false; | 997 if (length <= 1) return false; |
1000 // We may not need to check against the end of the input string | 998 // We may not need to check against the end of the input string |
1001 // if this character lies before a character that matched. | 999 // if this character lies before a character that matched. |
1002 if (!preloaded) { | 1000 if (!preloaded) { |
1003 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1001 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
1004 } | 1002 } |
1005 Label ok; | 1003 BlockLabel ok; |
1006 DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1004 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
1007 switch (length) { | 1005 switch (length) { |
1008 case 2: { | 1006 case 2: { |
1009 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0], | 1007 if (ShortCutEmitCharacterPair(macro_assembler, |
1010 chars[1], on_failure)) { | 1008 one_byte, |
| 1009 chars[0], |
| 1010 chars[1], |
| 1011 on_failure)) { |
1011 } else { | 1012 } else { |
1012 macro_assembler->CheckCharacter(chars[0], &ok); | 1013 macro_assembler->CheckCharacter(chars[0], &ok); |
1013 macro_assembler->CheckNotCharacter(chars[1], on_failure); | 1014 macro_assembler->CheckNotCharacter(chars[1], on_failure); |
1014 macro_assembler->Bind(&ok); | 1015 macro_assembler->BindBlock(&ok); |
1015 } | 1016 } |
1016 break; | 1017 break; |
1017 } | 1018 } |
1018 case 4: | 1019 case 4: |
1019 macro_assembler->CheckCharacter(chars[3], &ok); | 1020 macro_assembler->CheckCharacter(chars[3], &ok); |
1020 // Fall through! | 1021 // Fall through! |
1021 case 3: | 1022 case 3: |
1022 macro_assembler->CheckCharacter(chars[0], &ok); | 1023 macro_assembler->CheckCharacter(chars[0], &ok); |
1023 macro_assembler->CheckCharacter(chars[1], &ok); | 1024 macro_assembler->CheckCharacter(chars[1], &ok); |
1024 macro_assembler->CheckNotCharacter(chars[2], on_failure); | 1025 macro_assembler->CheckNotCharacter(chars[2], on_failure); |
1025 macro_assembler->Bind(&ok); | 1026 macro_assembler->BindBlock(&ok); |
1026 break; | 1027 break; |
1027 default: | 1028 default: |
1028 UNREACHABLE(); | 1029 UNREACHABLE(); |
1029 break; | 1030 break; |
1030 } | 1031 } |
1031 return true; | 1032 return true; |
1032 } | 1033 } |
1033 | 1034 |
1034 | 1035 |
1035 static void EmitBoundaryTest(RegExpMacroAssembler* masm, | 1036 static void EmitBoundaryTest(RegExpMacroAssembler* masm, |
1036 int border, | 1037 intptr_t border, |
1037 Label* fall_through, | 1038 BlockLabel* fall_through, |
1038 Label* above_or_equal, | 1039 BlockLabel* above_or_equal, |
1039 Label* below) { | 1040 BlockLabel* below) { |
1040 if (below != fall_through) { | 1041 if (below != fall_through) { |
1041 masm->CheckCharacterLT(border, below); | 1042 masm->CheckCharacterLT(border, below); |
1042 if (above_or_equal != fall_through) masm->GoTo(above_or_equal); | 1043 if (above_or_equal != fall_through) masm->GoTo(above_or_equal); |
1043 } else { | 1044 } else { |
1044 masm->CheckCharacterGT(border - 1, above_or_equal); | 1045 masm->CheckCharacterGT(border - 1, above_or_equal); |
1045 } | 1046 } |
1046 } | 1047 } |
1047 | 1048 |
1048 | 1049 |
1049 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, | 1050 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, |
1050 int first, | 1051 intptr_t first, |
1051 int last, | 1052 intptr_t last, |
1052 Label* fall_through, | 1053 BlockLabel* fall_through, |
1053 Label* in_range, | 1054 BlockLabel* in_range, |
1054 Label* out_of_range) { | 1055 BlockLabel* out_of_range) { |
1055 if (in_range == fall_through) { | 1056 if (in_range == fall_through) { |
1056 if (first == last) { | 1057 if (first == last) { |
1057 masm->CheckNotCharacter(first, out_of_range); | 1058 masm->CheckNotCharacter(first, out_of_range); |
1058 } else { | 1059 } else { |
1059 masm->CheckCharacterNotInRange(first, last, out_of_range); | 1060 masm->CheckCharacterNotInRange(first, last, out_of_range); |
1060 } | 1061 } |
1061 } else { | 1062 } else { |
1062 if (first == last) { | 1063 if (first == last) { |
1063 masm->CheckCharacter(first, in_range); | 1064 masm->CheckCharacter(first, in_range); |
1064 } else { | 1065 } else { |
1065 masm->CheckCharacterInRange(first, last, in_range); | 1066 masm->CheckCharacterInRange(first, last, in_range); |
1066 } | 1067 } |
1067 if (out_of_range != fall_through) masm->GoTo(out_of_range); | 1068 if (out_of_range != fall_through) masm->GoTo(out_of_range); |
1068 } | 1069 } |
1069 } | 1070 } |
1070 | 1071 |
1071 | 1072 |
1072 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. | 1073 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. |
1073 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. | 1074 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. |
1074 static void EmitUseLookupTable( | 1075 static void EmitUseLookupTable( |
1075 RegExpMacroAssembler* masm, | 1076 RegExpMacroAssembler* masm, |
1076 ZoneList<int>* ranges, | 1077 ZoneGrowableArray<int>* ranges, |
1077 int start_index, | 1078 intptr_t start_index, |
1078 int end_index, | 1079 intptr_t end_index, |
1079 int min_char, | 1080 intptr_t min_char, |
1080 Label* fall_through, | 1081 BlockLabel* fall_through, |
1081 Label* even_label, | 1082 BlockLabel* even_label, |
1082 Label* odd_label) { | 1083 BlockLabel* odd_label) { |
1083 static const int kSize = RegExpMacroAssembler::kTableSize; | 1084 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
1084 static const int kMask = RegExpMacroAssembler::kTableMask; | 1085 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; |
1085 | 1086 |
1086 int base = (min_char & ~kMask); | 1087 intptr_t base = (min_char & ~kMask); |
1087 USE(base); | |
1088 | 1088 |
1089 // Assert that everything is on one kTableSize page. | 1089 // Assert that everything is on one kTableSize page. |
1090 for (int i = start_index; i <= end_index; i++) { | 1090 for (intptr_t i = start_index; i <= end_index; i++) { |
1091 DCHECK_EQ(ranges->at(i) & ~kMask, base); | 1091 ASSERT((ranges->At(i) & ~kMask) == base); |
1092 } | 1092 } |
1093 DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base); | 1093 ASSERT(start_index == 0 || (ranges->At(start_index - 1) & ~kMask) <= base); |
1094 | 1094 |
1095 char templ[kSize]; | 1095 char templ[kSize]; |
1096 Label* on_bit_set; | 1096 BlockLabel* on_bit_set; |
1097 Label* on_bit_clear; | 1097 BlockLabel* on_bit_clear; |
1098 int bit; | 1098 intptr_t bit; |
1099 if (even_label == fall_through) { | 1099 if (even_label == fall_through) { |
1100 on_bit_set = odd_label; | 1100 on_bit_set = odd_label; |
1101 on_bit_clear = even_label; | 1101 on_bit_clear = even_label; |
1102 bit = 1; | 1102 bit = 1; |
1103 } else { | 1103 } else { |
1104 on_bit_set = even_label; | 1104 on_bit_set = even_label; |
1105 on_bit_clear = odd_label; | 1105 on_bit_clear = odd_label; |
1106 bit = 0; | 1106 bit = 0; |
1107 } | 1107 } |
1108 for (int i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; i++) { | 1108 for (intptr_t i = 0; i < (ranges->At(start_index) & kMask) && i < kSize; |
| 1109 i++) { |
1109 templ[i] = bit; | 1110 templ[i] = bit; |
1110 } | 1111 } |
1111 int j = 0; | 1112 intptr_t j = 0; |
1112 bit ^= 1; | 1113 bit ^= 1; |
1113 for (int i = start_index; i < end_index; i++) { | 1114 for (intptr_t i = start_index; i < end_index; i++) { |
1114 for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) { | 1115 for (j = (ranges->At(i) & kMask); j < (ranges->At(i + 1) & kMask); j++) { |
1115 templ[j] = bit; | 1116 templ[j] = bit; |
1116 } | 1117 } |
1117 bit ^= 1; | 1118 bit ^= 1; |
1118 } | 1119 } |
1119 for (int i = j; i < kSize; i++) { | 1120 for (intptr_t i = j; i < kSize; i++) { |
1120 templ[i] = bit; | 1121 templ[i] = bit; |
1121 } | 1122 } |
1122 Factory* factory = masm->zone()->isolate()->factory(); | |
1123 // TODO(erikcorry): Cache these. | 1123 // TODO(erikcorry): Cache these. |
1124 Handle<ByteArray> ba = factory->NewByteArray(kSize, TENURED); | 1124 const TypedData& ba = TypedData::ZoneHandle( |
1125 for (int i = 0; i < kSize; i++) { | 1125 masm->isolate(), |
1126 ba->set(i, templ[i]); | 1126 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); |
| 1127 for (intptr_t i = 0; i < kSize; i++) { |
| 1128 ba.SetUint8(i, templ[i]); |
1127 } | 1129 } |
1128 masm->CheckBitInTable(ba, on_bit_set); | 1130 masm->CheckBitInTable(ba, on_bit_set); |
1129 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); | 1131 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); |
1130 } | 1132 } |
1131 | 1133 |
1132 | 1134 |
1133 static void CutOutRange(RegExpMacroAssembler* masm, | 1135 static void CutOutRange(RegExpMacroAssembler* masm, |
1134 ZoneList<int>* ranges, | 1136 ZoneGrowableArray<int>* ranges, |
1135 int start_index, | 1137 intptr_t start_index, |
1136 int end_index, | 1138 intptr_t end_index, |
1137 int cut_index, | 1139 intptr_t cut_index, |
1138 Label* even_label, | 1140 BlockLabel* even_label, |
1139 Label* odd_label) { | 1141 BlockLabel* odd_label) { |
1140 bool odd = (((cut_index - start_index) & 1) == 1); | 1142 bool odd = (((cut_index - start_index) & 1) == 1); |
1141 Label* in_range_label = odd ? odd_label : even_label; | 1143 BlockLabel* in_range_label = odd ? odd_label : even_label; |
1142 Label dummy; | 1144 BlockLabel dummy; |
1143 EmitDoubleBoundaryTest(masm, | 1145 EmitDoubleBoundaryTest(masm, |
1144 ranges->at(cut_index), | 1146 ranges->At(cut_index), |
1145 ranges->at(cut_index + 1) - 1, | 1147 ranges->At(cut_index + 1) - 1, |
1146 &dummy, | 1148 &dummy, |
1147 in_range_label, | 1149 in_range_label, |
1148 &dummy); | 1150 &dummy); |
1149 DCHECK(!dummy.is_linked()); | 1151 ASSERT(!dummy.IsLinked()); |
1150 // Cut out the single range by rewriting the array. This creates a new | 1152 // Cut out the single range by rewriting the array. This creates a new |
1151 // range that is a merger of the two ranges on either side of the one we | 1153 // range that is a merger of the two ranges on either side of the one we |
1152 // are cutting out. The oddity of the labels is preserved. | 1154 // are cutting out. The oddity of the labels is preserved. |
1153 for (int j = cut_index; j > start_index; j--) { | 1155 for (intptr_t j = cut_index; j > start_index; j--) { |
1154 ranges->at(j) = ranges->at(j - 1); | 1156 (*ranges)[j] = ranges->At(j - 1); |
1155 } | 1157 } |
1156 for (int j = cut_index + 1; j < end_index; j++) { | 1158 for (intptr_t j = cut_index + 1; j < end_index; j++) { |
1157 ranges->at(j) = ranges->at(j + 1); | 1159 (*ranges)[j] = ranges->At(j + 1); |
1158 } | 1160 } |
1159 } | 1161 } |
1160 | 1162 |
1161 | 1163 |
1162 // Unicode case. Split the search space into kSize spaces that are handled | 1164 // Unicode case. Split the search space into kSize spaces that are handled |
1163 // with recursion. | 1165 // with recursion. |
1164 static void SplitSearchSpace(ZoneList<int>* ranges, | 1166 static void SplitSearchSpace(ZoneGrowableArray<int>* ranges, |
1165 int start_index, | 1167 intptr_t start_index, |
1166 int end_index, | 1168 intptr_t end_index, |
1167 int* new_start_index, | 1169 intptr_t* new_start_index, |
1168 int* new_end_index, | 1170 intptr_t* new_end_index, |
1169 int* border) { | 1171 intptr_t* border) { |
1170 static const int kSize = RegExpMacroAssembler::kTableSize; | 1172 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
1171 static const int kMask = RegExpMacroAssembler::kTableMask; | 1173 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; |
1172 | 1174 |
1173 int first = ranges->at(start_index); | 1175 intptr_t first = ranges->At(start_index); |
1174 int last = ranges->at(end_index) - 1; | 1176 intptr_t last = ranges->At(end_index) - 1; |
1175 | 1177 |
1176 *new_start_index = start_index; | 1178 *new_start_index = start_index; |
1177 *border = (ranges->at(start_index) & ~kMask) + kSize; | 1179 *border = (ranges->At(start_index) & ~kMask) + kSize; |
1178 while (*new_start_index < end_index) { | 1180 while (*new_start_index < end_index) { |
1179 if (ranges->at(*new_start_index) > *border) break; | 1181 if (ranges->At(*new_start_index) > *border) break; |
1180 (*new_start_index)++; | 1182 (*new_start_index)++; |
1181 } | 1183 } |
1182 // new_start_index is the index of the first edge that is beyond the | 1184 // new_start_index is the index of the first edge that is beyond the |
1183 // current kSize space. | 1185 // current kSize space. |
1184 | 1186 |
1185 // For very large search spaces we do a binary chop search of the non-Latin1 | 1187 // For very large search spaces we do a binary chop search of the non-Latin1 |
1186 // space instead of just going to the end of the current kSize space. The | 1188 // space instead of just going to the end of the current kSize space. The |
1187 // heuristics are complicated a little by the fact that any 128-character | 1189 // heuristics are complicated a little by the fact that any 128-character |
1188 // encoding space can be quickly tested with a table lookup, so we don't | 1190 // encoding space can be quickly tested with a table lookup, so we don't |
1189 // wish to do binary chop search at a smaller granularity than that. A | 1191 // wish to do binary chop search at a smaller granularity than that. A |
1190 // 128-character space can take up a lot of space in the ranges array if, | 1192 // 128-character space can take up a lot of space in the ranges array if, |
1191 // for example, we only want to match every second character (eg. the lower | 1193 // for example, we only want to match every second character (eg. the lower |
1192 // case characters on some Unicode pages). | 1194 // case characters on some Unicode pages). |
1193 int binary_chop_index = (end_index + start_index) / 2; | 1195 intptr_t binary_chop_index = (end_index + start_index) / 2; |
1194 // The first test ensures that we get to the code that handles the Latin1 | 1196 // The first test ensures that we get to the code that handles the Latin1 |
1195 // range with a single not-taken branch, speeding up this important | 1197 // range with a single not-taken branch, speeding up this important |
1196 // character range (even non-Latin1 charset-based text has spaces and | 1198 // character range (even non-Latin1 charset-based text has spaces and |
1197 // punctuation). | 1199 // punctuation). |
1198 if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case. | 1200 if (*border - 1 > Symbols::kMaxOneCharCodeSymbol && // Latin1 case. |
1199 end_index - start_index > (*new_start_index - start_index) * 2 && | 1201 end_index - start_index > (*new_start_index - start_index) * 2 && |
1200 last - first > kSize * 2 && binary_chop_index > *new_start_index && | 1202 last - first > kSize * 2 && |
1201 ranges->at(binary_chop_index) >= first + 2 * kSize) { | 1203 binary_chop_index > *new_start_index && |
1202 int scan_forward_for_section_border = binary_chop_index;; | 1204 ranges->At(binary_chop_index) >= first + 2 * kSize) { |
1203 int new_border = (ranges->at(binary_chop_index) | kMask) + 1; | 1205 intptr_t scan_forward_for_section_border = binary_chop_index;; |
| 1206 intptr_t new_border = (ranges->At(binary_chop_index) | kMask) + 1; |
1204 | 1207 |
1205 while (scan_forward_for_section_border < end_index) { | 1208 while (scan_forward_for_section_border < end_index) { |
1206 if (ranges->at(scan_forward_for_section_border) > new_border) { | 1209 if (ranges->At(scan_forward_for_section_border) > new_border) { |
1207 *new_start_index = scan_forward_for_section_border; | 1210 *new_start_index = scan_forward_for_section_border; |
1208 *border = new_border; | 1211 *border = new_border; |
1209 break; | 1212 break; |
1210 } | 1213 } |
1211 scan_forward_for_section_border++; | 1214 scan_forward_for_section_border++; |
1212 } | 1215 } |
1213 } | 1216 } |
1214 | 1217 |
1215 DCHECK(*new_start_index > start_index); | 1218 ASSERT(*new_start_index > start_index); |
1216 *new_end_index = *new_start_index - 1; | 1219 *new_end_index = *new_start_index - 1; |
1217 if (ranges->at(*new_end_index) == *border) { | 1220 if (ranges->At(*new_end_index) == *border) { |
1218 (*new_end_index)--; | 1221 (*new_end_index)--; |
1219 } | 1222 } |
1220 if (*border >= ranges->at(end_index)) { | 1223 if (*border >= ranges->At(end_index)) { |
1221 *border = ranges->at(end_index); | 1224 *border = ranges->At(end_index); |
1222 *new_start_index = end_index; // Won't be used. | 1225 *new_start_index = end_index; // Won't be used. |
1223 *new_end_index = end_index - 1; | 1226 *new_end_index = end_index - 1; |
1224 } | 1227 } |
1225 } | 1228 } |
1226 | 1229 |
1227 | 1230 |
1228 // Gets a series of segment boundaries representing a character class. If the | 1231 // Gets a series of segment boundaries representing a character class. If the |
1229 // character is in the range between an even and an odd boundary (counting from | 1232 // character is in the range between an even and an odd boundary (counting from |
1230 // start_index) then go to even_label, otherwise go to odd_label. We already | 1233 // start_index) then go to even_label, otherwise go to odd_label. We already |
1231 // know that the character is in the range of min_char to max_char inclusive. | 1234 // know that the character is in the range of min_char to max_char inclusive. |
1232 // Either label can be NULL indicating backtracking. Either label can also be | 1235 // Either label can be NULL indicating backtracking. Either label can also be |
1233 // equal to the fall_through label. | 1236 // equal to the fall_through label. |
1234 static void GenerateBranches(RegExpMacroAssembler* masm, | 1237 static void GenerateBranches(RegExpMacroAssembler* masm, |
1235 ZoneList<int>* ranges, | 1238 ZoneGrowableArray<int>* ranges, |
1236 int start_index, | 1239 intptr_t start_index, |
1237 int end_index, | 1240 intptr_t end_index, |
1238 uc16 min_char, | 1241 uint16_t min_char, |
1239 uc16 max_char, | 1242 uint16_t max_char, |
1240 Label* fall_through, | 1243 BlockLabel* fall_through, |
1241 Label* even_label, | 1244 BlockLabel* even_label, |
1242 Label* odd_label) { | 1245 BlockLabel* odd_label) { |
1243 int first = ranges->at(start_index); | 1246 intptr_t first = ranges->At(start_index); |
1244 int last = ranges->at(end_index) - 1; | 1247 intptr_t last = ranges->At(end_index) - 1; |
1245 | 1248 |
1246 DCHECK_LT(min_char, first); | 1249 ASSERT(min_char < first); |
1247 | 1250 |
1248 // Just need to test if the character is before or on-or-after | 1251 // Just need to test if the character is before or on-or-after |
1249 // a particular character. | 1252 // a particular character. |
1250 if (start_index == end_index) { | 1253 if (start_index == end_index) { |
1251 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); | 1254 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); |
1252 return; | 1255 return; |
1253 } | 1256 } |
1254 | 1257 |
1255 // Another almost trivial case: There is one interval in the middle that is | 1258 // Another almost trivial case: There is one interval in the middle that is |
1256 // different from the end intervals. | 1259 // different from the end intervals. |
1257 if (start_index + 1 == end_index) { | 1260 if (start_index + 1 == end_index) { |
1258 EmitDoubleBoundaryTest( | 1261 EmitDoubleBoundaryTest( |
1259 masm, first, last, fall_through, even_label, odd_label); | 1262 masm, first, last, fall_through, even_label, odd_label); |
1260 return; | 1263 return; |
1261 } | 1264 } |
1262 | 1265 |
1263 // It's not worth using table lookup if there are very few intervals in the | 1266 // It's not worth using table lookup if there are very few intervals in the |
1264 // character class. | 1267 // character class. |
1265 if (end_index - start_index <= 6) { | 1268 if (end_index - start_index <= 6) { |
1266 // It is faster to test for individual characters, so we look for those | 1269 // It is faster to test for individual characters, so we look for those |
1267 // first, then try arbitrary ranges in the second round. | 1270 // first, then try arbitrary ranges in the second round. |
1268 static int kNoCutIndex = -1; | 1271 static intptr_t kNoCutIndex = -1; |
1269 int cut = kNoCutIndex; | 1272 intptr_t cut = kNoCutIndex; |
1270 for (int i = start_index; i < end_index; i++) { | 1273 for (intptr_t i = start_index; i < end_index; i++) { |
1271 if (ranges->at(i) == ranges->at(i + 1) - 1) { | 1274 if (ranges->At(i) == ranges->At(i + 1) - 1) { |
1272 cut = i; | 1275 cut = i; |
1273 break; | 1276 break; |
1274 } | 1277 } |
1275 } | 1278 } |
1276 if (cut == kNoCutIndex) cut = start_index; | 1279 if (cut == kNoCutIndex) cut = start_index; |
1277 CutOutRange( | 1280 CutOutRange( |
1278 masm, ranges, start_index, end_index, cut, even_label, odd_label); | 1281 masm, ranges, start_index, end_index, cut, even_label, odd_label); |
1279 DCHECK_GE(end_index - start_index, 2); | 1282 ASSERT(end_index - start_index >= 2); |
1280 GenerateBranches(masm, | 1283 GenerateBranches(masm, |
1281 ranges, | 1284 ranges, |
1282 start_index + 1, | 1285 start_index + 1, |
1283 end_index - 1, | 1286 end_index - 1, |
1284 min_char, | 1287 min_char, |
1285 max_char, | 1288 max_char, |
1286 fall_through, | 1289 fall_through, |
1287 even_label, | 1290 even_label, |
1288 odd_label); | 1291 odd_label); |
1289 return; | 1292 return; |
1290 } | 1293 } |
1291 | 1294 |
1292 // If there are a lot of intervals in the regexp, then we will use tables to | 1295 // If there are a lot of intervals in the regexp, then we will use tables to |
1293 // determine whether the character is inside or outside the character class. | 1296 // determine whether the character is inside or outside the character class. |
1294 static const int kBits = RegExpMacroAssembler::kTableSizeBits; | 1297 static const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits; |
1295 | 1298 |
1296 if ((max_char >> kBits) == (min_char >> kBits)) { | 1299 if ((max_char >> kBits) == (min_char >> kBits)) { |
1297 EmitUseLookupTable(masm, | 1300 EmitUseLookupTable(masm, |
1298 ranges, | 1301 ranges, |
1299 start_index, | 1302 start_index, |
1300 end_index, | 1303 end_index, |
1301 min_char, | 1304 min_char, |
1302 fall_through, | 1305 fall_through, |
1303 even_label, | 1306 even_label, |
1304 odd_label); | 1307 odd_label); |
1305 return; | 1308 return; |
1306 } | 1309 } |
1307 | 1310 |
1308 if ((min_char >> kBits) != (first >> kBits)) { | 1311 if ((min_char >> kBits) != (first >> kBits)) { |
1309 masm->CheckCharacterLT(first, odd_label); | 1312 masm->CheckCharacterLT(first, odd_label); |
1310 GenerateBranches(masm, | 1313 GenerateBranches(masm, |
1311 ranges, | 1314 ranges, |
1312 start_index + 1, | 1315 start_index + 1, |
1313 end_index, | 1316 end_index, |
1314 first, | 1317 first, |
1315 max_char, | 1318 max_char, |
1316 fall_through, | 1319 fall_through, |
1317 odd_label, | 1320 odd_label, |
1318 even_label); | 1321 even_label); |
1319 return; | 1322 return; |
1320 } | 1323 } |
1321 | 1324 |
1322 int new_start_index = 0; | 1325 intptr_t new_start_index = 0; |
1323 int new_end_index = 0; | 1326 intptr_t new_end_index = 0; |
1324 int border = 0; | 1327 intptr_t border = 0; |
1325 | 1328 |
1326 SplitSearchSpace(ranges, | 1329 SplitSearchSpace(ranges, |
1327 start_index, | 1330 start_index, |
1328 end_index, | 1331 end_index, |
1329 &new_start_index, | 1332 &new_start_index, |
1330 &new_end_index, | 1333 &new_end_index, |
1331 &border); | 1334 &border); |
1332 | 1335 |
1333 Label handle_rest; | 1336 BlockLabel handle_rest; |
1334 Label* above = &handle_rest; | 1337 BlockLabel* above = &handle_rest; |
1335 if (border == last + 1) { | 1338 if (border == last + 1) { |
1336 // We didn't find any section that started after the limit, so everything | 1339 // We didn't find any section that started after the limit, so everything |
1337 // above the border is one of the terminal labels. | 1340 // above the border is one of the terminal labels. |
1338 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label; | 1341 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label; |
1339 DCHECK(new_end_index == end_index - 1); | 1342 ASSERT(new_end_index == end_index - 1); |
1340 } | 1343 } |
1341 | 1344 |
1342 DCHECK_LE(start_index, new_end_index); | 1345 ASSERT(start_index <= new_end_index); |
1343 DCHECK_LE(new_start_index, end_index); | 1346 ASSERT(new_start_index <= end_index); |
1344 DCHECK_LT(start_index, new_start_index); | 1347 ASSERT(start_index < new_start_index); |
1345 DCHECK_LT(new_end_index, end_index); | 1348 ASSERT(new_end_index < end_index); |
1346 DCHECK(new_end_index + 1 == new_start_index || | 1349 ASSERT(new_end_index + 1 == new_start_index || |
1347 (new_end_index + 2 == new_start_index && | 1350 (new_end_index + 2 == new_start_index && |
1348 border == ranges->at(new_end_index + 1))); | 1351 border == ranges->At(new_end_index + 1))); |
1349 DCHECK_LT(min_char, border - 1); | 1352 ASSERT(min_char < border - 1); |
1350 DCHECK_LT(border, max_char); | 1353 ASSERT(border < max_char); |
1351 DCHECK_LT(ranges->at(new_end_index), border); | 1354 ASSERT(ranges->At(new_end_index) < border); |
1352 DCHECK(border < ranges->at(new_start_index) || | 1355 ASSERT(border < ranges->At(new_start_index) || |
1353 (border == ranges->at(new_start_index) && | 1356 (border == ranges->At(new_start_index) && |
1354 new_start_index == end_index && | 1357 new_start_index == end_index && |
1355 new_end_index == end_index - 1 && | 1358 new_end_index == end_index - 1 && |
1356 border == last + 1)); | 1359 border == last + 1)); |
1357 DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1)); | 1360 ASSERT(new_start_index == 0 || border >= ranges->At(new_start_index - 1)); |
1358 | 1361 |
1359 masm->CheckCharacterGT(border - 1, above); | 1362 masm->CheckCharacterGT(border - 1, above); |
1360 Label dummy; | 1363 BlockLabel dummy; |
1361 GenerateBranches(masm, | 1364 GenerateBranches(masm, |
1362 ranges, | 1365 ranges, |
1363 start_index, | 1366 start_index, |
1364 new_end_index, | 1367 new_end_index, |
1365 min_char, | 1368 min_char, |
1366 border - 1, | 1369 border - 1, |
1367 &dummy, | 1370 &dummy, |
1368 even_label, | 1371 even_label, |
1369 odd_label); | 1372 odd_label); |
1370 if (handle_rest.is_linked()) { | 1373 |
1371 masm->Bind(&handle_rest); | 1374 if (handle_rest.IsLinked()) { |
| 1375 masm->BindBlock(&handle_rest); |
1372 bool flip = (new_start_index & 1) != (start_index & 1); | 1376 bool flip = (new_start_index & 1) != (start_index & 1); |
1373 GenerateBranches(masm, | 1377 GenerateBranches(masm, |
1374 ranges, | 1378 ranges, |
1375 new_start_index, | 1379 new_start_index, |
1376 end_index, | 1380 end_index, |
1377 border, | 1381 border, |
1378 max_char, | 1382 max_char, |
1379 &dummy, | 1383 &dummy, |
1380 flip ? odd_label : even_label, | 1384 flip ? odd_label : even_label, |
1381 flip ? even_label : odd_label); | 1385 flip ? even_label : odd_label); |
1382 } | 1386 } |
1383 } | 1387 } |
1384 | 1388 |
1385 | 1389 |
1386 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1390 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
1387 RegExpCharacterClass* cc, bool one_byte, | 1391 RegExpCharacterClass* cc, |
1388 Label* on_failure, int cp_offset, bool check_offset, | 1392 bool one_byte, |
1389 bool preloaded, Zone* zone) { | 1393 BlockLabel* on_failure, |
1390 ZoneList<CharacterRange>* ranges = cc->ranges(zone); | 1394 intptr_t cp_offset, |
| 1395 bool check_offset, |
| 1396 bool preloaded, |
| 1397 Isolate* isolate) { |
| 1398 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); |
1391 if (!CharacterRange::IsCanonical(ranges)) { | 1399 if (!CharacterRange::IsCanonical(ranges)) { |
1392 CharacterRange::Canonicalize(ranges); | 1400 CharacterRange::Canonicalize(ranges); |
1393 } | 1401 } |
1394 | 1402 |
1395 int max_char; | 1403 intptr_t max_char; |
1396 if (one_byte) { | 1404 if (one_byte) { |
1397 max_char = String::kMaxOneByteCharCode; | 1405 max_char = Symbols::kMaxOneCharCodeSymbol; |
1398 } else { | 1406 } else { |
1399 max_char = String::kMaxUtf16CodeUnit; | 1407 max_char = Utf16::kMaxCodeUnit; |
1400 } | 1408 } |
1401 | 1409 |
1402 int range_count = ranges->length(); | 1410 intptr_t range_count = ranges->length(); |
1403 | 1411 |
1404 int last_valid_range = range_count - 1; | 1412 intptr_t last_valid_range = range_count - 1; |
1405 while (last_valid_range >= 0) { | 1413 while (last_valid_range >= 0) { |
1406 CharacterRange& range = ranges->at(last_valid_range); | 1414 CharacterRange& range = (*ranges)[last_valid_range]; |
1407 if (range.from() <= max_char) { | 1415 if (range.from() <= max_char) { |
1408 break; | 1416 break; |
1409 } | 1417 } |
1410 last_valid_range--; | 1418 last_valid_range--; |
1411 } | 1419 } |
1412 | 1420 |
1413 if (last_valid_range < 0) { | 1421 if (last_valid_range < 0) { |
1414 if (!cc->is_negated()) { | 1422 if (!cc->is_negated()) { |
1415 macro_assembler->GoTo(on_failure); | 1423 macro_assembler->GoTo(on_failure); |
1416 } | 1424 } |
1417 if (check_offset) { | 1425 if (check_offset) { |
1418 macro_assembler->CheckPosition(cp_offset, on_failure); | 1426 macro_assembler->CheckPosition(cp_offset, on_failure); |
1419 } | 1427 } |
1420 return; | 1428 return; |
1421 } | 1429 } |
1422 | 1430 |
1423 if (last_valid_range == 0 && | 1431 if (last_valid_range == 0 && |
1424 ranges->at(0).IsEverything(max_char)) { | 1432 ranges->At(0).IsEverything(max_char)) { |
1425 if (cc->is_negated()) { | 1433 if (cc->is_negated()) { |
1426 macro_assembler->GoTo(on_failure); | 1434 macro_assembler->GoTo(on_failure); |
1427 } else { | 1435 } else { |
1428 // This is a common case hit by non-anchored expressions. | 1436 // This is a common case hit by non-anchored expressions. |
1429 if (check_offset) { | 1437 if (check_offset) { |
1430 macro_assembler->CheckPosition(cp_offset, on_failure); | 1438 macro_assembler->CheckPosition(cp_offset, on_failure); |
1431 } | 1439 } |
1432 } | 1440 } |
1433 return; | 1441 return; |
1434 } | 1442 } |
1435 if (last_valid_range == 0 && | 1443 if (last_valid_range == 0 && |
1436 !cc->is_negated() && | 1444 !cc->is_negated() && |
1437 ranges->at(0).IsEverything(max_char)) { | 1445 ranges->At(0).IsEverything(max_char)) { |
1438 // This is a common case hit by non-anchored expressions. | 1446 // This is a common case hit by non-anchored expressions. |
1439 if (check_offset) { | 1447 if (check_offset) { |
1440 macro_assembler->CheckPosition(cp_offset, on_failure); | 1448 macro_assembler->CheckPosition(cp_offset, on_failure); |
1441 } | 1449 } |
1442 return; | 1450 return; |
1443 } | 1451 } |
1444 | 1452 |
1445 if (!preloaded) { | 1453 if (!preloaded) { |
1446 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); | 1454 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); |
1447 } | 1455 } |
1448 | 1456 |
1449 if (cc->is_standard(zone) && | 1457 if (cc->is_standard() && |
1450 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), | 1458 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), |
1451 on_failure)) { | 1459 on_failure)) { |
1452 return; | 1460 return; |
1453 } | 1461 } |
1454 | 1462 |
1455 | 1463 |
1456 // A new list with ascending entries. Each entry is a code unit | 1464 // A new list with ascending entries. Each entry is a code unit |
1457 // where there is a boundary between code units that are part of | 1465 // where there is a boundary between code units that are part of |
1458 // the class and code units that are not. Normally we insert an | 1466 // the class and code units that are not. Normally we insert an |
1459 // entry at zero which goes to the failure label, but if there | 1467 // entry at zero which goes to the failure label, but if there |
1460 // was already one there we fall through for success on that entry. | 1468 // was already one there we fall through for success on that entry. |
1461 // Subsequent entries have alternating meaning (success/failure). | 1469 // Subsequent entries have alternating meaning (success/failure). |
1462 ZoneList<int>* range_boundaries = | 1470 ZoneGrowableArray<int>* range_boundaries = |
1463 new(zone) ZoneList<int>(last_valid_range, zone); | 1471 new(isolate) ZoneGrowableArray<int>(last_valid_range); |
1464 | 1472 |
1465 bool zeroth_entry_is_failure = !cc->is_negated(); | 1473 bool zeroth_entry_is_failure = !cc->is_negated(); |
1466 | 1474 |
1467 for (int i = 0; i <= last_valid_range; i++) { | 1475 for (intptr_t i = 0; i <= last_valid_range; i++) { |
1468 CharacterRange& range = ranges->at(i); | 1476 CharacterRange& range = (*ranges)[i]; |
1469 if (range.from() == 0) { | 1477 if (range.from() == 0) { |
1470 DCHECK_EQ(i, 0); | 1478 ASSERT(i == 0); |
1471 zeroth_entry_is_failure = !zeroth_entry_is_failure; | 1479 zeroth_entry_is_failure = !zeroth_entry_is_failure; |
1472 } else { | 1480 } else { |
1473 range_boundaries->Add(range.from(), zone); | 1481 range_boundaries->Add(range.from()); |
1474 } | 1482 } |
1475 range_boundaries->Add(range.to() + 1, zone); | 1483 range_boundaries->Add(range.to() + 1); |
1476 } | 1484 } |
1477 int end_index = range_boundaries->length() - 1; | 1485 intptr_t end_index = range_boundaries->length() - 1; |
1478 if (range_boundaries->at(end_index) > max_char) { | 1486 if (range_boundaries->At(end_index) > max_char) { |
1479 end_index--; | 1487 end_index--; |
1480 } | 1488 } |
1481 | 1489 |
1482 Label fall_through; | 1490 BlockLabel fall_through; |
1483 GenerateBranches(macro_assembler, | 1491 GenerateBranches(macro_assembler, |
1484 range_boundaries, | 1492 range_boundaries, |
1485 0, // start_index. | 1493 0, // start_index. |
1486 end_index, | 1494 end_index, |
1487 0, // min_char. | 1495 0, // min_char. |
1488 max_char, | 1496 max_char, |
1489 &fall_through, | 1497 &fall_through, |
1490 zeroth_entry_is_failure ? &fall_through : on_failure, | 1498 zeroth_entry_is_failure ? &fall_through : on_failure, |
1491 zeroth_entry_is_failure ? on_failure : &fall_through); | 1499 zeroth_entry_is_failure ? on_failure : &fall_through); |
1492 macro_assembler->Bind(&fall_through); | 1500 macro_assembler->BindBlock(&fall_through); |
1493 } | 1501 } |
1494 | 1502 |
1495 | 1503 |
// Out-of-line destructor definition. The body is empty, so no cleanup work
// is performed here.
1496 RegExpNode::~RegExpNode() { | 1504 RegExpNode::~RegExpNode() { |
1497 } | 1505 } |
1498 | 1506 |
1499 | 1507 |
// Decides whether the caller should go on emitting code for this node
// (CONTINUE) or whether everything needed has already been emitted (DONE).
//
//   compiler  supplies the macro assembler, the recursion depth and the
//             work queue (AddWork).
//   trace     the trace under which this node is being generated.
//
// Outcomes, in the order they are checked:
//   * trace has a stop node                  -> CONTINUE, nothing emitted.
//   * trivial trace, label_ already bound    -> emit GoTo(label_), DONE.
//   * trivial trace, recursion depth at or
//     above kMaxRecursion                    -> queue this node, emit
//                                               GoTo(label_), DONE.
//   * trivial trace otherwise                -> bind label_ here, CONTINUE.
//   * non-trivial trace within the limits    -> count it, CONTINUE.
//   * non-trivial trace over the limits      -> trace->Flush(), DONE.
1500 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | 1508 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, |
1501 Trace* trace) { | 1509 Trace* trace) { |
1502 // If we are generating a greedy loop then don't stop and don't reuse code. | 1510 // If we are generating a greedy loop then don't stop and don't reuse code. |
1503 if (trace->stop_node() != NULL) { | 1511 if (trace->stop_node() != NULL) { |
1504 return CONTINUE; | 1512 return CONTINUE; |
1505 } | 1513 } |
1506 | 1514 |
1507 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1515 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
1508 if (trace->is_trivial()) { | 1516 if (trace->is_trivial()) { |
1509 if (label_.is_bound()) { | 1517 if (label_.IsBound()) { |
1510 // We are being asked to generate a generic version, but that's already | 1518 // We are being asked to generate a generic version, but that's already |
1511 // been done so just go to it. | 1519 // been done so just go to it. |
1512 macro_assembler->GoTo(&label_); | 1520 macro_assembler->GoTo(&label_); |
1513 return DONE; | 1521 return DONE; |
1514 } | 1522 } |
// Note the comparison is ">=" here, whereas the non-generic path below keeps
// going while the depth is "<=" kMaxRecursion.
1515 if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) { | 1523 if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) { |
1516 // To avoid too deep recursion we push the node to the work queue and just | 1524 // To avoid too deep recursion we push the node to the work queue and just |
1517 // generate a goto here. | 1525 // generate a goto here. |
1518 compiler->AddWork(this); | 1526 compiler->AddWork(this); |
1519 macro_assembler->GoTo(&label_); | 1527 macro_assembler->GoTo(&label_); |
1520 return DONE; | 1528 return DONE; |
1521 } | 1529 } |
1522 // Generate generic version of the node and bind the label for later use. | 1530 // Generate generic version of the node and bind the label for later use. |
1523 macro_assembler->Bind(&label_); | 1531 macro_assembler->BindBlock(&label_); |
1524 return CONTINUE; | 1532 return CONTINUE; |
1525 } | 1533 } |
1526 | 1534 |
1527 // We are being asked to make a non-generic version. Keep track of how many | 1535 // We are being asked to make a non-generic version. Keep track of how many |
1528 // non-generic versions we generate so as not to overdo it. | 1536 // non-generic versions we generate so as not to overdo it. |
// trace_count_ is incremented before the limit test, so the request that
// reaches kMaxCopiesCodeGenerated is itself flushed rather than specialized.
1529 trace_count_++; | 1537 trace_count_++; |
1530 if (FLAG_regexp_optimization && | 1538 if (kRegexpOptimization && |
1531 trace_count_ < kMaxCopiesCodeGenerated && | 1539 trace_count_ < kMaxCopiesCodeGenerated && |
1532 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { | 1540 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { |
1533 return CONTINUE; | 1541 return CONTINUE; |
1534 } | 1542 } |
1535 | 1543 |
1536 // If we get here code has been generated for this node too many times or | 1544 // If we get here code has been generated for this node too many times or |
1537 // recursion is too deep. Time to switch to a generic version. The code for | 1545 // recursion is too deep. Time to switch to a generic version. The code for |
1538 // generic versions above can handle deep recursion properly. | 1546 // generic versions above can handle deep recursion properly. |
1539 trace->Flush(compiler, this); | 1547 trace->Flush(compiler, this); |
1540 return DONE; | 1548 return DONE; |
1541 } | 1549 } |
1542 | 1550 |
1543 | 1551 |
// Lower bound on the number of characters consumed from this node onwards.
//
//   still_to_find  forwarded unchanged to the successor.
//   budget         remaining node-visit allowance; 0 is returned once it is
//                  used up, and the successor receives budget - 1.
//   not_at_start   forwarded unchanged to the successor.
//
// Returns 0 for a POSITIVE_SUBMATCH_SUCCESS action; for every other action
// the answer is whatever on_success() reports, i.e. this node adds nothing
// of its own.
1544 int ActionNode::EatsAtLeast(int still_to_find, | 1552 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find, |
1545 int budget, | 1553 intptr_t budget, |
1546 bool not_at_start) { | 1554 bool not_at_start) { |
1547 if (budget <= 0) return 0; | 1555 if (budget <= 0) return 0; |
1548 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! | 1556 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! |
1549 return on_success()->EatsAtLeast(still_to_find, | 1557 return on_success()->EatsAtLeast(still_to_find, |
1550 budget - 1, | 1558 budget - 1, |
1551 not_at_start); | 1559 not_at_start); |
1552 } | 1560 } |
1553 | 1561 |
1554 | 1562 |
// Contributes this node's part to the Boyer-Moore lookahead table `bm`.
//
//   offset        position passed to bm->SetRest / the successor / SaveBMInfo.
//   budget        the successor is called with budget - 1 (no lower-bound
//                 check is made in this function).
//   bm            lookahead table being filled in.
//   not_at_start  forwarded unchanged.
//
// Three cases by action type:
//   * BEGIN_SUBMATCH            -> bm->SetRest(offset); successor not visited.
//   * POSITIVE_SUBMATCH_SUCCESS -> neither SetRest nor the successor.
//   * anything else             -> forwarded to on_success().
// SaveBMInfo is called in all three cases.
1555 void ActionNode::FillInBMInfo(int offset, | 1563 void ActionNode::FillInBMInfo(intptr_t offset, |
1556 int budget, | 1564 intptr_t budget, |
1557 BoyerMooreLookahead* bm, | 1565 BoyerMooreLookahead* bm, |
1558 bool not_at_start) { | 1566 bool not_at_start) { |
1559 if (action_type_ == BEGIN_SUBMATCH) { | 1567 if (action_type_ == BEGIN_SUBMATCH) { |
1560 bm->SetRest(offset); | 1568 bm->SetRest(offset); |
1561 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { | 1569 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { |
1562 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 1570 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
1563 } | 1571 } |
1564 SaveBMInfo(bm, not_at_start, offset); | 1572 SaveBMInfo(bm, not_at_start, offset); |
1565 } | 1573 } |
1566 | 1574 |
1567 | 1575 |
// Lower bound on the number of characters consumed from this node onwards.
//
//   still_to_find  the largest answer the caller is interested in; returned
//                  as-is for an AT_START assertion when not_at_start is true.
//   budget         remaining node-visit allowance; 0 is returned once it is
//                  used up, and the successor receives budget - 1.
//   not_at_start   true when the caller knows the match is not at the start.
//
// In all other cases the result is whatever on_success() reports.
1568 int AssertionNode::EatsAtLeast(int still_to_find, | 1576 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find, |
1569 int budget, | 1577 intptr_t budget, |
1570 bool not_at_start) { | 1578 bool not_at_start) { |
1571 if (budget <= 0) return 0; | 1579 if (budget <= 0) return 0; |
1572 // If we know we are not at the start and we are asked "how many characters | 1580 // If we know we are not at the start and we are asked "how many characters |
1573 // will you match if you succeed?" then we can answer anything since false | 1581 // will you match if you succeed?" then we can answer anything since false |
1574 // implies false. So lets just return the max answer (still_to_find) since | 1582 // implies false. So lets just return the max answer (still_to_find) since |
1575 // that won't prevent us from preloading a lot of characters for the other | 1583 // that won't prevent us from preloading a lot of characters for the other |
1576 // branches in the node graph. | 1584 // branches in the node graph. |
1577 if (assertion_type() == AT_START && not_at_start) return still_to_find; | 1585 if (assertion_type() == AT_START && not_at_start) return still_to_find; |
1578 return on_success()->EatsAtLeast(still_to_find, | 1586 return on_success()->EatsAtLeast(still_to_find, |
1579 budget - 1, | 1587 budget - 1, |
1580 not_at_start); | 1588 not_at_start); |
1581 } | 1589 } |
1582 | 1590 |
1583 | 1591 |
// Contributes this node's part to the Boyer-Moore lookahead table `bm`.
//
// For an AT_START assertion with not_at_start set, the function returns
// immediately: neither the successor nor SaveBMInfo is called. Otherwise the
// request is forwarded to on_success() with budget - 1 and the result is
// recorded through SaveBMInfo.
1584 void AssertionNode::FillInBMInfo(int offset, | 1592 void AssertionNode::FillInBMInfo(intptr_t offset, |
1585 int budget, | 1593 intptr_t budget, |
1586 BoyerMooreLookahead* bm, | 1594 BoyerMooreLookahead* bm, |
1587 bool not_at_start) { | 1595 bool not_at_start) { |
1588 // Match the behaviour of EatsAtLeast on this node. | 1596 // Match the behaviour of EatsAtLeast on this node. |
1589 if (assertion_type() == AT_START && not_at_start) return; | 1597 if (assertion_type() == AT_START && not_at_start) return; |
1590 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 1598 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
1591 SaveBMInfo(bm, not_at_start, offset); | 1599 SaveBMInfo(bm, not_at_start, offset); |
1592 } | 1600 } |
1593 | 1601 |
1594 | 1602 |
// Lower bound on the number of characters consumed from this node onwards.
// The back reference itself contributes nothing to the count: the result is
// 0 when the budget is used up, otherwise exactly what on_success() reports
// for the same still_to_find / not_at_start and budget - 1.
1595 int BackReferenceNode::EatsAtLeast(int still_to_find, | 1603 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find, |
1596 int budget, | 1604 intptr_t budget, |
1597 bool not_at_start) { | 1605 bool not_at_start) { |
1598 if (budget <= 0) return 0; | 1606 if (budget <= 0) return 0; |
1599 return on_success()->EatsAtLeast(still_to_find, | 1607 return on_success()->EatsAtLeast(still_to_find, |
1600 budget - 1, | 1608 budget - 1, |
1601 not_at_start); | 1609 not_at_start); |
1602 } | 1610 } |
1603 | 1611 |
1604 | 1612 |
// Lower bound on the number of characters consumed from this node onwards.
//
//   still_to_find  how many characters the caller wants accounted for; once
//                  Length() alone covers it, the successor is not consulted.
//   budget         remaining node-visit allowance; when used up, only this
//                  node's own Length() is returned (not 0).
//   not_at_start   not read by this function; the successor is always called
//                  with `true`.
//
// Otherwise returns Length() plus the successor's answer for the remaining
// (still_to_find - Length()) characters.
1605 int TextNode::EatsAtLeast(int still_to_find, | 1613 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, |
1606 int budget, | 1614 intptr_t budget, |
1607 bool not_at_start) { | 1615 bool not_at_start) { |
1608 int answer = Length(); | 1616 intptr_t answer = Length(); |
1609 if (answer >= still_to_find) return answer; | 1617 if (answer >= still_to_find) return answer; |
1610 if (budget <= 0) return answer; | 1618 if (budget <= 0) return answer; |
1611 // We are not at start after this node so we set the last argument to 'true'. | 1619 // We are not at start after this node so we set the last argument to 'true'. |
1612 return answer + on_success()->EatsAtLeast(still_to_find - answer, | 1620 return answer + on_success()->EatsAtLeast(still_to_find - answer, |
1613 budget - 1, | 1621 budget - 1, |
1614 true); | 1622 true); |
1615 } | 1623 } |
1616 | 1624 |
1617 | 1625 |
// Lower bound on the number of characters consumed from this node onwards.
// Only alternative 1 (the continuation after the lookahead) is consulted;
// alternative 0 (the lookahead itself) is not visited. Returns 0 when the
// budget is used up; otherwise the continuation is asked with budget - 1.
1618 int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find, | 1626 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find, |
1619 int budget, | 1627 intptr_t budget, |
1620 bool not_at_start) { | 1628 bool not_at_start) { |
1621 if (budget <= 0) return 0; | 1629 if (budget <= 0) return 0; |
1622 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 1630 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
1623 // afterwards. | 1631 // afterwards. |
1624 RegExpNode* node = alternatives_->at(1).node(); | 1632 RegExpNode* node = (*alternatives_)[1].node(); |
1625 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start); | 1633 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
1626 } | 1634 } |
1627 | 1635 |
1628 | 1636 |
// Fills in `details` by delegating entirely to alternative 1 (the
// continuation after the lookahead); all four arguments are passed through
// unchanged and alternative 0 is not consulted. The `return` of the call
// expression is legal here because the callee's result is used as the
// operand of a return in a void function.
1629 void NegativeLookaheadChoiceNode::GetQuickCheckDetails( | 1637 void NegativeLookaheadChoiceNode::GetQuickCheckDetails( |
1630 QuickCheckDetails* details, | 1638 QuickCheckDetails* details, |
1631 RegExpCompiler* compiler, | 1639 RegExpCompiler* compiler, |
1632 int filled_in, | 1640 intptr_t filled_in, |
1633 bool not_at_start) { | 1641 bool not_at_start) { |
1634 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 1642 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
1635 // afterwards. | 1643 // afterwards. |
1636 RegExpNode* node = alternatives_->at(1).node(); | 1644 RegExpNode* node = (*alternatives_)[1].node(); |
1637 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); | 1645 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); |
1638 } | 1646 } |
1639 | 1647 |
1640 | 1648 |
// Shared implementation behind the choice-node EatsAtLeast variants: returns
// the minimum of EatsAtLeast over the alternatives.
//
//   still_to_find     forwarded unchanged to every alternative.
//   budget            0 is returned when used up; otherwise (budget - 1) is
//                     split evenly (integer division) across all
//                     alternatives, including a skipped one.
//   ignore_this_node  an alternative equal to this pointer is skipped; pass
//                     NULL to consider every alternative.
//   not_at_start      forwarded unchanged to every alternative.
//
// The running minimum starts at 100, so that is the result if no alternative
// is actually examined. The scan stops early as soon as the minimum is 0.
// NOTE(review): choice_count is used as a divisor, so this assumes the node
// has at least one alternative -- confirm against callers.
1641 int ChoiceNode::EatsAtLeastHelper(int still_to_find, | 1649 intptr_t ChoiceNode::EatsAtLeastHelper(intptr_t still_to_find, |
1642 int budget, | 1650 intptr_t budget, |
1643 RegExpNode* ignore_this_node, | 1651 RegExpNode* ignore_this_node, |
1644 bool not_at_start) { | 1652 bool not_at_start) { |
1645 if (budget <= 0) return 0; | 1653 if (budget <= 0) return 0; |
1646 int min = 100; | 1654 intptr_t min = 100; |
1647 int choice_count = alternatives_->length(); | 1655 intptr_t choice_count = alternatives_->length(); |
1648 budget = (budget - 1) / choice_count; | 1656 budget = (budget - 1) / choice_count; |
1649 for (int i = 0; i < choice_count; i++) { | 1657 for (intptr_t i = 0; i < choice_count; i++) { |
1650 RegExpNode* node = alternatives_->at(i).node(); | 1658 RegExpNode* node = (*alternatives_)[i].node(); |
1651 if (node == ignore_this_node) continue; | 1659 if (node == ignore_this_node) continue; |
1652 int node_eats_at_least = | 1660 intptr_t node_eats_at_least = |
1653 node->EatsAtLeast(still_to_find, budget, not_at_start); | 1661 node->EatsAtLeast(still_to_find, budget, not_at_start); |
1654 if (node_eats_at_least < min) min = node_eats_at_least; | 1662 if (node_eats_at_least < min) min = node_eats_at_least; |
1655 if (min == 0) return 0; | 1663 if (min == 0) return 0; |
1656 } | 1664 } |
1657 return min; | 1665 return min; |
1658 } | 1666 } |
1659 | 1667 |
1660 | 1668 |
// Lower bound on the number of characters consumed from this node onwards.
// Delegates to EatsAtLeastHelper with loop_node_ as the alternative to skip
// and with one unit already deducted from the budget (the helper deducts a
// further unit itself before dividing).
1661 int LoopChoiceNode::EatsAtLeast(int still_to_find, | 1669 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, |
1662 int budget, | 1670 intptr_t budget, |
1663 bool not_at_start) { | 1671 bool not_at_start) { |
1664 return EatsAtLeastHelper(still_to_find, | 1672 return EatsAtLeastHelper(still_to_find, |
1665 budget - 1, | 1673 budget - 1, |
1666 loop_node_, | 1674 loop_node_, |
1667 not_at_start); | 1675 not_at_start); |
1668 } | 1676 } |
1669 | 1677 |
1670 | 1678 |
// Lower bound on the number of characters consumed from this node onwards.
// Delegates to EatsAtLeastHelper with the budget unchanged and NULL as the
// node to ignore, so every alternative takes part in the minimum.
1671 int ChoiceNode::EatsAtLeast(int still_to_find, | 1679 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find, |
1672 int budget, | 1680 intptr_t budget, |
1673 bool not_at_start) { | 1681 bool not_at_start) { |
1674 return EatsAtLeastHelper(still_to_find, | 1682 return EatsAtLeastHelper(still_to_find, |
1675 budget, | 1683 budget, |
1676 NULL, | 1684 NULL, |
1677 not_at_start); | 1685 not_at_start); |
1678 } | 1686 } |
1679 | 1687 |
1680 | 1688 |
1681 // Takes the left-most 1-bit and smears it out, setting all bits to its right. | 1689 // Takes the left-most 1-bit and smears it out, setting all bits to its right. |
// The five shift-and-OR steps (1, 2, 4, 8, 16) double the run of set bits
// each time, which is enough to cover the full 32-bit width.
// Examples: 0x00100000 -> 0x001FFFFF, 0x80000000 -> 0xFFFFFFFF, 0 -> 0.
1682 static inline uint32_t SmearBitsRight(uint32_t v) { | 1690 static inline uint32_t SmearBitsRight(uint32_t v) { |
1683 v |= v >> 1; | 1691 v |= v >> 1; |
1684 v |= v >> 2; | 1692 v |= v >> 2; |
1685 v |= v >> 4; | 1693 v |= v >> 4; |
1686 v |= v >> 8; | 1694 v |= v >> 8; |
1687 v |= v >> 16; | 1695 v |= v >> 16; |
1688 return v; | 1696 return v; |
1689 } | 1697 } |
1690 | 1698 |
1691 | 1699 |
// Packs the per-position mask/value pairs in positions_[0 .. characters_)
// into the combined mask_ and value_ words.
//
//   asc  selects the character width: when true each position is clipped to
//        the one-byte character constant and occupies 8 bits of the combined
//        word; when false it is clipped to the UTF-16 code-unit constant and
//        occupies 16 bits.
//
// Position i is placed at bit offset i * 8 (asc) or i * 16 (otherwise), so
// position 0 ends up in the least significant bits. mask_ and value_ are
// reset to 0 before packing.
//
// Returns true if at least one position's mask has a bit in common with the
// one-byte character constant. That test uses the one-byte constant
// regardless of `asc`.
1692 bool QuickCheckDetails::Rationalize(bool asc) { | 1700 bool QuickCheckDetails::Rationalize(bool asc) { |
1693 bool found_useful_op = false; | 1701 bool found_useful_op = false; |
1694 uint32_t char_mask; | 1702 uint32_t char_mask; |
1695 if (asc) { | 1703 if (asc) { |
1696 char_mask = String::kMaxOneByteCharCode; | 1704 char_mask = Symbols::kMaxOneCharCodeSymbol; |
1697 } else { | 1705 } else { |
1698 char_mask = String::kMaxUtf16CodeUnit; | 1706 char_mask = Utf16::kMaxCodeUnit; |
1699 } | 1707 } |
1700 mask_ = 0; | 1708 mask_ = 0; |
1701 value_ = 0; | 1709 value_ = 0; |
1702 int char_shift = 0; | 1710 intptr_t char_shift = 0; |
1703 for (int i = 0; i < characters_; i++) { | 1711 for (intptr_t i = 0; i < characters_; i++) { |
1704 Position* pos = &positions_[i]; | 1712 Position* pos = &positions_[i]; |
1705 if ((pos->mask & String::kMaxOneByteCharCode) != 0) { | 1713 if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != 0) { |
1706 found_useful_op = true; | 1714 found_useful_op = true; |
1707 } | 1715 } |
1708 mask_ |= (pos->mask & char_mask) << char_shift; | 1716 mask_ |= (pos->mask & char_mask) << char_shift; |
1709 value_ |= (pos->value & char_mask) << char_shift; | 1717 value_ |= (pos->value & char_mask) << char_shift; |
1710 char_shift += asc ? 8 : 16; | 1718 char_shift += asc ? 8 : 16; |
1711 } | 1719 } |
1712 return found_useful_op; | 1720 return found_useful_op; |
1713 } | 1721 } |
1714 | 1722 |
1715 | 1723 |
1716 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, | 1724 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, |
1717 Trace* bounds_check_trace, | 1725 Trace* bounds_check_trace, |
1718 Trace* trace, | 1726 Trace* trace, |
1719 bool preload_has_checked_bounds, | 1727 bool preload_has_checked_bounds, |
1720 Label* on_possible_success, | 1728 BlockLabel* on_possible_success, |
1721 QuickCheckDetails* details, | 1729 QuickCheckDetails* details, |
1722 bool fall_through_on_failure) { | 1730 bool fall_through_on_failure) { |
1723 if (details->characters() == 0) return false; | 1731 if (details->characters() == 0) return false; |
1724 GetQuickCheckDetails( | 1732 GetQuickCheckDetails( |
1725 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE); | 1733 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE); |
1726 if (details->cannot_match()) return false; | 1734 if (details->cannot_match()) return false; |
1727 if (!details->Rationalize(compiler->one_byte())) return false; | 1735 if (!details->Rationalize(compiler->one_byte())) return false; |
1728 DCHECK(details->characters() == 1 || | 1736 ASSERT(details->characters() == 1 || |
1729 compiler->macro_assembler()->CanReadUnaligned()); | 1737 compiler->macro_assembler()->CanReadUnaligned()); |
1730 uint32_t mask = details->mask(); | 1738 uint32_t mask = details->mask(); |
1731 uint32_t value = details->value(); | 1739 uint32_t value = details->value(); |
1732 | 1740 |
1733 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 1741 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
1734 | 1742 |
1735 if (trace->characters_preloaded() != details->characters()) { | 1743 if (trace->characters_preloaded() != details->characters()) { |
1736 DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset()); | 1744 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset()); |
1737 // We are attempting to preload the minimum number of characters | 1745 // We are attempting to preload the minimum number of characters |
1738 // any choice would eat, so if the bounds check fails, then none of the | 1746 // any choice would eat, so if the bounds check fails, then none of the |
1739 // choices can succeed, so we can just immediately backtrack, rather | 1747 // choices can succeed, so we can just immediately backtrack, rather |
1740 // than go to the next choice. | 1748 // than go to the next choice. |
1741 assembler->LoadCurrentCharacter(trace->cp_offset(), | 1749 assembler->LoadCurrentCharacter(trace->cp_offset(), |
1742 bounds_check_trace->backtrack(), | 1750 bounds_check_trace->backtrack(), |
1743 !preload_has_checked_bounds, | 1751 !preload_has_checked_bounds, |
1744 details->characters()); | 1752 details->characters()); |
1745 } | 1753 } |
1746 | 1754 |
1747 | 1755 |
1748 bool need_mask = true; | 1756 bool need_mask = true; |
1749 | 1757 |
1750 if (details->characters() == 1) { | 1758 if (details->characters() == 1) { |
1751 // If number of characters preloaded is 1 then we used a byte or 16 bit | 1759 // If number of characters preloaded is 1 then we used a byte or 16 bit |
1752 // load so the value is already masked down. | 1760 // load so the value is already masked down. |
1753 uint32_t char_mask; | 1761 uint32_t char_mask; |
1754 if (compiler->one_byte()) { | 1762 if (compiler->one_byte()) { |
1755 char_mask = String::kMaxOneByteCharCode; | 1763 char_mask = Symbols::kMaxOneCharCodeSymbol; |
1756 } else { | 1764 } else { |
1757 char_mask = String::kMaxUtf16CodeUnit; | 1765 char_mask = Utf16::kMaxCodeUnit; |
1758 } | 1766 } |
1759 if ((mask & char_mask) == char_mask) need_mask = false; | 1767 if ((mask & char_mask) == char_mask) need_mask = false; |
1760 mask &= char_mask; | 1768 mask &= char_mask; |
1761 } else { | 1769 } else { |
1762 // For 2-character preloads in one-byte mode or 1-character preloads in | 1770 // For 2-character preloads in one-byte mode or 1-character preloads in |
1763 // two-byte mode we also use a 16 bit load with zero extend. | 1771 // two-byte mode we also use a 16 bit load with zero extend. |
1764 if (details->characters() == 2 && compiler->one_byte()) { | 1772 if (details->characters() == 2 && compiler->one_byte()) { |
1765 if ((mask & 0xffff) == 0xffff) need_mask = false; | 1773 if ((mask & 0xffff) == 0xffff) need_mask = false; |
1766 } else if (details->characters() == 1 && !compiler->one_byte()) { | 1774 } else if (details->characters() == 1 && !compiler->one_byte()) { |
1767 if ((mask & 0xffff) == 0xffff) need_mask = false; | 1775 if ((mask & 0xffff) == 0xffff) need_mask = false; |
(...skipping 22 matching lines...) Expand all Loading... |
1790 // Here is the meat of GetQuickCheckDetails (see also the comment on the | 1798 // Here is the meat of GetQuickCheckDetails (see also the comment on the |
1791 // super-class in the .h file). | 1799 // super-class in the .h file). |
1792 // | 1800 // |
1793 // We iterate along the text object, building up for each character a | 1801 // We iterate along the text object, building up for each character a |
1794 // mask and value that can be used to test for a quick failure to match. | 1802 // mask and value that can be used to test for a quick failure to match. |
1795 // The masks and values for the positions will be combined into a single | 1803 // The masks and values for the positions will be combined into a single |
1796 // machine word for the current character width in order to be used in | 1804 // machine word for the current character width in order to be used in |
1797 // generating a quick check. | 1805 // generating a quick check. |
1798 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, | 1806 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
1799 RegExpCompiler* compiler, | 1807 RegExpCompiler* compiler, |
1800 int characters_filled_in, | 1808 intptr_t characters_filled_in, |
1801 bool not_at_start) { | 1809 bool not_at_start) { |
1802 Isolate* isolate = compiler->macro_assembler()->zone()->isolate(); | 1810 #if defined(__GNUC__) |
1803 DCHECK(characters_filled_in < details->characters()); | 1811 // TODO(zerny): Make the combination code byte-order independent. |
1804 int characters = details->characters(); | 1812 ASSERT(details->characters() == 1 || |
1805 int char_mask; | 1813 (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)); |
| 1814 #endif |
| 1815 ASSERT(characters_filled_in < details->characters()); |
| 1816 intptr_t characters = details->characters(); |
| 1817 intptr_t char_mask; |
1806 if (compiler->one_byte()) { | 1818 if (compiler->one_byte()) { |
1807 char_mask = String::kMaxOneByteCharCode; | 1819 char_mask = Symbols::kMaxOneCharCodeSymbol; |
1808 } else { | 1820 } else { |
1809 char_mask = String::kMaxUtf16CodeUnit; | 1821 char_mask = Utf16::kMaxCodeUnit; |
1810 } | 1822 } |
1811 for (int k = 0; k < elms_->length(); k++) { | 1823 for (intptr_t k = 0; k < elms_->length(); k++) { |
1812 TextElement elm = elms_->at(k); | 1824 TextElement elm = elms_->At(k); |
1813 if (elm.text_type() == TextElement::ATOM) { | 1825 if (elm.text_type() == TextElement::ATOM) { |
1814 Vector<const uc16> quarks = elm.atom()->data(); | 1826 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); |
1815 for (int i = 0; i < characters && i < quarks.length(); i++) { | 1827 for (intptr_t i = 0; i < characters && i < quarks->length(); i++) { |
1816 QuickCheckDetails::Position* pos = | 1828 QuickCheckDetails::Position* pos = |
1817 details->positions(characters_filled_in); | 1829 details->positions(characters_filled_in); |
1818 uc16 c = quarks[i]; | 1830 uint16_t c = quarks->At(i); |
1819 if (c > char_mask) { | 1831 if (c > char_mask) { |
1820 // If we expect a non-Latin1 character from an one-byte string, | 1832 // If we expect a non-Latin1 character from an one-byte string, |
1821 // there is no way we can match. Not even case-independent | 1833 // there is no way we can match. Not even case independent |
1822 // matching can turn an Latin1 character into non-Latin1 or | 1834 // matching can turn an Latin1 character into non-Latin1 or |
1823 // vice versa. | 1835 // vice versa. |
1824 // TODO(dcarney): issue 3550. Verify that this works as expected. | 1836 // TODO(dcarney): issue 3550. Verify that this works as expected. |
1825 // For example, \u0178 is uppercase of \u00ff (y-umlaut). | 1837 // For example, \u0178 is uppercase of \u00ff (y-umlaut). |
1826 details->set_cannot_match(); | 1838 details->set_cannot_match(); |
1827 pos->determines_perfectly = false; | 1839 pos->determines_perfectly = false; |
1828 return; | 1840 return; |
1829 } | 1841 } |
1830 if (compiler->ignore_case()) { | 1842 if (compiler->ignore_case()) { |
1831 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1843 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
1832 int length = GetCaseIndependentLetters(isolate, c, | 1844 intptr_t length = |
1833 compiler->one_byte(), chars); | 1845 GetCaseIndependentLetters(c, compiler->one_byte(), chars); |
1834 DCHECK(length != 0); // Can only happen if c > char_mask (see above). | 1846 ASSERT(length != 0); // Can only happen if c > char_mask (see above). |
1835 if (length == 1) { | 1847 if (length == 1) { |
1836 // This letter has no case equivalents, so it's nice and simple | 1848 // This letter has no case equivalents, so it's nice and simple |
1837 // and the mask-compare will determine definitely whether we have | 1849 // and the mask-compare will determine definitely whether we have |
1838 // a match at this character position. | 1850 // a match at this character position. |
1839 pos->mask = char_mask; | 1851 pos->mask = char_mask; |
1840 pos->value = c; | 1852 pos->value = c; |
1841 pos->determines_perfectly = true; | 1853 pos->determines_perfectly = true; |
1842 } else { | 1854 } else { |
1843 uint32_t common_bits = char_mask; | 1855 uint32_t common_bits = char_mask; |
1844 uint32_t bits = chars[0]; | 1856 uint32_t bits = chars[0]; |
1845 for (int j = 1; j < length; j++) { | 1857 for (intptr_t j = 1; j < length; j++) { |
1846 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits); | 1858 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits); |
1847 common_bits ^= differing_bits; | 1859 common_bits ^= differing_bits; |
1848 bits &= common_bits; | 1860 bits &= common_bits; |
1849 } | 1861 } |
1850 // If length is 2 and common bits has only one zero in it then | 1862 // If length is 2 and common bits has only one zero in it then |
1851 // our mask and compare instruction will determine definitely | 1863 // our mask and compare instruction will determine definitely |
1852 // whether we have a match at this character position. Otherwise | 1864 // whether we have a match at this character position. Otherwise |
1853 // it can only be an approximate check. | 1865 // it can only be an approximate check. |
1854 uint32_t one_zero = (common_bits | ~char_mask); | 1866 uint32_t one_zero = (common_bits | ~char_mask); |
1855 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) { | 1867 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) { |
1856 pos->determines_perfectly = true; | 1868 pos->determines_perfectly = true; |
1857 } | 1869 } |
1858 pos->mask = common_bits; | 1870 pos->mask = common_bits; |
1859 pos->value = bits; | 1871 pos->value = bits; |
1860 } | 1872 } |
1861 } else { | 1873 } else { |
1862 // Don't ignore case. Nice simple case where the mask-compare will | 1874 // Don't ignore case. Nice simple case where the mask-compare will |
1863 // determine definitely whether we have a match at this character | 1875 // determine definitely whether we have a match at this character |
1864 // position. | 1876 // position. |
1865 pos->mask = char_mask; | 1877 pos->mask = char_mask; |
1866 pos->value = c; | 1878 pos->value = c; |
1867 pos->determines_perfectly = true; | 1879 pos->determines_perfectly = true; |
1868 } | 1880 } |
1869 characters_filled_in++; | 1881 characters_filled_in++; |
1870 DCHECK(characters_filled_in <= details->characters()); | 1882 ASSERT(characters_filled_in <= details->characters()); |
1871 if (characters_filled_in == details->characters()) { | 1883 if (characters_filled_in == details->characters()) { |
1872 return; | 1884 return; |
1873 } | 1885 } |
1874 } | 1886 } |
1875 } else { | 1887 } else { |
1876 QuickCheckDetails::Position* pos = | 1888 QuickCheckDetails::Position* pos = |
1877 details->positions(characters_filled_in); | 1889 details->positions(characters_filled_in); |
1878 RegExpCharacterClass* tree = elm.char_class(); | 1890 RegExpCharacterClass* tree = elm.char_class(); |
1879 ZoneList<CharacterRange>* ranges = tree->ranges(zone()); | 1891 ZoneGrowableArray<CharacterRange>* ranges = tree->ranges(); |
1880 if (tree->is_negated()) { | 1892 if (tree->is_negated()) { |
1881 // A quick check uses multi-character mask and compare. There is no | 1893 // A quick check uses multi-character mask and compare. There is no |
1882 // useful way to incorporate a negative char class into this scheme | 1894 // useful way to incorporate a negative char class into this scheme |
1883 // so we just conservatively create a mask and value that will always | 1895 // so we just conservatively create a mask and value that will always |
1884 // succeed. | 1896 // succeed. |
1885 pos->mask = 0; | 1897 pos->mask = 0; |
1886 pos->value = 0; | 1898 pos->value = 0; |
1887 } else { | 1899 } else { |
1888 int first_range = 0; | 1900 intptr_t first_range = 0; |
1889 while (ranges->at(first_range).from() > char_mask) { | 1901 while (ranges->At(first_range).from() > char_mask) { |
1890 first_range++; | 1902 first_range++; |
1891 if (first_range == ranges->length()) { | 1903 if (first_range == ranges->length()) { |
1892 details->set_cannot_match(); | 1904 details->set_cannot_match(); |
1893 pos->determines_perfectly = false; | 1905 pos->determines_perfectly = false; |
1894 return; | 1906 return; |
1895 } | 1907 } |
1896 } | 1908 } |
1897 CharacterRange range = ranges->at(first_range); | 1909 CharacterRange range = ranges->At(first_range); |
1898 uc16 from = range.from(); | 1910 uint16_t from = range.from(); |
1899 uc16 to = range.to(); | 1911 uint16_t to = range.to(); |
1900 if (to > char_mask) { | 1912 if (to > char_mask) { |
1901 to = char_mask; | 1913 to = char_mask; |
1902 } | 1914 } |
1903 uint32_t differing_bits = (from ^ to); | 1915 uint32_t differing_bits = (from ^ to); |
1904 // A mask and compare is only perfect if the differing bits form a | 1916 // A mask and compare is only perfect if the differing bits form a |
1905 // number like 00011111 with one single block of trailing 1s. | 1917 // number like 00011111 with one single block of trailing 1s. |
1906 if ((differing_bits & (differing_bits + 1)) == 0 && | 1918 if ((differing_bits & (differing_bits + 1)) == 0 && |
1907 from + differing_bits == to) { | 1919 from + differing_bits == to) { |
1908 pos->determines_perfectly = true; | 1920 pos->determines_perfectly = true; |
1909 } | 1921 } |
1910 uint32_t common_bits = ~SmearBitsRight(differing_bits); | 1922 uint32_t common_bits = ~SmearBitsRight(differing_bits); |
1911 uint32_t bits = (from & common_bits); | 1923 uint32_t bits = (from & common_bits); |
1912 for (int i = first_range + 1; i < ranges->length(); i++) { | 1924 for (intptr_t i = first_range + 1; i < ranges->length(); i++) { |
1913 CharacterRange range = ranges->at(i); | 1925 CharacterRange range = ranges->At(i); |
1914 uc16 from = range.from(); | 1926 uint16_t from = range.from(); |
1915 uc16 to = range.to(); | 1927 uint16_t to = range.to(); |
1916 if (from > char_mask) continue; | 1928 if (from > char_mask) continue; |
1917 if (to > char_mask) to = char_mask; | 1929 if (to > char_mask) to = char_mask; |
1918 // Here we are combining more ranges into the mask and compare | 1930 // Here we are combining more ranges into the mask and compare |
1919 // value. With each new range the mask becomes more sparse and | 1931 // value. With each new range the mask becomes more sparse and |
1920 // so the chances of a false positive rise. A character class | 1932 // so the chances of a false positive rise. A character class |
1921 // with multiple ranges is assumed never to be equivalent to a | 1933 // with multiple ranges is assumed never to be equivalent to a |
1922 // mask and compare operation. | 1934 // mask and compare operation. |
1923 pos->determines_perfectly = false; | 1935 pos->determines_perfectly = false; |
1924 uint32_t new_common_bits = (from ^ to); | 1936 uint32_t new_common_bits = (from ^ to); |
1925 new_common_bits = ~SmearBitsRight(new_common_bits); | 1937 new_common_bits = ~SmearBitsRight(new_common_bits); |
1926 common_bits &= new_common_bits; | 1938 common_bits &= new_common_bits; |
1927 bits &= new_common_bits; | 1939 bits &= new_common_bits; |
1928 uint32_t differing_bits = (from & common_bits) ^ bits; | 1940 uint32_t differing_bits = (from & common_bits) ^ bits; |
1929 common_bits ^= differing_bits; | 1941 common_bits ^= differing_bits; |
1930 bits &= common_bits; | 1942 bits &= common_bits; |
1931 } | 1943 } |
1932 pos->mask = common_bits; | 1944 pos->mask = common_bits; |
1933 pos->value = bits; | 1945 pos->value = bits; |
1934 } | 1946 } |
1935 characters_filled_in++; | 1947 characters_filled_in++; |
1936 DCHECK(characters_filled_in <= details->characters()); | 1948 ASSERT(characters_filled_in <= details->characters()); |
1937 if (characters_filled_in == details->characters()) { | 1949 if (characters_filled_in == details->characters()) { |
1938 return; | 1950 return; |
1939 } | 1951 } |
1940 } | 1952 } |
1941 } | 1953 } |
1942 DCHECK(characters_filled_in != details->characters()); | 1954 ASSERT(characters_filled_in != details->characters()); |
1943 if (!details->cannot_match()) { | 1955 if (!details->cannot_match()) { |
1944 on_success()-> GetQuickCheckDetails(details, | 1956 on_success()-> GetQuickCheckDetails(details, |
1945 compiler, | 1957 compiler, |
1946 characters_filled_in, | 1958 characters_filled_in, |
1947 true); | 1959 true); |
1948 } | 1960 } |
1949 } | 1961 } |
1950 | 1962 |
1951 | 1963 |
1952 void QuickCheckDetails::Clear() { | 1964 void QuickCheckDetails::Clear() { |
1953 for (int i = 0; i < characters_; i++) { | 1965 for (int i = 0; i < characters_; i++) { |
1954 positions_[i].mask = 0; | 1966 positions_[i].mask = 0; |
1955 positions_[i].value = 0; | 1967 positions_[i].value = 0; |
1956 positions_[i].determines_perfectly = false; | 1968 positions_[i].determines_perfectly = false; |
1957 } | 1969 } |
1958 characters_ = 0; | 1970 characters_ = 0; |
1959 } | 1971 } |
1960 | 1972 |
1961 | 1973 |
1962 void QuickCheckDetails::Advance(int by, bool one_byte) { | 1974 void QuickCheckDetails::Advance(intptr_t by, bool one_byte) { |
1963 DCHECK(by >= 0); | 1975 ASSERT(by >= 0); |
1964 if (by >= characters_) { | 1976 if (by >= characters_) { |
1965 Clear(); | 1977 Clear(); |
1966 return; | 1978 return; |
1967 } | 1979 } |
1968 for (int i = 0; i < characters_ - by; i++) { | 1980 for (intptr_t i = 0; i < characters_ - by; i++) { |
1969 positions_[i] = positions_[by + i]; | 1981 positions_[i] = positions_[by + i]; |
1970 } | 1982 } |
1971 for (int i = characters_ - by; i < characters_; i++) { | 1983 for (intptr_t i = characters_ - by; i < characters_; i++) { |
1972 positions_[i].mask = 0; | 1984 positions_[i].mask = 0; |
1973 positions_[i].value = 0; | 1985 positions_[i].value = 0; |
1974 positions_[i].determines_perfectly = false; | 1986 positions_[i].determines_perfectly = false; |
1975 } | 1987 } |
1976 characters_ -= by; | 1988 characters_ -= by; |
1977 // We could change mask_ and value_ here but we would never advance unless | 1989 // We could change mask_ and value_ here but we would never advance unless |
1978 // they had already been used in a check and they won't be used again because | 1990 // they had already been used in a check and they won't be used again because |
1979 // it would gain us nothing. So there's no point. | 1991 // it would gain us nothing. So there's no point. |
1980 } | 1992 } |
1981 | 1993 |
1982 | 1994 |
1983 void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) { | 1995 void QuickCheckDetails::Merge(QuickCheckDetails* other, intptr_t from_index) { |
1984 DCHECK(characters_ == other->characters_); | 1996 ASSERT(characters_ == other->characters_); |
1985 if (other->cannot_match_) { | 1997 if (other->cannot_match_) { |
1986 return; | 1998 return; |
1987 } | 1999 } |
1988 if (cannot_match_) { | 2000 if (cannot_match_) { |
1989 *this = *other; | 2001 *this = *other; |
1990 return; | 2002 return; |
1991 } | 2003 } |
1992 for (int i = from_index; i < characters_; i++) { | 2004 for (intptr_t i = from_index; i < characters_; i++) { |
1993 QuickCheckDetails::Position* pos = positions(i); | 2005 QuickCheckDetails::Position* pos = positions(i); |
1994 QuickCheckDetails::Position* other_pos = other->positions(i); | 2006 QuickCheckDetails::Position* other_pos = other->positions(i); |
1995 if (pos->mask != other_pos->mask || | 2007 if (pos->mask != other_pos->mask || |
1996 pos->value != other_pos->value || | 2008 pos->value != other_pos->value || |
1997 !other_pos->determines_perfectly) { | 2009 !other_pos->determines_perfectly) { |
1998 // Our mask-compare operation will be approximate unless we have the | 2010 // Our mask-compare operation will be approximate unless we have the |
1999 // exact same operation on both sides of the alternation. | 2011 // exact same operation on both sides of the alternation. |
2000 pos->determines_perfectly = false; | 2012 pos->determines_perfectly = false; |
2001 } | 2013 } |
2002 pos->mask &= other_pos->mask; | 2014 pos->mask &= other_pos->mask; |
2003 pos->value &= pos->mask; | 2015 pos->value &= pos->mask; |
2004 other_pos->value &= pos->mask; | 2016 other_pos->value &= pos->mask; |
2005 uc16 differing_bits = (pos->value ^ other_pos->value); | 2017 uint16_t differing_bits = (pos->value ^ other_pos->value); |
2006 pos->mask &= ~differing_bits; | 2018 pos->mask &= ~differing_bits; |
2007 pos->value &= pos->mask; | 2019 pos->value &= pos->mask; |
2008 } | 2020 } |
2009 } | 2021 } |
2010 | 2022 |
2011 | 2023 |
2012 class VisitMarker { | 2024 class VisitMarker : public ValueObject { |
2013 public: | 2025 public: |
2014 explicit VisitMarker(NodeInfo* info) : info_(info) { | 2026 explicit VisitMarker(NodeInfo* info) : info_(info) { |
2015 DCHECK(!info->visited); | 2027 ASSERT(!info->visited); |
2016 info->visited = true; | 2028 info->visited = true; |
2017 } | 2029 } |
2018 ~VisitMarker() { | 2030 ~VisitMarker() { |
2019 info_->visited = false; | 2031 info_->visited = false; |
2020 } | 2032 } |
2021 private: | 2033 private: |
2022 NodeInfo* info_; | 2034 NodeInfo* info_; |
2023 }; | 2035 }; |
2024 | 2036 |
2025 | 2037 |
2026 RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) { | 2038 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
2027 if (info()->replacement_calculated) return replacement(); | 2039 if (info()->replacement_calculated) return replacement(); |
2028 if (depth < 0) return this; | 2040 if (depth < 0) return this; |
2029 DCHECK(!info()->visited); | 2041 ASSERT(!info()->visited); |
2030 VisitMarker marker(info()); | 2042 VisitMarker marker(info()); |
2031 return FilterSuccessor(depth - 1, ignore_case); | 2043 return FilterSuccessor(depth - 1, ignore_case); |
2032 } | 2044 } |
2033 | 2045 |
2034 | 2046 |
2035 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { | 2047 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) { |
2036 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); | 2048 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); |
2037 if (next == NULL) return set_replacement(NULL); | 2049 if (next == NULL) return set_replacement(NULL); |
2038 on_success_ = next; | 2050 on_success_ = next; |
2039 return set_replacement(this); | 2051 return set_replacement(this); |
2040 } | 2052 } |
2041 | 2053 |
2042 | 2054 |
2043 // We need to check for the following characters: 0x39c 0x3bc 0x178. | 2055 // We need to check for the following characters: 0x39c 0x3bc 0x178. |
2044 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { | 2056 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { |
2045 // TODO(dcarney): this could be a lot more efficient. | 2057 // TODO(dcarney): this could be a lot more efficient. |
2046 return range.Contains(0x39c) || | 2058 return range.Contains(0x39c) || |
2047 range.Contains(0x3bc) || range.Contains(0x178); | 2059 range.Contains(0x3bc) || range.Contains(0x178); |
2048 } | 2060 } |
2049 | 2061 |
2050 | 2062 |
2051 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { | 2063 static bool RangesContainLatin1Equivalents( |
2052 for (int i = 0; i < ranges->length(); i++) { | 2064 ZoneGrowableArray<CharacterRange>* ranges) { |
| 2065 for (intptr_t i = 0; i < ranges->length(); i++) { |
2053 // TODO(dcarney): this could be a lot more efficient. | 2066 // TODO(dcarney): this could be a lot more efficient. |
2054 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; | 2067 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true; |
2055 } | 2068 } |
2056 return false; | 2069 return false; |
2057 } | 2070 } |
2058 | 2071 |
2059 | 2072 |
2060 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { | 2073 static uint16_t ConvertNonLatin1ToLatin1(uint16_t c) { |
| 2074 ASSERT(c > Symbols::kMaxOneCharCodeSymbol); |
| 2075 switch (c) { |
| 2076 // These are equivalent characters in Unicode. |
| 2077 case 0x39c: |
| 2078 case 0x3bc: |
| 2079 return 0xb5; |
| 2080 // This is the uppercase form of a Latin-1 character (0xff) |
| 2081 // that itself lies outside of Latin-1. |
| 2082 case 0x178: |
| 2083 return 0xff; |
| 2084 } |
| 2085 return 0; |
| 2086 } |
| 2087 |
| 2088 |
| 2089 RegExpNode* TextNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
2061 if (info()->replacement_calculated) return replacement(); | 2090 if (info()->replacement_calculated) return replacement(); |
2062 if (depth < 0) return this; | 2091 if (depth < 0) return this; |
2063 DCHECK(!info()->visited); | 2092 ASSERT(!info()->visited); |
2064 VisitMarker marker(info()); | 2093 VisitMarker marker(info()); |
2065 int element_count = elms_->length(); | 2094 intptr_t element_count = elms_->length(); |
2066 for (int i = 0; i < element_count; i++) { | 2095 for (intptr_t i = 0; i < element_count; i++) { |
2067 TextElement elm = elms_->at(i); | 2096 TextElement elm = elms_->At(i); |
2068 if (elm.text_type() == TextElement::ATOM) { | 2097 if (elm.text_type() == TextElement::ATOM) { |
2069 Vector<const uc16> quarks = elm.atom()->data(); | 2098 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); |
2070 for (int j = 0; j < quarks.length(); j++) { | 2099 for (intptr_t j = 0; j < quarks->length(); j++) { |
2071 uint16_t c = quarks[j]; | 2100 uint16_t c = quarks->At(j); |
2072 if (c <= String::kMaxOneByteCharCode) continue; | 2101 if (c <= Symbols::kMaxOneCharCodeSymbol) continue; |
2073 if (!ignore_case) return set_replacement(NULL); | 2102 if (!ignore_case) return set_replacement(NULL); |
2074 // Here, we need to check for characters whose upper and lower cases | 2103 // Here, we need to check for characters whose upper and lower cases |
2075 // are outside the Latin-1 range. | 2104 // are outside the Latin-1 range. |
2076 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); | 2105 uint16_t converted = ConvertNonLatin1ToLatin1(c); |
2077 // Character is outside Latin-1 completely | 2106 // Character is outside Latin-1 completely |
2078 if (converted == 0) return set_replacement(NULL); | 2107 if (converted == 0) return set_replacement(NULL); |
2079 // Convert quark to Latin-1 in place. | 2108 // Convert quark to Latin-1 in place. |
2080 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); | 2109 (*quarks)[j] = converted; |
2081 copy[j] = converted; | |
2082 } | 2110 } |
2083 } else { | 2111 } else { |
2084 DCHECK(elm.text_type() == TextElement::CHAR_CLASS); | 2112 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); |
2085 RegExpCharacterClass* cc = elm.char_class(); | 2113 RegExpCharacterClass* cc = elm.char_class(); |
2086 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 2114 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); |
2087 if (!CharacterRange::IsCanonical(ranges)) { | 2115 if (!CharacterRange::IsCanonical(ranges)) { |
2088 CharacterRange::Canonicalize(ranges); | 2116 CharacterRange::Canonicalize(ranges); |
2089 } | 2117 } |
2090 // Now they are in order so we only need to look at the first. | 2118 // Now they are in order so we only need to look at the first. |
2091 int range_count = ranges->length(); | 2119 intptr_t range_count = ranges->length(); |
2092 if (cc->is_negated()) { | 2120 if (cc->is_negated()) { |
2093 if (range_count != 0 && | 2121 if (range_count != 0 && |
2094 ranges->at(0).from() == 0 && | 2122 ranges->At(0).from() == 0 && |
2095 ranges->at(0).to() >= String::kMaxOneByteCharCode) { | 2123 ranges->At(0).to() >= Symbols::kMaxOneCharCodeSymbol) { |
2096 // This will be handled in a later filter. | 2124 // This will be handled in a later filter. |
2097 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2125 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
2098 return set_replacement(NULL); | 2126 return set_replacement(NULL); |
2099 } | 2127 } |
2100 } else { | 2128 } else { |
2101 if (range_count == 0 || | 2129 if (range_count == 0 || |
2102 ranges->at(0).from() > String::kMaxOneByteCharCode) { | 2130 ranges->At(0).from() > Symbols::kMaxOneCharCodeSymbol) { |
2103 // This will be handled in a later filter. | 2131 // This will be handled in a later filter. |
2104 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2132 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
2105 return set_replacement(NULL); | 2133 return set_replacement(NULL); |
2106 } | 2134 } |
2107 } | 2135 } |
2108 } | 2136 } |
2109 } | 2137 } |
2110 return FilterSuccessor(depth - 1, ignore_case); | 2138 return FilterSuccessor(depth - 1, ignore_case); |
2111 } | 2139 } |
2112 | 2140 |
2113 | 2141 |
2114 RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) { | 2142 RegExpNode* LoopChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
2115 if (info()->replacement_calculated) return replacement(); | 2143 if (info()->replacement_calculated) return replacement(); |
2116 if (depth < 0) return this; | 2144 if (depth < 0) return this; |
2117 if (info()->visited) return this; | 2145 if (info()->visited) return this; |
2118 { | 2146 { |
2119 VisitMarker marker(info()); | 2147 VisitMarker marker(info()); |
2120 | 2148 |
2121 RegExpNode* continue_replacement = | 2149 RegExpNode* continue_replacement = |
2122 continue_node_->FilterOneByte(depth - 1, ignore_case); | 2150 continue_node_->FilterOneByte(depth - 1, ignore_case); |
2123 // If we can't continue after the loop then there is no sense in doing the | 2151 // If we can't continue after the loop then there is no sense in doing the |
2124 // loop. | 2152 // loop. |
2125 if (continue_replacement == NULL) return set_replacement(NULL); | 2153 if (continue_replacement == NULL) return set_replacement(NULL); |
2126 } | 2154 } |
2127 | 2155 |
2128 return ChoiceNode::FilterOneByte(depth - 1, ignore_case); | 2156 return ChoiceNode::FilterOneByte(depth - 1, ignore_case); |
2129 } | 2157 } |
2130 | 2158 |
2131 | 2159 |
2132 RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) { | 2160 RegExpNode* ChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
2133 if (info()->replacement_calculated) return replacement(); | 2161 if (info()->replacement_calculated) return replacement(); |
2134 if (depth < 0) return this; | 2162 if (depth < 0) return this; |
2135 if (info()->visited) return this; | 2163 if (info()->visited) return this; |
2136 VisitMarker marker(info()); | 2164 VisitMarker marker(info()); |
2137 int choice_count = alternatives_->length(); | 2165 intptr_t choice_count = alternatives_->length(); |
2138 | 2166 |
2139 for (int i = 0; i < choice_count; i++) { | 2167 for (intptr_t i = 0; i < choice_count; i++) { |
2140 GuardedAlternative alternative = alternatives_->at(i); | 2168 GuardedAlternative alternative = alternatives_->At(i); |
2141 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { | 2169 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { |
2142 set_replacement(this); | 2170 set_replacement(this); |
2143 return this; | 2171 return this; |
2144 } | 2172 } |
2145 } | 2173 } |
2146 | 2174 |
2147 int surviving = 0; | 2175 intptr_t surviving = 0; |
2148 RegExpNode* survivor = NULL; | 2176 RegExpNode* survivor = NULL; |
2149 for (int i = 0; i < choice_count; i++) { | 2177 for (intptr_t i = 0; i < choice_count; i++) { |
2150 GuardedAlternative alternative = alternatives_->at(i); | 2178 GuardedAlternative alternative = alternatives_->At(i); |
2151 RegExpNode* replacement = | 2179 RegExpNode* replacement = |
2152 alternative.node()->FilterOneByte(depth - 1, ignore_case); | 2180 alternative.node()->FilterOneByte(depth - 1, ignore_case); |
2153 DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK. | 2181 ASSERT(replacement != this); // No missing EMPTY_MATCH_CHECK. |
2154 if (replacement != NULL) { | 2182 if (replacement != NULL) { |
2155 alternatives_->at(i).set_node(replacement); | 2183 (*alternatives_)[i].set_node(replacement); |
2156 surviving++; | 2184 surviving++; |
2157 survivor = replacement; | 2185 survivor = replacement; |
2158 } | 2186 } |
2159 } | 2187 } |
2160 if (surviving < 2) return set_replacement(survivor); | 2188 if (surviving < 2) return set_replacement(survivor); |
2161 | 2189 |
2162 set_replacement(this); | 2190 set_replacement(this); |
2163 if (surviving == choice_count) { | 2191 if (surviving == choice_count) { |
2164 return this; | 2192 return this; |
2165 } | 2193 } |
2166 // Only some of the nodes survived the filtering. We need to rebuild the | 2194 // Only some of the nodes survived the filtering. We need to rebuild the |
2167 // alternatives list. | 2195 // alternatives list. |
2168 ZoneList<GuardedAlternative>* new_alternatives = | 2196 ZoneGrowableArray<GuardedAlternative>* new_alternatives = |
2169 new(zone()) ZoneList<GuardedAlternative>(surviving, zone()); | 2197 new(I) ZoneGrowableArray<GuardedAlternative>(surviving); |
2170 for (int i = 0; i < choice_count; i++) { | 2198 for (intptr_t i = 0; i < choice_count; i++) { |
2171 RegExpNode* replacement = | 2199 RegExpNode* replacement = |
2172 alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case); | 2200 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case); |
2173 if (replacement != NULL) { | 2201 if (replacement != NULL) { |
2174 alternatives_->at(i).set_node(replacement); | 2202 (*alternatives_)[i].set_node(replacement); |
2175 new_alternatives->Add(alternatives_->at(i), zone()); | 2203 new_alternatives->Add((*alternatives_)[i]); |
2176 } | 2204 } |
2177 } | 2205 } |
2178 alternatives_ = new_alternatives; | 2206 alternatives_ = new_alternatives; |
2179 return this; | 2207 return this; |
2180 } | 2208 } |
2181 | 2209 |
2182 | 2210 |
2183 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(int depth, | 2211 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(intptr_t depth, |
2184 bool ignore_case) { | 2212 bool ignore_case) { |
2185 if (info()->replacement_calculated) return replacement(); | 2213 if (info()->replacement_calculated) return replacement(); |
2186 if (depth < 0) return this; | 2214 if (depth < 0) return this; |
2187 if (info()->visited) return this; | 2215 if (info()->visited) return this; |
2188 VisitMarker marker(info()); | 2216 VisitMarker marker(info()); |
2189 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 2217 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
2190 // afterwards. | 2218 // afterwards. |
2191 RegExpNode* node = alternatives_->at(1).node(); | 2219 RegExpNode* node = (*alternatives_)[1].node(); |
2192 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); | 2220 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); |
2193 if (replacement == NULL) return set_replacement(NULL); | 2221 if (replacement == NULL) return set_replacement(NULL); |
2194 alternatives_->at(1).set_node(replacement); | 2222 (*alternatives_)[1].set_node(replacement); |
2195 | 2223 |
2196 RegExpNode* neg_node = alternatives_->at(0).node(); | 2224 RegExpNode* neg_node = (*alternatives_)[0].node(); |
2197 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); | 2225 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); |
2198 // If the negative lookahead is always going to fail then | 2226 // If the negative lookahead is always going to fail then |
2199 // we don't need to check it. | 2227 // we don't need to check it. |
2200 if (neg_replacement == NULL) return set_replacement(replacement); | 2228 if (neg_replacement == NULL) return set_replacement(replacement); |
2201 alternatives_->at(0).set_node(neg_replacement); | 2229 (*alternatives_)[0].set_node(neg_replacement); |
2202 return set_replacement(this); | 2230 return set_replacement(this); |
2203 } | 2231 } |
2204 | 2232 |
2205 | 2233 |
2206 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2234 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2207 RegExpCompiler* compiler, | 2235 RegExpCompiler* compiler, |
2208 int characters_filled_in, | 2236 intptr_t characters_filled_in, |
2209 bool not_at_start) { | 2237 bool not_at_start) { |
2210 if (body_can_be_zero_length_ || info()->visited) return; | 2238 if (body_can_be_zero_length_ || info()->visited) return; |
2211 VisitMarker marker(info()); | 2239 VisitMarker marker(info()); |
2212 return ChoiceNode::GetQuickCheckDetails(details, | 2240 return ChoiceNode::GetQuickCheckDetails(details, |
2213 compiler, | 2241 compiler, |
2214 characters_filled_in, | 2242 characters_filled_in, |
2215 not_at_start); | 2243 not_at_start); |
2216 } | 2244 } |
2217 | 2245 |
2218 | 2246 |
2219 void LoopChoiceNode::FillInBMInfo(int offset, | 2247 void LoopChoiceNode::FillInBMInfo(intptr_t offset, |
2220 int budget, | 2248 intptr_t budget, |
2221 BoyerMooreLookahead* bm, | 2249 BoyerMooreLookahead* bm, |
2222 bool not_at_start) { | 2250 bool not_at_start) { |
2223 if (body_can_be_zero_length_ || budget <= 0) { | 2251 if (body_can_be_zero_length_ || budget <= 0) { |
2224 bm->SetRest(offset); | 2252 bm->SetRest(offset); |
2225 SaveBMInfo(bm, not_at_start, offset); | 2253 SaveBMInfo(bm, not_at_start, offset); |
2226 return; | 2254 return; |
2227 } | 2255 } |
2228 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); | 2256 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); |
2229 SaveBMInfo(bm, not_at_start, offset); | 2257 SaveBMInfo(bm, not_at_start, offset); |
2230 } | 2258 } |
2231 | 2259 |
2232 | 2260 |
2233 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2261 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2234 RegExpCompiler* compiler, | 2262 RegExpCompiler* compiler, |
2235 int characters_filled_in, | 2263 intptr_t characters_filled_in, |
2236 bool not_at_start) { | 2264 bool not_at_start) { |
2237 not_at_start = (not_at_start || not_at_start_); | 2265 not_at_start = (not_at_start || not_at_start_); |
2238 int choice_count = alternatives_->length(); | 2266 intptr_t choice_count = alternatives_->length(); |
2239 DCHECK(choice_count > 0); | 2267 ASSERT(choice_count > 0); |
2240 alternatives_->at(0).node()->GetQuickCheckDetails(details, | 2268 (*alternatives_)[0].node()->GetQuickCheckDetails(details, |
2241 compiler, | 2269 compiler, |
2242 characters_filled_in, | 2270 characters_filled_in, |
2243 not_at_start); | 2271 not_at_start); |
2244 for (int i = 1; i < choice_count; i++) { | 2272 for (intptr_t i = 1; i < choice_count; i++) { |
2245 QuickCheckDetails new_details(details->characters()); | 2273 QuickCheckDetails new_details(details->characters()); |
2246 RegExpNode* node = alternatives_->at(i).node(); | 2274 RegExpNode* node = (*alternatives_)[i].node(); |
2247 node->GetQuickCheckDetails(&new_details, compiler, | 2275 node->GetQuickCheckDetails(&new_details, compiler, |
2248 characters_filled_in, | 2276 characters_filled_in, |
2249 not_at_start); | 2277 not_at_start); |
2250 // Here we merge the quick match details of the two branches. | 2278 // Here we merge the quick match details of the two branches. |
2251 details->Merge(&new_details, characters_filled_in); | 2279 details->Merge(&new_details, characters_filled_in); |
2252 } | 2280 } |
2253 } | 2281 } |
2254 | 2282 |
2255 | 2283 |
2256 // Check for [0-9A-Z_a-z]. | 2284 // Check for [0-9A-Z_a-z]. |
2257 static void EmitWordCheck(RegExpMacroAssembler* assembler, | 2285 static void EmitWordCheck(RegExpMacroAssembler* assembler, |
2258 Label* word, | 2286 BlockLabel* word, |
2259 Label* non_word, | 2287 BlockLabel* non_word, |
2260 bool fall_through_on_word) { | 2288 bool fall_through_on_word) { |
2261 if (assembler->CheckSpecialCharacterClass( | 2289 if (assembler->CheckSpecialCharacterClass( |
2262 fall_through_on_word ? 'w' : 'W', | 2290 fall_through_on_word ? 'w' : 'W', |
2263 fall_through_on_word ? non_word : word)) { | 2291 fall_through_on_word ? non_word : word)) { |
2264 // Optimized implementation available. | 2292 // Optimized implementation available. |
2265 return; | 2293 return; |
2266 } | 2294 } |
2267 assembler->CheckCharacterGT('z', non_word); | 2295 assembler->CheckCharacterGT('z', non_word); |
2268 assembler->CheckCharacterLT('0', non_word); | 2296 assembler->CheckCharacterLT('0', non_word); |
2269 assembler->CheckCharacterGT('a' - 1, word); | 2297 assembler->CheckCharacterGT('a' - 1, word); |
(...skipping 12 matching lines...) |
2282 // that matches newline or the start of input). | 2310 // that matches newline or the start of input). |
2283 static void EmitHat(RegExpCompiler* compiler, | 2311 static void EmitHat(RegExpCompiler* compiler, |
2284 RegExpNode* on_success, | 2312 RegExpNode* on_success, |
2285 Trace* trace) { | 2313 Trace* trace) { |
2286 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2314 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2287 // We will be loading the previous character into the current character | 2315 // We will be loading the previous character into the current character |
2288 // register. | 2316 // register. |
2289 Trace new_trace(*trace); | 2317 Trace new_trace(*trace); |
2290 new_trace.InvalidateCurrentCharacter(); | 2318 new_trace.InvalidateCurrentCharacter(); |
2291 | 2319 |
2292 Label ok; | 2320 BlockLabel ok; |
2293 if (new_trace.cp_offset() == 0) { | 2321 if (new_trace.cp_offset() == 0) { |
2294 // The start of input counts as a newline in this context, so skip to | 2322 // The start of input counts as a newline in this context, so skip to |
2295 // ok if we are at the start. | 2323 // ok if we are at the start. |
2296 assembler->CheckAtStart(&ok); | 2324 assembler->CheckAtStart(&ok); |
2297 } | 2325 } |
2298 // We already checked that we are not at the start of input so it must be | 2326 // We already checked that we are not at the start of input so it must be |
2299 // OK to load the previous character. | 2327 // OK to load the previous character. |
2300 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, | 2328 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, |
2301 new_trace.backtrack(), | 2329 new_trace.backtrack(), |
2302 false); | 2330 false); |
2303 if (!assembler->CheckSpecialCharacterClass('n', | 2331 if (!assembler->CheckSpecialCharacterClass('n', |
2304 new_trace.backtrack())) { | 2332 new_trace.backtrack())) { |
2305 // Newline means \n, \r, 0x2028 or 0x2029. | 2333 // Newline means \n, \r, 0x2028 or 0x2029. |
2306 if (!compiler->one_byte()) { | 2334 if (!compiler->one_byte()) { |
2307 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); | 2335 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); |
2308 } | 2336 } |
2309 assembler->CheckCharacter('\n', &ok); | 2337 assembler->CheckCharacter('\n', &ok); |
2310 assembler->CheckNotCharacter('\r', new_trace.backtrack()); | 2338 assembler->CheckNotCharacter('\r', new_trace.backtrack()); |
2311 } | 2339 } |
2312 assembler->Bind(&ok); | 2340 assembler->BindBlock(&ok); |
2313 on_success->Emit(compiler, &new_trace); | 2341 on_success->Emit(compiler, &new_trace); |
2314 } | 2342 } |
2315 | 2343 |
2316 | 2344 |
2317 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). | 2345 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). |
2318 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { | 2346 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { |
2319 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2347 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2320 Trace::TriBool next_is_word_character = Trace::UNKNOWN; | 2348 Trace::TriBool next_is_word_character = Trace::UNKNOWN; |
2321 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); | 2349 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); |
2322 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 2350 BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
2323 if (lookahead == NULL) { | 2351 if (lookahead == NULL) { |
2324 int eats_at_least = | 2352 intptr_t eats_at_least = |
2325 Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore, | 2353 Utils::Minimum(kMaxLookaheadForBoyerMoore, |
2326 kRecursionBudget, | 2354 EatsAtLeast(kMaxLookaheadForBoyerMoore, |
2327 not_at_start)); | 2355 kRecursionBudget, |
| 2356 not_at_start)); |
2328 if (eats_at_least >= 1) { | 2357 if (eats_at_least >= 1) { |
2329 BoyerMooreLookahead* bm = | 2358 BoyerMooreLookahead* bm = |
2330 new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone()); | 2359 new(I) BoyerMooreLookahead(eats_at_least, compiler, I); |
2331 FillInBMInfo(0, kRecursionBudget, bm, not_at_start); | 2360 FillInBMInfo(0, kRecursionBudget, bm, not_at_start); |
2332 if (bm->at(0)->is_non_word()) | 2361 if (bm->at(0)->is_non_word()) |
2333 next_is_word_character = Trace::FALSE_VALUE; | 2362 next_is_word_character = Trace::FALSE_VALUE; |
2334 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; | 2363 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; |
2335 } | 2364 } |
2336 } else { | 2365 } else { |
2337 if (lookahead->at(0)->is_non_word()) | 2366 if (lookahead->at(0)->is_non_word()) |
2338 next_is_word_character = Trace::FALSE_VALUE; | 2367 next_is_word_character = Trace::FALSE_VALUE; |
2339 if (lookahead->at(0)->is_word()) | 2368 if (lookahead->at(0)->is_word()) |
2340 next_is_word_character = Trace::TRUE_VALUE; | 2369 next_is_word_character = Trace::TRUE_VALUE; |
2341 } | 2370 } |
2342 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY); | 2371 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY); |
2343 if (next_is_word_character == Trace::UNKNOWN) { | 2372 if (next_is_word_character == Trace::UNKNOWN) { |
2344 Label before_non_word; | 2373 BlockLabel before_non_word; |
2345 Label before_word; | 2374 BlockLabel before_word; |
2346 if (trace->characters_preloaded() != 1) { | 2375 if (trace->characters_preloaded() != 1) { |
2347 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); | 2376 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); |
2348 } | 2377 } |
2349 // Fall through on non-word. | 2378 // Fall through on non-word. |
2350 EmitWordCheck(assembler, &before_word, &before_non_word, false); | 2379 EmitWordCheck(assembler, &before_word, &before_non_word, false); |
2351 // Next character is not a word character. | 2380 // Next character is not a word character. |
2352 assembler->Bind(&before_non_word); | 2381 assembler->BindBlock(&before_non_word); |
2353 Label ok; | 2382 BlockLabel ok; |
| 2383 // Backtrack on \B (non-boundary check) if previous is a word, |
| 2384 // since we know next *is not* a word and this would be a boundary. |
2354 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); | 2385 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); |
2355 assembler->GoTo(&ok); | |
2356 | 2386 |
2357 assembler->Bind(&before_word); | 2387 if (!assembler->IsClosed()) { |
| 2388 assembler->GoTo(&ok); |
| 2389 } |
| 2390 |
| 2391 assembler->BindBlock(&before_word); |
2358 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); | 2392 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); |
2359 assembler->Bind(&ok); | 2393 assembler->BindBlock(&ok); |
2360 } else if (next_is_word_character == Trace::TRUE_VALUE) { | 2394 } else if (next_is_word_character == Trace::TRUE_VALUE) { |
2361 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); | 2395 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); |
2362 } else { | 2396 } else { |
2363 DCHECK(next_is_word_character == Trace::FALSE_VALUE); | 2397 ASSERT(next_is_word_character == Trace::FALSE_VALUE); |
2364 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); | 2398 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); |
2365 } | 2399 } |
2366 } | 2400 } |
2367 | 2401 |
2368 | 2402 |
2369 void AssertionNode::BacktrackIfPrevious( | 2403 void AssertionNode::BacktrackIfPrevious( |
2370 RegExpCompiler* compiler, | 2404 RegExpCompiler* compiler, |
2371 Trace* trace, | 2405 Trace* trace, |
2372 AssertionNode::IfPrevious backtrack_if_previous) { | 2406 AssertionNode::IfPrevious backtrack_if_previous) { |
2373 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2407 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2374 Trace new_trace(*trace); | 2408 Trace new_trace(*trace); |
2375 new_trace.InvalidateCurrentCharacter(); | 2409 new_trace.InvalidateCurrentCharacter(); |
2376 | 2410 |
2377 Label fall_through, dummy; | 2411 BlockLabel fall_through, dummy; |
2378 | 2412 |
2379 Label* non_word = backtrack_if_previous == kIsNonWord ? | 2413 BlockLabel* non_word = backtrack_if_previous == kIsNonWord ? |
2380 new_trace.backtrack() : | 2414 new_trace.backtrack() : |
2381 &fall_through; | 2415 &fall_through; |
2382 Label* word = backtrack_if_previous == kIsNonWord ? | 2416 BlockLabel* word = backtrack_if_previous == kIsNonWord ? |
2383 &fall_through : | 2417 &fall_through : |
2384 new_trace.backtrack(); | 2418 new_trace.backtrack(); |
2385 | 2419 |
2386 if (new_trace.cp_offset() == 0) { | 2420 if (new_trace.cp_offset() == 0) { |
2387 // The start of input counts as a non-word character, so the question is | 2421 // The start of input counts as a non-word character, so the question is |
2388 // decided if we are at the start. | 2422 // decided if we are at the start. |
2389 assembler->CheckAtStart(non_word); | 2423 assembler->CheckAtStart(non_word); |
2390 } | 2424 } |
2391 // We already checked that we are not at the start of input so it must be | 2425 // We already checked that we are not at the start of input so it must be |
2392 // OK to load the previous character. | 2426 // OK to load the previous character. |
2393 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); | 2427 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); |
2394 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); | 2428 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); |
2395 | 2429 |
2396 assembler->Bind(&fall_through); | 2430 assembler->BindBlock(&fall_through); |
2397 on_success()->Emit(compiler, &new_trace); | 2431 on_success()->Emit(compiler, &new_trace); |
2398 } | 2432 } |
2399 | 2433 |
2400 | 2434 |
2401 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2435 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2402 RegExpCompiler* compiler, | 2436 RegExpCompiler* compiler, |
2403 int filled_in, | 2437 intptr_t filled_in, |
2404 bool not_at_start) { | 2438 bool not_at_start) { |
2405 if (assertion_type_ == AT_START && not_at_start) { | 2439 if (assertion_type_ == AT_START && not_at_start) { |
2406 details->set_cannot_match(); | 2440 details->set_cannot_match(); |
2407 return; | 2441 return; |
2408 } | 2442 } |
2409 return on_success()->GetQuickCheckDetails(details, | 2443 return on_success()->GetQuickCheckDetails(details, |
2410 compiler, | 2444 compiler, |
2411 filled_in, | 2445 filled_in, |
2412 not_at_start); | 2446 not_at_start); |
2413 } | 2447 } |
2414 | 2448 |
2415 | 2449 |
2416 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2450 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
2417 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2451 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2418 switch (assertion_type_) { | 2452 switch (assertion_type_) { |
2419 case AT_END: { | 2453 case AT_END: { |
2420 Label ok; | 2454 BlockLabel ok; |
2421 assembler->CheckPosition(trace->cp_offset(), &ok); | 2455 assembler->CheckPosition(trace->cp_offset(), &ok); |
2422 assembler->GoTo(trace->backtrack()); | 2456 assembler->GoTo(trace->backtrack()); |
2423 assembler->Bind(&ok); | 2457 assembler->BindBlock(&ok); |
2424 break; | 2458 break; |
2425 } | 2459 } |
2426 case AT_START: { | 2460 case AT_START: { |
2427 if (trace->at_start() == Trace::FALSE_VALUE) { | 2461 if (trace->at_start() == Trace::FALSE_VALUE) { |
2428 assembler->GoTo(trace->backtrack()); | 2462 assembler->GoTo(trace->backtrack()); |
2429 return; | 2463 return; |
2430 } | 2464 } |
2431 if (trace->at_start() == Trace::UNKNOWN) { | 2465 if (trace->at_start() == Trace::UNKNOWN) { |
2432 assembler->CheckNotAtStart(trace->backtrack()); | 2466 assembler->CheckNotAtStart(trace->backtrack()); |
2433 Trace at_start_trace = *trace; | 2467 Trace at_start_trace = *trace; |
2434 at_start_trace.set_at_start(true); | 2468 at_start_trace.set_at_start(true); |
2435 on_success()->Emit(compiler, &at_start_trace); | 2469 on_success()->Emit(compiler, &at_start_trace); |
2436 return; | 2470 return; |
2437 } | 2471 } |
2438 } | 2472 } |
2439 break; | 2473 break; |
2440 case AFTER_NEWLINE: | 2474 case AFTER_NEWLINE: |
2441 EmitHat(compiler, on_success(), trace); | 2475 EmitHat(compiler, on_success(), trace); |
2442 return; | 2476 return; |
2443 case AT_BOUNDARY: | 2477 case AT_BOUNDARY: |
2444 case AT_NON_BOUNDARY: { | 2478 case AT_NON_BOUNDARY: { |
2445 EmitBoundaryCheck(compiler, trace); | 2479 EmitBoundaryCheck(compiler, trace); |
2446 return; | 2480 return; |
2447 } | 2481 } |
2448 } | 2482 } |
2449 on_success()->Emit(compiler, trace); | 2483 on_success()->Emit(compiler, trace); |
2450 } | 2484 } |
2451 | 2485 |
2452 | 2486 |
2453 static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) { | 2487 static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) { |
2454 if (quick_check == NULL) return false; | 2488 if (quick_check == NULL) return false; |
2455 if (offset >= quick_check->characters()) return false; | 2489 if (offset >= quick_check->characters()) return false; |
2456 return quick_check->positions(offset)->determines_perfectly; | 2490 return quick_check->positions(offset)->determines_perfectly; |
2457 } | 2491 } |
2458 | 2492 |
2459 | 2493 |
2460 static void UpdateBoundsCheck(int index, int* checked_up_to) { | 2494 static void UpdateBoundsCheck(intptr_t index, intptr_t* checked_up_to) { |
2461 if (index > *checked_up_to) { | 2495 if (index > *checked_up_to) { |
2462 *checked_up_to = index; | 2496 *checked_up_to = index; |
2463 } | 2497 } |
2464 } | 2498 } |
2465 | 2499 |
2466 | 2500 |
2467 // We call this repeatedly to generate code for each pass over the text node. | 2501 // We call this repeatedly to generate code for each pass over the text node. |
2468 // The passes are in increasing order of difficulty because we hope one | 2502 // The passes are in increasing order of difficulty because we hope one |
2469 // of the first passes will fail in which case we are saved the work of the | 2503 // of the first passes will fail in which case we are saved the work of the |
2470 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ | 2504 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ |
(...skipping 20 matching lines...) Expand all Loading... |
2491 // order to get to the code we are now generating. The quick check can involve | 2525 // order to get to the code we are now generating. The quick check can involve |
2492 // loading characters, which means we do not need to recheck the bounds | 2526 // loading characters, which means we do not need to recheck the bounds |
2493 // up to the limit the quick check already checked. In addition the quick | 2527 // up to the limit the quick check already checked. In addition the quick |
2494 // check can have involved a mask and compare operation which may simplify | 2528 // check can have involved a mask and compare operation which may simplify |
2495 // or obviate the need for further checks at some character positions. | 2529 // or obviate the need for further checks at some character positions. |
2496 void TextNode::TextEmitPass(RegExpCompiler* compiler, | 2530 void TextNode::TextEmitPass(RegExpCompiler* compiler, |
2497 TextEmitPassType pass, | 2531 TextEmitPassType pass, |
2498 bool preloaded, | 2532 bool preloaded, |
2499 Trace* trace, | 2533 Trace* trace, |
2500 bool first_element_checked, | 2534 bool first_element_checked, |
2501 int* checked_up_to) { | 2535 intptr_t* checked_up_to) { |
2502 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2536 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2503 Isolate* isolate = assembler->zone()->isolate(); | |
2504 bool one_byte = compiler->one_byte(); | 2537 bool one_byte = compiler->one_byte(); |
2505 Label* backtrack = trace->backtrack(); | 2538 BlockLabel* backtrack = trace->backtrack(); |
2506 QuickCheckDetails* quick_check = trace->quick_check_performed(); | 2539 QuickCheckDetails* quick_check = trace->quick_check_performed(); |
2507 int element_count = elms_->length(); | 2540 intptr_t element_count = elms_->length(); |
2508 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { | 2541 for (intptr_t i = preloaded ? 0 : element_count - 1; i >= 0; i--) { |
2509 TextElement elm = elms_->at(i); | 2542 TextElement elm = elms_->At(i); |
2510 int cp_offset = trace->cp_offset() + elm.cp_offset(); | 2543 intptr_t cp_offset = trace->cp_offset() + elm.cp_offset(); |
2511 if (elm.text_type() == TextElement::ATOM) { | 2544 if (elm.text_type() == TextElement::ATOM) { |
2512 Vector<const uc16> quarks = elm.atom()->data(); | 2545 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); |
2513 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { | 2546 for (intptr_t j = preloaded ? 0 : quarks->length() - 1; j >= 0; j--) { |
2514 if (first_element_checked && i == 0 && j == 0) continue; | 2547 if (first_element_checked && i == 0 && j == 0) continue; |
2515 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; | 2548 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; |
2516 EmitCharacterFunction* emit_function = NULL; | 2549 EmitCharacterFunction* emit_function = NULL; |
2517 switch (pass) { | 2550 switch (pass) { |
2518 case NON_LATIN1_MATCH: | 2551 case NON_LATIN1_MATCH: |
2519 DCHECK(one_byte); | 2552 ASSERT(one_byte); |
2520 if (quarks[j] > String::kMaxOneByteCharCode) { | 2553 if (quarks->At(j) > Symbols::kMaxOneCharCodeSymbol) { |
2521 assembler->GoTo(backtrack); | 2554 assembler->GoTo(backtrack); |
2522 return; | 2555 return; |
2523 } | 2556 } |
2524 break; | 2557 break; |
2525 case NON_LETTER_CHARACTER_MATCH: | 2558 case NON_LETTER_CHARACTER_MATCH: |
2526 emit_function = &EmitAtomNonLetter; | 2559 emit_function = &EmitAtomNonLetter; |
2527 break; | 2560 break; |
2528 case SIMPLE_CHARACTER_MATCH: | 2561 case SIMPLE_CHARACTER_MATCH: |
2529 emit_function = &EmitSimpleCharacter; | 2562 emit_function = &EmitSimpleCharacter; |
2530 break; | 2563 break; |
2531 case CASE_CHARACTER_MATCH: | 2564 case CASE_CHARACTER_MATCH: |
2532 emit_function = &EmitAtomLetter; | 2565 emit_function = &EmitAtomLetter; |
2533 break; | 2566 break; |
2534 default: | 2567 default: |
2535 break; | 2568 break; |
2536 } | 2569 } |
2537 if (emit_function != NULL) { | 2570 if (emit_function != NULL) { |
2538 bool bound_checked = emit_function(isolate, | 2571 bool bound_checked = emit_function(I, |
2539 compiler, | 2572 compiler, |
2540 quarks[j], | 2573 quarks->At(j), |
2541 backtrack, | 2574 backtrack, |
2542 cp_offset + j, | 2575 cp_offset + j, |
2543 *checked_up_to < cp_offset + j, | 2576 *checked_up_to < cp_offset + j, |
2544 preloaded); | 2577 preloaded); |
2545 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); | 2578 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
2546 } | 2579 } |
2547 } | 2580 } |
2548 } else { | 2581 } else { |
2549 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); | 2582 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); |
2550 if (pass == CHARACTER_CLASS_MATCH) { | 2583 if (pass == CHARACTER_CLASS_MATCH) { |
2551 if (first_element_checked && i == 0) continue; | 2584 if (first_element_checked && i == 0) continue; |
2552 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; | 2585 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
2553 RegExpCharacterClass* cc = elm.char_class(); | 2586 RegExpCharacterClass* cc = elm.char_class(); |
2554 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, | 2587 EmitCharClass(assembler, |
2555 *checked_up_to < cp_offset, preloaded, zone()); | 2588 cc, |
| 2589 one_byte, |
| 2590 backtrack, |
| 2591 cp_offset, |
| 2592 *checked_up_to < cp_offset, |
| 2593 preloaded, |
| 2594 I); |
2556 UpdateBoundsCheck(cp_offset, checked_up_to); | 2595 UpdateBoundsCheck(cp_offset, checked_up_to); |
2557 } | 2596 } |
2558 } | 2597 } |
2559 } | 2598 } |
2560 } | 2599 } |
2561 | 2600 |
2562 | 2601 |
2563 int TextNode::Length() { | 2602 intptr_t TextNode::Length() { |
2564 TextElement elm = elms_->last(); | 2603 TextElement elm = elms_->Last(); |
2565 DCHECK(elm.cp_offset() >= 0); | 2604 ASSERT(elm.cp_offset() >= 0); |
2566 return elm.cp_offset() + elm.length(); | 2605 return elm.cp_offset() + elm.length(); |
2567 } | 2606 } |
2568 | 2607 |
2569 | 2608 |
2570 bool TextNode::SkipPass(int int_pass, bool ignore_case) { | 2609 bool TextNode::SkipPass(intptr_t intptr_t_pass, bool ignore_case) { |
2571 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); | 2610 TextEmitPassType pass = static_cast<TextEmitPassType>(intptr_t_pass); |
2572 if (ignore_case) { | 2611 if (ignore_case) { |
2573 return pass == SIMPLE_CHARACTER_MATCH; | 2612 return pass == SIMPLE_CHARACTER_MATCH; |
2574 } else { | 2613 } else { |
2575 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; | 2614 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; |
2576 } | 2615 } |
2577 } | 2616 } |
2578 | 2617 |
2579 | 2618 |
2580 // This generates the code to match a text node. A text node can contain | 2619 // This generates the code to match a text node. A text node can contain |
2581 // straight character sequences (possibly to be matched in a case-independent | 2620 // straight character sequences (possibly to be matched in a case-independent |
2582 // way) and character classes. For efficiency we do not do this in a single | 2621 // way) and character classes. For efficiency we do not do this in a single |
2583 // pass from left to right. Instead we pass over the text node several times, | 2622 // pass from left to right. Instead we pass over the text node several times, |
2584 // emitting code for some character positions every time. See the comment on | 2623 // emitting code for some character positions every time. See the comment on |
2585 // TextEmitPass for details. | 2624 // TextEmitPass for details. |
2586 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2625 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
2587 LimitResult limit_result = LimitVersions(compiler, trace); | 2626 LimitResult limit_result = LimitVersions(compiler, trace); |
2588 if (limit_result == DONE) return; | 2627 if (limit_result == DONE) return; |
2589 DCHECK(limit_result == CONTINUE); | 2628 ASSERT(limit_result == CONTINUE); |
2590 | 2629 |
2591 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) { | 2630 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) { |
2592 compiler->SetRegExpTooBig(); | 2631 compiler->SetRegExpTooBig(); |
2593 return; | 2632 return; |
2594 } | 2633 } |
2595 | 2634 |
2596 if (compiler->one_byte()) { | 2635 if (compiler->one_byte()) { |
2597 int dummy = 0; | 2636 intptr_t dummy = 0; |
2598 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy); | 2637 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy); |
2599 } | 2638 } |
2600 | 2639 |
2601 bool first_elt_done = false; | 2640 bool first_elt_done = false; |
2602 int bound_checked_to = trace->cp_offset() - 1; | 2641 intptr_t bound_checked_to = trace->cp_offset() - 1; |
2603 bound_checked_to += trace->bound_checked_up_to(); | 2642 bound_checked_to += trace->bound_checked_up_to(); |
2604 | 2643 |
2605 // If a character is preloaded into the current character register then | 2644 // If a character is preloaded into the current character register then |
2606 // check that now. | 2645 // check that now. |
2607 if (trace->characters_preloaded() == 1) { | 2646 if (trace->characters_preloaded() == 1) { |
2608 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) { | 2647 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { |
2609 if (!SkipPass(pass, compiler->ignore_case())) { | 2648 if (!SkipPass(pass, compiler->ignore_case())) { |
2610 TextEmitPass(compiler, | 2649 TextEmitPass(compiler, |
2611 static_cast<TextEmitPassType>(pass), | 2650 static_cast<TextEmitPassType>(pass), |
2612 true, | 2651 true, |
2613 trace, | 2652 trace, |
2614 false, | 2653 false, |
2615 &bound_checked_to); | 2654 &bound_checked_to); |
2616 } | 2655 } |
2617 } | 2656 } |
2618 first_elt_done = true; | 2657 first_elt_done = true; |
2619 } | 2658 } |
2620 | 2659 |
2621 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) { | 2660 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { |
2622 if (!SkipPass(pass, compiler->ignore_case())) { | 2661 if (!SkipPass(pass, compiler->ignore_case())) { |
2623 TextEmitPass(compiler, | 2662 TextEmitPass(compiler, |
2624 static_cast<TextEmitPassType>(pass), | 2663 static_cast<TextEmitPassType>(pass), |
2625 false, | 2664 false, |
2626 trace, | 2665 trace, |
2627 first_elt_done, | 2666 first_elt_done, |
2628 &bound_checked_to); | 2667 &bound_checked_to); |
2629 } | 2668 } |
2630 } | 2669 } |
2631 | 2670 |
2632 Trace successor_trace(*trace); | 2671 Trace successor_trace(*trace); |
2633 successor_trace.set_at_start(false); | 2672 successor_trace.set_at_start(false); |
2634 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); | 2673 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); |
2635 RecursionCheck rc(compiler); | 2674 RecursionCheck rc(compiler); |
2636 on_success()->Emit(compiler, &successor_trace); | 2675 on_success()->Emit(compiler, &successor_trace); |
2637 } | 2676 } |
2638 | 2677 |
2639 | 2678 |
2640 void Trace::InvalidateCurrentCharacter() { | 2679 void Trace::InvalidateCurrentCharacter() { |
2641 characters_preloaded_ = 0; | 2680 characters_preloaded_ = 0; |
2642 } | 2681 } |
2643 | 2682 |
2644 | 2683 |
2645 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { | 2684 void Trace::AdvanceCurrentPositionInTrace(intptr_t by, |
2646 DCHECK(by > 0); | 2685 RegExpCompiler* compiler) { |
| 2686 ASSERT(by > 0); |
2647 // We don't have an instruction for shifting the current character register | 2687 // We don't have an instruction for shifting the current character register |
2648 // down or for using a shifted value for anything so lets just forget that | 2688 // down or for using a shifted value for anything so lets just forget that |
2649 // we preloaded any characters into it. | 2689 // we preloaded any characters into it. |
2650 characters_preloaded_ = 0; | 2690 characters_preloaded_ = 0; |
2651 // Adjust the offsets of the quick check performed information. This | 2691 // Adjust the offsets of the quick check performed information. This |
2652 // information is used to find out what we already determined about the | 2692 // information is used to find out what we already determined about the |
2653 // characters by means of mask and compare. | 2693 // characters by means of mask and compare. |
2654 quick_check_performed_.Advance(by, compiler->one_byte()); | 2694 quick_check_performed_.Advance(by, compiler->one_byte()); |
2655 cp_offset_ += by; | 2695 cp_offset_ += by; |
2656 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { | 2696 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { |
2657 compiler->SetRegExpTooBig(); | 2697 compiler->SetRegExpTooBig(); |
2658 cp_offset_ = 0; | 2698 cp_offset_ = 0; |
2659 } | 2699 } |
2660 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); | 2700 bound_checked_up_to_ = Utils::Maximum(static_cast<intptr_t>(0), |
| 2701 bound_checked_up_to_ - by); |
2661 } | 2702 } |
2662 | 2703 |
2663 | 2704 |
2664 void TextNode::MakeCaseIndependent(bool is_one_byte) { | 2705 void TextNode::MakeCaseIndependent(bool is_one_byte) { |
2665 int element_count = elms_->length(); | 2706 intptr_t element_count = elms_->length(); |
2666 for (int i = 0; i < element_count; i++) { | 2707 for (intptr_t i = 0; i < element_count; i++) { |
2667 TextElement elm = elms_->at(i); | 2708 TextElement elm = elms_->At(i); |
2668 if (elm.text_type() == TextElement::CHAR_CLASS) { | 2709 if (elm.text_type() == TextElement::CHAR_CLASS) { |
2669 RegExpCharacterClass* cc = elm.char_class(); | 2710 RegExpCharacterClass* cc = elm.char_class(); |
2670 // None of the standard character classes is different in the case | 2711 // None of the standard character classes is different in the case |
2671 // independent case and it slows us down if we don't know that. | 2712 // independent case and it slows us down if we don't know that. |
2672 if (cc->is_standard(zone())) continue; | 2713 if (cc->is_standard()) continue; |
2673 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 2714 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); |
2674 int range_count = ranges->length(); | 2715 intptr_t range_count = ranges->length(); |
2675 for (int j = 0; j < range_count; j++) { | 2716 for (intptr_t j = 0; j < range_count; j++) { |
2676 ranges->at(j).AddCaseEquivalents(ranges, is_one_byte, zone()); | 2717 (*ranges)[j].AddCaseEquivalents(ranges, is_one_byte, I); |
2677 } | 2718 } |
2678 } | 2719 } |
2679 } | 2720 } |
2680 } | 2721 } |
2681 | 2722 |
2682 | 2723 |
2683 int TextNode::GreedyLoopTextLength() { | 2724 intptr_t TextNode::GreedyLoopTextLength() { |
2684 TextElement elm = elms_->at(elms_->length() - 1); | 2725 TextElement elm = elms_->At(elms_->length() - 1); |
2685 return elm.cp_offset() + elm.length(); | 2726 return elm.cp_offset() + elm.length(); |
2686 } | 2727 } |
2687 | 2728 |
2688 | 2729 |
2689 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( | 2730 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( |
2690 RegExpCompiler* compiler) { | 2731 RegExpCompiler* compiler) { |
2691 if (elms_->length() != 1) return NULL; | 2732 if (elms_->length() != 1) return NULL; |
2692 TextElement elm = elms_->at(0); | 2733 TextElement elm = elms_->At(0); |
2693 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; | 2734 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; |
2694 RegExpCharacterClass* node = elm.char_class(); | 2735 RegExpCharacterClass* node = elm.char_class(); |
2695 ZoneList<CharacterRange>* ranges = node->ranges(zone()); | 2736 ZoneGrowableArray<CharacterRange>* ranges = node->ranges(); |
2696 if (!CharacterRange::IsCanonical(ranges)) { | 2737 if (!CharacterRange::IsCanonical(ranges)) { |
2697 CharacterRange::Canonicalize(ranges); | 2738 CharacterRange::Canonicalize(ranges); |
2698 } | 2739 } |
2699 if (node->is_negated()) { | 2740 if (node->is_negated()) { |
2700 return ranges->length() == 0 ? on_success() : NULL; | 2741 return ranges->length() == 0 ? on_success() : NULL; |
2701 } | 2742 } |
2702 if (ranges->length() != 1) return NULL; | 2743 if (ranges->length() != 1) return NULL; |
2703 uint32_t max_char; | 2744 uint32_t max_char; |
2704 if (compiler->one_byte()) { | 2745 if (compiler->one_byte()) { |
2705 max_char = String::kMaxOneByteCharCode; | 2746 max_char = Symbols::kMaxOneCharCodeSymbol; |
2706 } else { | 2747 } else { |
2707 max_char = String::kMaxUtf16CodeUnit; | 2748 max_char = Utf16::kMaxCodeUnit; |
2708 } | 2749 } |
2709 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL; | 2750 return ranges->At(0).IsEverything(max_char) ? on_success() : NULL; |
2710 } | 2751 } |
2711 | 2752 |
2712 | 2753 |
2713 // Finds the fixed match length of a sequence of nodes that goes from | 2754 // Finds the fixed match length of a sequence of nodes that goes from |
2714 // this alternative and back to this choice node. If there are variable | 2755 // this alternative and back to this choice node. If there are variable |
2715 // length nodes or other complications in the way then return a sentinel | 2756 // length nodes or other complications in the way then return a sentinel |
2716 // value indicating that a greedy loop cannot be constructed. | 2757 // value indicating that a greedy loop cannot be constructed. |
2717 int ChoiceNode::GreedyLoopTextLengthForAlternative( | 2758 intptr_t ChoiceNode::GreedyLoopTextLengthForAlternative( |
2718 GuardedAlternative* alternative) { | 2759 GuardedAlternative* alternative) { |
2719 int length = 0; | 2760 intptr_t length = 0; |
2720 RegExpNode* node = alternative->node(); | 2761 RegExpNode* node = alternative->node(); |
2721 // Later we will generate code for all these text nodes using recursion | 2762 // Later we will generate code for all these text nodes using recursion |
2722 // so we have to limit the max number. | 2763 // so we have to limit the max number. |
2723 int recursion_depth = 0; | 2764 intptr_t recursion_depth = 0; |
2724 while (node != this) { | 2765 while (node != this) { |
2725 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { | 2766 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { |
2726 return kNodeIsTooComplexForGreedyLoops; | 2767 return kNodeIsTooComplexForGreedyLoops; |
2727 } | 2768 } |
2728 int node_length = node->GreedyLoopTextLength(); | 2769 intptr_t node_length = node->GreedyLoopTextLength(); |
2729 if (node_length == kNodeIsTooComplexForGreedyLoops) { | 2770 if (node_length == kNodeIsTooComplexForGreedyLoops) { |
2730 return kNodeIsTooComplexForGreedyLoops; | 2771 return kNodeIsTooComplexForGreedyLoops; |
2731 } | 2772 } |
2732 length += node_length; | 2773 length += node_length; |
2733 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); | 2774 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); |
2734 node = seq_node->on_success(); | 2775 node = seq_node->on_success(); |
2735 } | 2776 } |
2736 return length; | 2777 return length; |
2737 } | 2778 } |
2738 | 2779 |
2739 | 2780 |
2740 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { | 2781 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { |
2741 DCHECK_EQ(loop_node_, NULL); | 2782 ASSERT(loop_node_ == NULL); |
2742 AddAlternative(alt); | 2783 AddAlternative(alt); |
2743 loop_node_ = alt.node(); | 2784 loop_node_ = alt.node(); |
2744 } | 2785 } |
2745 | 2786 |
2746 | 2787 |
2747 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { | 2788 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { |
2748 DCHECK_EQ(continue_node_, NULL); | 2789 ASSERT(continue_node_ == NULL); |
2749 AddAlternative(alt); | 2790 AddAlternative(alt); |
2750 continue_node_ = alt.node(); | 2791 continue_node_ = alt.node(); |
2751 } | 2792 } |
2752 | 2793 |
2753 | 2794 |
2754 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2795 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
2755 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 2796 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
2756 if (trace->stop_node() == this) { | 2797 if (trace->stop_node() == this) { |
2757 // Back edge of greedy optimized loop node graph. | 2798 // Back edge of greedy optimized loop node graph. |
2758 int text_length = | 2799 intptr_t text_length = |
2759 GreedyLoopTextLengthForAlternative(&(alternatives_->at(0))); | 2800 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); |
2760 DCHECK(text_length != kNodeIsTooComplexForGreedyLoops); | 2801 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); |
2761 // Update the counter-based backtracking info on the stack. This is an | 2802 // Update the counter-based backtracking info on the stack. This is an |
2762 // optimization for greedy loops (see below). | 2803 // optimization for greedy loops (see below). |
2763 DCHECK(trace->cp_offset() == text_length); | 2804 ASSERT(trace->cp_offset() == text_length); |
2764 macro_assembler->AdvanceCurrentPosition(text_length); | 2805 macro_assembler->AdvanceCurrentPosition(text_length); |
2765 macro_assembler->GoTo(trace->loop_label()); | 2806 macro_assembler->GoTo(trace->loop_label()); |
2766 return; | 2807 return; |
2767 } | 2808 } |
2768 DCHECK(trace->stop_node() == NULL); | 2809 ASSERT(trace->stop_node() == NULL); |
2769 if (!trace->is_trivial()) { | 2810 if (!trace->is_trivial()) { |
2770 trace->Flush(compiler, this); | 2811 trace->Flush(compiler, this); |
2771 return; | 2812 return; |
2772 } | 2813 } |
2773 ChoiceNode::Emit(compiler, trace); | 2814 ChoiceNode::Emit(compiler, trace); |
2774 } | 2815 } |
2775 | 2816 |
2776 | 2817 |
2777 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, | 2818 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, |
2778 int eats_at_least) { | 2819 intptr_t eats_at_least) { |
2779 int preload_characters = Min(4, eats_at_least); | 2820 intptr_t preload_characters = Utils::Minimum(static_cast<intptr_t>(4), |
| 2821 eats_at_least); |
2780 if (compiler->macro_assembler()->CanReadUnaligned()) { | 2822 if (compiler->macro_assembler()->CanReadUnaligned()) { |
2781 bool one_byte = compiler->one_byte(); | 2823 bool one_byte = compiler->one_byte(); |
2782 if (one_byte) { | 2824 if (one_byte) { |
2783 if (preload_characters > 4) preload_characters = 4; | 2825 if (preload_characters > 4) preload_characters = 4; |
2784 // We can't preload 3 characters because there is no machine instruction | 2826 // We can't preload 3 characters because there is no machine instruction |
2785 // to do that. We can't just load 4 because we could be reading | 2827 // to do that. We can't just load 4 because we could be reading |
2786 // beyond the end of the string, which could cause a memory fault. | 2828 // beyond the end of the string, which could cause a memory fault. |
2787 if (preload_characters == 3) preload_characters = 2; | 2829 if (preload_characters == 3) preload_characters = 2; |
2788 } else { | 2830 } else { |
2789 if (preload_characters > 2) preload_characters = 2; | 2831 if (preload_characters > 2) preload_characters = 2; |
2790 } | 2832 } |
2791 } else { | 2833 } else { |
2792 if (preload_characters > 1) preload_characters = 1; | 2834 if (preload_characters > 1) preload_characters = 1; |
2793 } | 2835 } |
2794 return preload_characters; | 2836 return preload_characters; |
2795 } | 2837 } |
2796 | 2838 |
2797 | 2839 |
2798 // This class is used when generating the alternatives in a choice node. It | 2840 // This structure is used when generating the alternatives in a choice node. It |
2799 // records the way the alternative is being code generated. | 2841 // records the way the alternative is being code generated. |
2800 class AlternativeGeneration: public Malloced { | 2842 struct AlternativeGeneration { |
2801 public: | |
2802 AlternativeGeneration() | 2843 AlternativeGeneration() |
2803 : possible_success(), | 2844 : possible_success(), |
2804 expects_preload(false), | 2845 expects_preload(false), |
2805 after(), | 2846 after(), |
2806 quick_check_details() { } | 2847 quick_check_details() { } |
2807 Label possible_success; | 2848 BlockLabel possible_success; |
2808 bool expects_preload; | 2849 bool expects_preload; |
2809 Label after; | 2850 BlockLabel after; |
2810 QuickCheckDetails quick_check_details; | 2851 QuickCheckDetails quick_check_details; |
2811 }; | 2852 }; |
2812 | 2853 |
2813 | 2854 |
2814 // Creates a list of AlternativeGenerations. If the list has a reasonable | 2855 // Creates a list of AlternativeGenerations. If the list has a reasonable |
2815 // size then it is on the stack, otherwise the excess is on the heap. | 2856 // size then it is on the stack, otherwise the excess is on the heap. |
2816 class AlternativeGenerationList { | 2857 class AlternativeGenerationList { |
2817 public: | 2858 public: |
2818 AlternativeGenerationList(int count, Zone* zone) | 2859 explicit AlternativeGenerationList(intptr_t count) |
2819 : alt_gens_(count, zone) { | 2860 : alt_gens_(count) { |
2820 for (int i = 0; i < count && i < kAFew; i++) { | 2861 for (intptr_t i = 0; i < count && i < kAFew; i++) { |
2821 alt_gens_.Add(a_few_alt_gens_ + i, zone); | 2862 alt_gens_.Add(a_few_alt_gens_ + i); |
2822 } | 2863 } |
2823 for (int i = kAFew; i < count; i++) { | 2864 for (intptr_t i = kAFew; i < count; i++) { |
2824 alt_gens_.Add(new AlternativeGeneration(), zone); | 2865 alt_gens_.Add(new AlternativeGeneration()); |
2825 } | 2866 } |
2826 } | 2867 } |
2827 ~AlternativeGenerationList() { | 2868 ~AlternativeGenerationList() { |
2828 for (int i = kAFew; i < alt_gens_.length(); i++) { | 2869 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) { |
2829 delete alt_gens_[i]; | 2870 delete alt_gens_[i]; |
2830 alt_gens_[i] = NULL; | 2871 alt_gens_[i] = NULL; |
2831 } | 2872 } |
2832 } | 2873 } |
2833 | 2874 |
2834 AlternativeGeneration* at(int i) { | 2875 AlternativeGeneration* at(intptr_t i) { |
2835 return alt_gens_[i]; | 2876 return alt_gens_[i]; |
2836 } | 2877 } |
2837 | 2878 |
2838 private: | 2879 private: |
2839 static const int kAFew = 10; | 2880 static const intptr_t kAFew = 10; |
2840 ZoneList<AlternativeGeneration*> alt_gens_; | 2881 GrowableArray<AlternativeGeneration*> alt_gens_; |
2841 AlternativeGeneration a_few_alt_gens_[kAFew]; | 2882 AlternativeGeneration a_few_alt_gens_[kAFew]; |
| 2883 |
| 2884 DISALLOW_ALLOCATION(); |
2842 }; | 2885 }; |
2843 | 2886 |
2844 | 2887 |
2845 // The '2' variant is has inclusive from and exclusive to. | 2888 // The '2' variant is inclusive from and exclusive to. |
2846 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, | 2889 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, |
2847 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. | 2890 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. |
2848 static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, | 2891 static const intptr_t kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, |
2849 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, | 2892 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, |
2850 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, | 2893 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, |
2851 0xFEFF, 0xFF00, 0x10000 }; | 2894 0xFEFF, 0xFF00, 0x10000 }; |
2852 static const int kSpaceRangeCount = arraysize(kSpaceRanges); | 2895 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); |
2853 | 2896 static const intptr_t kWordRanges[] = { |
2854 static const int kWordRanges[] = { | |
2855 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 }; | 2897 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 }; |
2856 static const int kWordRangeCount = arraysize(kWordRanges); | 2898 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); |
2857 static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 }; | 2899 static const intptr_t kDigitRanges[] = { '0', '9' + 1, 0x10000 }; |
2858 static const int kDigitRangeCount = arraysize(kDigitRanges); | 2900 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); |
2859 static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 }; | 2901 static const intptr_t kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 }; |
2860 static const int kSurrogateRangeCount = arraysize(kSurrogateRanges); | 2902 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges); |
2861 static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E, | 2903 static const intptr_t kLineTerminatorRanges[] = { |
2862 0x2028, 0x202A, 0x10000 }; | 2904 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, 0x10000 }; |
2863 static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges); | 2905 static const intptr_t kLineTerminatorRangeCount = |
| 2906 ARRAY_SIZE(kLineTerminatorRanges); |
2864 | 2907 |
2865 | 2908 |
2866 void BoyerMoorePositionInfo::Set(int character) { | 2909 void BoyerMoorePositionInfo::Set(intptr_t character) { |
2867 SetInterval(Interval(character, character)); | 2910 SetInterval(Interval(character, character)); |
2868 } | 2911 } |
2869 | 2912 |
2870 | 2913 |
2871 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { | 2914 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { |
2872 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); | 2915 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); |
2873 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); | 2916 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); |
2874 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); | 2917 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); |
2875 surrogate_ = | 2918 surrogate_ = |
2876 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); | 2919 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); |
2877 if (interval.to() - interval.from() >= kMapSize - 1) { | 2920 if (interval.to() - interval.from() >= kMapSize - 1) { |
2878 if (map_count_ != kMapSize) { | 2921 if (map_count_ != kMapSize) { |
2879 map_count_ = kMapSize; | 2922 map_count_ = kMapSize; |
2880 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; | 2923 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true; |
2881 } | 2924 } |
2882 return; | 2925 return; |
2883 } | 2926 } |
2884 for (int i = interval.from(); i <= interval.to(); i++) { | 2927 for (intptr_t i = interval.from(); i <= interval.to(); i++) { |
2885 int mod_character = (i & kMask); | 2928 intptr_t mod_character = (i & kMask); |
2886 if (!map_->at(mod_character)) { | 2929 if (!map_->At(mod_character)) { |
2887 map_count_++; | 2930 map_count_++; |
2888 map_->at(mod_character) = true; | 2931 (*map_)[mod_character] = true; |
2889 } | 2932 } |
2890 if (map_count_ == kMapSize) return; | 2933 if (map_count_ == kMapSize) return; |
2891 } | 2934 } |
2892 } | 2935 } |
2893 | 2936 |
2894 | 2937 |
2895 void BoyerMoorePositionInfo::SetAll() { | 2938 void BoyerMoorePositionInfo::SetAll() { |
2896 s_ = w_ = d_ = kLatticeUnknown; | 2939 s_ = w_ = d_ = kLatticeUnknown; |
2897 if (map_count_ != kMapSize) { | 2940 if (map_count_ != kMapSize) { |
2898 map_count_ = kMapSize; | 2941 map_count_ = kMapSize; |
2899 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; | 2942 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true; |
2900 } | 2943 } |
2901 } | 2944 } |
2902 | 2945 |
2903 | 2946 |
2904 BoyerMooreLookahead::BoyerMooreLookahead( | 2947 BoyerMooreLookahead::BoyerMooreLookahead( |
2905 int length, RegExpCompiler* compiler, Zone* zone) | 2948 intptr_t length, RegExpCompiler* compiler, Isolate* isolate) |
2906 : length_(length), | 2949 : length_(length), |
2907 compiler_(compiler) { | 2950 compiler_(compiler) { |
2908 if (compiler->one_byte()) { | 2951 if (compiler->one_byte()) { |
2909 max_char_ = String::kMaxOneByteCharCode; | 2952 max_char_ = Symbols::kMaxOneCharCodeSymbol; |
2910 } else { | 2953 } else { |
2911 max_char_ = String::kMaxUtf16CodeUnit; | 2954 max_char_ = Utf16::kMaxCodeUnit; |
2912 } | 2955 } |
2913 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone); | 2956 bitmaps_ = new(isolate) ZoneGrowableArray<BoyerMoorePositionInfo*>(length); |
2914 for (int i = 0; i < length; i++) { | 2957 for (intptr_t i = 0; i < length; i++) { |
2915 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone); | 2958 bitmaps_->Add(new(isolate) BoyerMoorePositionInfo(isolate)); |
2916 } | 2959 } |
2917 } | 2960 } |
2918 | 2961 |
2919 | 2962 |
2920 // Find the longest range of lookahead that has the fewest number of different | 2963 // Find the longest range of lookahead that has the fewest number of different |
2921 // characters that can occur at a given position. Since we are optimizing two | 2964 // characters that can occur at a given position. Since we are optimizing two |
2922 // different parameters at once this is a tradeoff. | 2965 // different parameters at once this is a tradeoff. |
2923 bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) { | 2966 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) { |
2924 int biggest_points = 0; | 2967 intptr_t biggest_points = 0; |
2925 // If more than 32 characters out of 128 can occur it is unlikely that we can | 2968 // If more than 32 characters out of 128 can occur it is unlikely that we can |
2926 // be lucky enough to step forwards much of the time. | 2969 // be lucky enough to step forwards much of the time. |
2927 const int kMaxMax = 32; | 2970 const intptr_t kMaxMax = 32; |
2928 for (int max_number_of_chars = 4; | 2971 for (intptr_t max_number_of_chars = 4; |
2929 max_number_of_chars < kMaxMax; | 2972 max_number_of_chars < kMaxMax; |
2930 max_number_of_chars *= 2) { | 2973 max_number_of_chars *= 2) { |
2931 biggest_points = | 2974 biggest_points = |
2932 FindBestInterval(max_number_of_chars, biggest_points, from, to); | 2975 FindBestInterval(max_number_of_chars, biggest_points, from, to); |
2933 } | 2976 } |
2934 if (biggest_points == 0) return false; | 2977 if (biggest_points == 0) return false; |
2935 return true; | 2978 return true; |
2936 } | 2979 } |
2937 | 2980 |
2938 | 2981 |
2939 // Find the highest-points range between 0 and length_ where the character | 2982 // Find the highest-points range between 0 and length_ where the character |
2940 // information is not too vague. 'Too vague' means that there are more than | 2983 // information is not too vague. 'Too vague' means that there are more than |
2941 // max_number_of_chars that can occur at this position. Calculates the number | 2984 // max_number_of_chars that can occur at this position. Calculates the number |
2942 // of points as the product of width-of-the-range and | 2985 // of points as the product of width-of-the-range and |
2943 // probability-of-finding-one-of-the-characters, where the probability is | 2986 // probability-of-finding-one-of-the-characters, where the probability is |
2944 // calculated using the frequency distribution of the sample subject string. | 2987 // calculated using the frequency distribution of the sample subject string. |
2945 int BoyerMooreLookahead::FindBestInterval( | 2988 intptr_t BoyerMooreLookahead::FindBestInterval( |
2946 int max_number_of_chars, int old_biggest_points, int* from, int* to) { | 2989 intptr_t max_number_of_chars, |
2947 int biggest_points = old_biggest_points; | 2990 intptr_t old_biggest_points, |
2948 static const int kSize = RegExpMacroAssembler::kTableSize; | 2991 intptr_t* from, |
2949 for (int i = 0; i < length_; ) { | 2992 intptr_t* to) { |
| 2993 intptr_t biggest_points = old_biggest_points; |
| 2994 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
| 2995 for (intptr_t i = 0; i < length_; ) { |
2950 while (i < length_ && Count(i) > max_number_of_chars) i++; | 2996 while (i < length_ && Count(i) > max_number_of_chars) i++; |
2951 if (i == length_) break; | 2997 if (i == length_) break; |
2952 int remembered_from = i; | 2998 intptr_t remembered_from = i; |
2953 bool union_map[kSize]; | 2999 bool union_map[kSize]; |
2954 for (int j = 0; j < kSize; j++) union_map[j] = false; | 3000 for (intptr_t j = 0; j < kSize; j++) union_map[j] = false; |
2955 while (i < length_ && Count(i) <= max_number_of_chars) { | 3001 while (i < length_ && Count(i) <= max_number_of_chars) { |
2956 BoyerMoorePositionInfo* map = bitmaps_->at(i); | 3002 BoyerMoorePositionInfo* map = bitmaps_->At(i); |
2957 for (int j = 0; j < kSize; j++) union_map[j] |= map->at(j); | 3003 for (intptr_t j = 0; j < kSize; j++) union_map[j] |= map->at(j); |
2958 i++; | 3004 i++; |
2959 } | 3005 } |
2960 int frequency = 0; | 3006 intptr_t frequency = 0; |
2961 for (int j = 0; j < kSize; j++) { | 3007 for (intptr_t j = 0; j < kSize; j++) { |
2962 if (union_map[j]) { | 3008 if (union_map[j]) { |
2963 // Add 1 to the frequency to give a small per-character boost for | 3009 // Add 1 to the frequency to give a small per-character boost for |
2964 // the cases where our sampling is not good enough and many | 3010 // the cases where our sampling is not good enough and many |
2965 // characters have a frequency of zero. This means the frequency | 3011 // characters have a frequency of zero. This means the frequency |
2966 // can theoretically be up to 2*kSize though we treat it mostly as | 3012 // can theoretically be up to 2*kSize though we treat it mostly as |
2967 // a fraction of kSize. | 3013 // a fraction of kSize. |
2968 frequency += compiler_->frequency_collator()->Frequency(j) + 1; | 3014 frequency += compiler_->frequency_collator()->Frequency(j) + 1; |
2969 } | 3015 } |
2970 } | 3016 } |
2971 // We use the probability of skipping times the distance we are skipping to | 3017 // We use the probability of skipping times the distance we are skipping to |
2972 // judge the effectiveness of this. Actually we have a cut-off: By | 3018 // judge the effectiveness of this. Actually we have a cut-off: By |
2973 // dividing by 2 we switch off the skipping if the probability of skipping | 3019 // dividing by 2 we switch off the skipping if the probability of skipping |
2974 // is less than 50%. This is because the multibyte mask-and-compare | 3020 // is less than 50%. This is because the multibyte mask-and-compare |
2975 // skipping in quickcheck is more likely to do well on this case. | 3021 // skipping in quickcheck is more likely to do well on this case. |
2976 bool in_quickcheck_range = | 3022 bool in_quickcheck_range = ((i - remembered_from < 4) || |
2977 ((i - remembered_from < 4) || | 3023 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2)); |
2978 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2)); | |
2979 // Called 'probability' but it is only a rough estimate and can actually | 3024 // Called 'probability' but it is only a rough estimate and can actually |
2980 // be outside the 0-kSize range. | 3025 // be outside the 0-kSize range. |
2981 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency; | 3026 intptr_t probability = |
2982 int points = (i - remembered_from) * probability; | 3027 (in_quickcheck_range ? kSize / 2 : kSize) - frequency; |
| 3028 intptr_t points = (i - remembered_from) * probability; |
2983 if (points > biggest_points) { | 3029 if (points > biggest_points) { |
2984 *from = remembered_from; | 3030 *from = remembered_from; |
2985 *to = i - 1; | 3031 *to = i - 1; |
2986 biggest_points = points; | 3032 biggest_points = points; |
2987 } | 3033 } |
2988 } | 3034 } |
2989 return biggest_points; | 3035 return biggest_points; |
2990 } | 3036 } |
2991 | 3037 |
2992 | 3038 |
2993 // Take all the characters that will not prevent a successful match if they | 3039 // Take all the characters that will not prevent a successful match if they |
2994 // occur in the subject string in the range between min_lookahead and | 3040 // occur in the subject string in the range between min_lookahead and |
2995 // max_lookahead (inclusive) measured from the current position. If the | 3041 // max_lookahead (inclusive) measured from the current position. If the |
2996 // character at max_lookahead offset is not one of these characters, then we | 3042 // character at max_lookahead offset is not one of these characters, then we |
2997 // can safely skip forwards by the number of characters in the range. | 3043 // can safely skip forwards by the number of characters in the range. |
2998 int BoyerMooreLookahead::GetSkipTable(int min_lookahead, | 3044 intptr_t BoyerMooreLookahead::GetSkipTable( |
2999 int max_lookahead, | 3045 intptr_t min_lookahead, |
3000 Handle<ByteArray> boolean_skip_table) { | 3046 intptr_t max_lookahead, |
3001 const int kSize = RegExpMacroAssembler::kTableSize; | 3047 const TypedData& boolean_skip_table) { |
| 3048 const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
3002 | 3049 |
3003 const int kSkipArrayEntry = 0; | 3050 const intptr_t kSkipArrayEntry = 0; |
3004 const int kDontSkipArrayEntry = 1; | 3051 const intptr_t kDontSkipArrayEntry = 1; |
3005 | 3052 |
3006 for (int i = 0; i < kSize; i++) { | 3053 for (intptr_t i = 0; i < kSize; i++) { |
3007 boolean_skip_table->set(i, kSkipArrayEntry); | 3054 boolean_skip_table.SetUint8(i, kSkipArrayEntry); |
3008 } | 3055 } |
3009 int skip = max_lookahead + 1 - min_lookahead; | 3056 intptr_t skip = max_lookahead + 1 - min_lookahead; |
3010 | 3057 |
3011 for (int i = max_lookahead; i >= min_lookahead; i--) { | 3058 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) { |
3012 BoyerMoorePositionInfo* map = bitmaps_->at(i); | 3059 BoyerMoorePositionInfo* map = bitmaps_->At(i); |
3013 for (int j = 0; j < kSize; j++) { | 3060 for (intptr_t j = 0; j < kSize; j++) { |
3014 if (map->at(j)) { | 3061 if (map->at(j)) { |
3015 boolean_skip_table->set(j, kDontSkipArrayEntry); | 3062 boolean_skip_table.SetUint8(j, kDontSkipArrayEntry); |
3016 } | 3063 } |
3017 } | 3064 } |
3018 } | 3065 } |
3019 | 3066 |
3020 return skip; | 3067 return skip; |
3021 } | 3068 } |
3022 | 3069 |
3023 | 3070 |
3024 // See comment above on the implementation of GetSkipTable. | 3071 // See comment above on the implementation of GetSkipTable. |
3025 void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { | 3072 void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { |
3026 const int kSize = RegExpMacroAssembler::kTableSize; | 3073 const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
3027 | 3074 |
3028 int min_lookahead = 0; | 3075 intptr_t min_lookahead = 0; |
3029 int max_lookahead = 0; | 3076 intptr_t max_lookahead = 0; |
3030 | 3077 |
3031 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return; | 3078 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return; |
3032 | 3079 |
3033 bool found_single_character = false; | 3080 bool found_single_character = false; |
3034 int single_character = 0; | 3081 intptr_t single_character = 0; |
3035 for (int i = max_lookahead; i >= min_lookahead; i--) { | 3082 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) { |
3036 BoyerMoorePositionInfo* map = bitmaps_->at(i); | 3083 BoyerMoorePositionInfo* map = bitmaps_->At(i); |
3037 if (map->map_count() > 1 || | 3084 if (map->map_count() > 1 || |
3038 (found_single_character && map->map_count() != 0)) { | 3085 (found_single_character && map->map_count() != 0)) { |
3039 found_single_character = false; | 3086 found_single_character = false; |
3040 break; | 3087 break; |
3041 } | 3088 } |
3042 for (int j = 0; j < kSize; j++) { | 3089 for (intptr_t j = 0; j < kSize; j++) { |
3043 if (map->at(j)) { | 3090 if (map->at(j)) { |
3044 found_single_character = true; | 3091 found_single_character = true; |
3045 single_character = j; | 3092 single_character = j; |
3046 break; | 3093 break; |
3047 } | 3094 } |
3048 } | 3095 } |
3049 } | 3096 } |
3050 | 3097 |
3051 int lookahead_width = max_lookahead + 1 - min_lookahead; | 3098 intptr_t lookahead_width = max_lookahead + 1 - min_lookahead; |
3052 | 3099 |
3053 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) { | 3100 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) { |
3054 // The mask-compare can probably handle this better. | 3101 // The mask-compare can probably handle this better. |
3055 return; | 3102 return; |
3056 } | 3103 } |
3057 | 3104 |
3058 if (found_single_character) { | 3105 if (found_single_character) { |
3059 Label cont, again; | 3106 BlockLabel cont, again; |
3060 masm->Bind(&again); | 3107 masm->BindBlock(&again); |
3061 masm->LoadCurrentCharacter(max_lookahead, &cont, true); | 3108 masm->LoadCurrentCharacter(max_lookahead, &cont, true); |
3062 if (max_char_ > kSize) { | 3109 if (max_char_ > kSize) { |
3063 masm->CheckCharacterAfterAnd(single_character, | 3110 masm->CheckCharacterAfterAnd(single_character, |
3064 RegExpMacroAssembler::kTableMask, | 3111 RegExpMacroAssembler::kTableMask, |
3065 &cont); | 3112 &cont); |
3066 } else { | 3113 } else { |
3067 masm->CheckCharacter(single_character, &cont); | 3114 masm->CheckCharacter(single_character, &cont); |
3068 } | 3115 } |
3069 masm->AdvanceCurrentPosition(lookahead_width); | 3116 masm->AdvanceCurrentPosition(lookahead_width); |
3070 masm->GoTo(&again); | 3117 masm->GoTo(&again); |
3071 masm->Bind(&cont); | 3118 masm->BindBlock(&cont); |
3072 return; | 3119 return; |
3073 } | 3120 } |
3074 | 3121 |
3075 Factory* factory = masm->zone()->isolate()->factory(); | 3122 const TypedData& boolean_skip_table = TypedData::ZoneHandle( |
3076 Handle<ByteArray> boolean_skip_table = factory->NewByteArray(kSize, TENURED); | 3123 compiler_->isolate(), |
3077 int skip_distance = GetSkipTable( | 3124 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); |
| 3125 intptr_t skip_distance = GetSkipTable( |
3078 min_lookahead, max_lookahead, boolean_skip_table); | 3126 min_lookahead, max_lookahead, boolean_skip_table); |
3079 DCHECK(skip_distance != 0); | 3127 ASSERT(skip_distance != 0); |
3080 | 3128 |
3081 Label cont, again; | 3129 BlockLabel cont, again; |
3082 masm->Bind(&again); | 3130 |
| 3131 masm->BindBlock(&again); |
3083 masm->LoadCurrentCharacter(max_lookahead, &cont, true); | 3132 masm->LoadCurrentCharacter(max_lookahead, &cont, true); |
3084 masm->CheckBitInTable(boolean_skip_table, &cont); | 3133 masm->CheckBitInTable(boolean_skip_table, &cont); |
3085 masm->AdvanceCurrentPosition(skip_distance); | 3134 masm->AdvanceCurrentPosition(skip_distance); |
3086 masm->GoTo(&again); | 3135 masm->GoTo(&again); |
3087 masm->Bind(&cont); | 3136 masm->BindBlock(&cont); |
| 3137 |
| 3138 return; |
3088 } | 3139 } |
3089 | 3140 |
3090 | 3141 |
3091 /* Code generation for choice nodes. | 3142 /* Code generation for choice nodes. |
3092 * | 3143 * |
3093 * We generate quick checks that do a mask and compare to eliminate a | 3144 * We generate quick checks that do a mask and compare to eliminate a |
3094 * choice. If the quick check succeeds then it jumps to the continuation to | 3145 * choice. If the quick check succeeds then it jumps to the continuation to |
3095 * do slow checks and check subsequent nodes. If it fails (the common case) | 3146 * do slow checks and check subsequent nodes. If it fails (the common case) |
3096 * it falls through to the next choice. | 3147 * it falls through to the next choice. |
3097 * | 3148 * |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3163 */ | 3214 */ |
3164 | 3215 |
3165 GreedyLoopState::GreedyLoopState(bool not_at_start) { | 3216 GreedyLoopState::GreedyLoopState(bool not_at_start) { |
3166 counter_backtrack_trace_.set_backtrack(&label_); | 3217 counter_backtrack_trace_.set_backtrack(&label_); |
3167 if (not_at_start) counter_backtrack_trace_.set_at_start(false); | 3218 if (not_at_start) counter_backtrack_trace_.set_at_start(false); |
3168 } | 3219 } |
3169 | 3220 |
3170 | 3221 |
3171 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { | 3222 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { |
3172 #ifdef DEBUG | 3223 #ifdef DEBUG |
3173 int choice_count = alternatives_->length(); | 3224 intptr_t choice_count = alternatives_->length(); |
3174 for (int i = 0; i < choice_count - 1; i++) { | 3225 for (intptr_t i = 0; i < choice_count - 1; i++) { |
3175 GuardedAlternative alternative = alternatives_->at(i); | 3226 GuardedAlternative alternative = alternatives_->At(i); |
3176 ZoneList<Guard*>* guards = alternative.guards(); | 3227 ZoneGrowableArray<Guard*>* guards = alternative.guards(); |
3177 int guard_count = (guards == NULL) ? 0 : guards->length(); | 3228 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); |
3178 for (int j = 0; j < guard_count; j++) { | 3229 for (intptr_t j = 0; j < guard_count; j++) { |
3179 DCHECK(!trace->mentions_reg(guards->at(j)->reg())); | 3230 ASSERT(!trace->mentions_reg(guards->At(j)->reg())); |
3180 } | 3231 } |
3181 } | 3232 } |
3182 #endif | 3233 #endif |
3183 } | 3234 } |
3184 | 3235 |
3185 | 3236 |
3186 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, | 3237 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, |
3187 Trace* current_trace, | 3238 Trace* current_trace, |
3188 PreloadState* state) { | 3239 PreloadState* state) { |
3189 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { | 3240 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { |
3190 // Save some time by looking at most one machine word ahead. | 3241 // Save some time by looking at most one machine word ahead. |
3191 state->eats_at_least_ = | 3242 state->eats_at_least_ = |
3192 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, | 3243 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, |
3193 current_trace->at_start() == Trace::FALSE_VALUE); | 3244 current_trace->at_start() == Trace::FALSE_VALUE); |
3194 } | 3245 } |
3195 state->preload_characters_ = | 3246 state->preload_characters_ = |
3196 CalculatePreloadCharacters(compiler, state->eats_at_least_); | 3247 CalculatePreloadCharacters(compiler, state->eats_at_least_); |
3197 | 3248 |
3198 state->preload_is_current_ = | 3249 state->preload_is_current_ = |
3199 (current_trace->characters_preloaded() == state->preload_characters_); | 3250 (current_trace->characters_preloaded() == state->preload_characters_); |
3200 state->preload_has_checked_bounds_ = state->preload_is_current_; | 3251 state->preload_has_checked_bounds_ = state->preload_is_current_; |
3201 } | 3252 } |
3202 | 3253 |
3203 | 3254 |
3204 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3255 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
3205 int choice_count = alternatives_->length(); | 3256 intptr_t choice_count = alternatives_->length(); |
3206 | 3257 |
3207 AssertGuardsMentionRegisters(trace); | 3258 AssertGuardsMentionRegisters(trace); |
3208 | 3259 |
3209 LimitResult limit_result = LimitVersions(compiler, trace); | 3260 LimitResult limit_result = LimitVersions(compiler, trace); |
3210 if (limit_result == DONE) return; | 3261 if (limit_result == DONE) return; |
3211 DCHECK(limit_result == CONTINUE); | 3262 ASSERT(limit_result == CONTINUE); |
3212 | 3263 |
3213 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for | 3264 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for |
3214 // other choice nodes we only flush if we are out of code size budget. | 3265 // other choice nodes we only flush if we are out of code size budget. |
3215 if (trace->flush_budget() == 0 && trace->actions() != NULL) { | 3266 if (trace->flush_budget() == 0 && trace->actions() != NULL) { |
3216 trace->Flush(compiler, this); | 3267 trace->Flush(compiler, this); |
3217 return; | 3268 return; |
3218 } | 3269 } |
3219 | 3270 |
3220 RecursionCheck rc(compiler); | 3271 RecursionCheck rc(compiler); |
3221 | 3272 |
3222 PreloadState preload; | 3273 PreloadState preload; |
3223 preload.init(); | 3274 preload.init(); |
3224 GreedyLoopState greedy_loop_state(not_at_start()); | 3275 GreedyLoopState greedy_loop_state(not_at_start()); |
3225 | 3276 |
3226 int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0)); | 3277 intptr_t text_length = |
3227 AlternativeGenerationList alt_gens(choice_count, zone()); | 3278 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); |
| 3279 AlternativeGenerationList alt_gens(choice_count); |
3228 | 3280 |
3229 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { | 3281 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { |
3230 trace = EmitGreedyLoop(compiler, | 3282 trace = EmitGreedyLoop(compiler, |
3231 trace, | 3283 trace, |
3232 &alt_gens, | 3284 &alt_gens, |
3233 &preload, | 3285 &preload, |
3234 &greedy_loop_state, | 3286 &greedy_loop_state, |
3235 text_length); | 3287 text_length); |
3236 } else { | 3288 } else { |
3237 // TODO(erikcorry): Delete this. We don't need this label, but it makes us | 3289 // TODO(erikcorry): Delete this. We don't need this label, but it makes us |
3238 // match the traces produced pre-cleanup. | 3290 // match the traces produced pre-cleanup. |
3239 Label second_choice; | 3291 BlockLabel second_choice; |
3240 compiler->macro_assembler()->Bind(&second_choice); | 3292 compiler->macro_assembler()->BindBlock(&second_choice); |
3241 | 3293 |
3242 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); | 3294 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); |
3243 | 3295 |
3244 EmitChoices(compiler, | 3296 EmitChoices(compiler, |
3245 &alt_gens, | 3297 &alt_gens, |
3246 0, | 3298 0, |
3247 trace, | 3299 trace, |
3248 &preload); | 3300 &preload); |
3249 } | 3301 } |
3250 | 3302 |
3251 // At this point we need to generate slow checks for the alternatives where | 3303 // At this point we need to generate slow checks for the alternatives where |
3252 // the quick check was inlined. We can recognize these because the associated | 3304 // the quick check was inlined. We can recognize these because the associated |
3253 // label was bound. | 3305 // label was bound. |
3254 int new_flush_budget = trace->flush_budget() / choice_count; | 3306 intptr_t new_flush_budget = trace->flush_budget() / choice_count; |
3255 for (int i = 0; i < choice_count; i++) { | 3307 for (intptr_t i = 0; i < choice_count; i++) { |
3256 AlternativeGeneration* alt_gen = alt_gens.at(i); | 3308 AlternativeGeneration* alt_gen = alt_gens.at(i); |
3257 Trace new_trace(*trace); | 3309 Trace new_trace(*trace); |
3258 // If there are actions to be flushed we have to limit how many times | 3310 // If there are actions to be flushed we have to limit how many times |
3259 // they are flushed. Take the budget of the parent trace and distribute | 3311 // they are flushed. Take the budget of the parent trace and distribute |
3260 // it fairly amongst the children. | 3312 // it fairly amongst the children. |
3261 if (new_trace.actions() != NULL) { | 3313 if (new_trace.actions() != NULL) { |
3262 new_trace.set_flush_budget(new_flush_budget); | 3314 new_trace.set_flush_budget(new_flush_budget); |
3263 } | 3315 } |
3264 bool next_expects_preload = | 3316 bool next_expects_preload = |
3265 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload; | 3317 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload; |
3266 EmitOutOfLineContinuation(compiler, | 3318 EmitOutOfLineContinuation(compiler, |
3267 &new_trace, | 3319 &new_trace, |
3268 alternatives_->at(i), | 3320 alternatives_->At(i), |
3269 alt_gen, | 3321 alt_gen, |
3270 preload.preload_characters_, | 3322 preload.preload_characters_, |
3271 next_expects_preload); | 3323 next_expects_preload); |
3272 } | 3324 } |
3273 } | 3325 } |
3274 | 3326 |
3275 | |
3276 Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, | 3327 Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, |
3277 Trace* trace, | 3328 Trace* trace, |
3278 AlternativeGenerationList* alt_gens, | 3329 AlternativeGenerationList* alt_gens, |
3279 PreloadState* preload, | 3330 PreloadState* preload, |
3280 GreedyLoopState* greedy_loop_state, | 3331 GreedyLoopState* greedy_loop_state, |
3281 int text_length) { | 3332 intptr_t text_length) { |
3282 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3333 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
3283 // Here we have special handling for greedy loops containing only text nodes | 3334 // Here we have special handling for greedy loops containing only text nodes |
3284 // and other simple nodes. These are handled by pushing the current | 3335 // and other simple nodes. These are handled by pushing the current |
3285 // position on the stack and then incrementing the current position each | 3336 // position on the stack and then incrementing the current position each |
3286 // time around the switch. On backtrack we decrement the current position | 3337 // time around the switch. On backtrack we decrement the current position |
3287 // and check it against the pushed value. This avoids pushing backtrack | 3338 // and check it against the pushed value. This avoids pushing backtrack |
3288 // information for each iteration of the loop, which could take up a lot of | 3339 // information for each iteration of the loop, which could take up a lot of |
3289 // space. | 3340 // space. |
3290 DCHECK(trace->stop_node() == NULL); | 3341 ASSERT(trace->stop_node() == NULL); |
3291 macro_assembler->PushCurrentPosition(); | 3342 macro_assembler->PushCurrentPosition(); |
3292 Label greedy_match_failed; | 3343 BlockLabel greedy_match_failed; |
3293 Trace greedy_match_trace; | 3344 Trace greedy_match_trace; |
3294 if (not_at_start()) greedy_match_trace.set_at_start(false); | 3345 if (not_at_start()) greedy_match_trace.set_at_start(false); |
3295 greedy_match_trace.set_backtrack(&greedy_match_failed); | 3346 greedy_match_trace.set_backtrack(&greedy_match_failed); |
3296 Label loop_label; | 3347 BlockLabel loop_label; |
3297 macro_assembler->Bind(&loop_label); | 3348 macro_assembler->BindBlock(&loop_label); |
3298 greedy_match_trace.set_stop_node(this); | 3349 greedy_match_trace.set_stop_node(this); |
3299 greedy_match_trace.set_loop_label(&loop_label); | 3350 greedy_match_trace.set_loop_label(&loop_label); |
3300 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace); | 3351 (*alternatives_)[0].node()->Emit(compiler, &greedy_match_trace); |
3301 macro_assembler->Bind(&greedy_match_failed); | 3352 macro_assembler->BindBlock(&greedy_match_failed); |
3302 | 3353 |
3303 Label second_choice; // For use in greedy matches. | 3354 BlockLabel second_choice; // For use in greedy matches. |
3304 macro_assembler->Bind(&second_choice); | 3355 macro_assembler->BindBlock(&second_choice); |
3305 | 3356 |
3306 Trace* new_trace = greedy_loop_state->counter_backtrack_trace(); | 3357 Trace* new_trace = greedy_loop_state->counter_backtrack_trace(); |
3307 | 3358 |
3308 EmitChoices(compiler, | 3359 EmitChoices(compiler, |
3309 alt_gens, | 3360 alt_gens, |
3310 1, | 3361 1, |
3311 new_trace, | 3362 new_trace, |
3312 preload); | 3363 preload); |
3313 | 3364 |
3314 macro_assembler->Bind(greedy_loop_state->label()); | 3365 macro_assembler->BindBlock(greedy_loop_state->label()); |
3315 // If we have unwound to the bottom then backtrack. | 3366 // If we have unwound to the bottom then backtrack. |
3316 macro_assembler->CheckGreedyLoop(trace->backtrack()); | 3367 macro_assembler->CheckGreedyLoop(trace->backtrack()); |
3317 // Otherwise try the second priority at an earlier position. | 3368 // Otherwise try the second priority at an earlier position. |
3318 macro_assembler->AdvanceCurrentPosition(-text_length); | 3369 macro_assembler->AdvanceCurrentPosition(-text_length); |
3319 macro_assembler->GoTo(&second_choice); | 3370 macro_assembler->GoTo(&second_choice); |
3320 return new_trace; | 3371 return new_trace; |
3321 } | 3372 } |
3322 | 3373 |
3323 int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, | 3374 |
3324 Trace* trace) { | 3375 intptr_t ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, |
3325 int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized; | 3376 Trace* trace) { |
| 3377 intptr_t eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized; |
3326 if (alternatives_->length() != 2) return eats_at_least; | 3378 if (alternatives_->length() != 2) return eats_at_least; |
3327 | 3379 |
3328 GuardedAlternative alt1 = alternatives_->at(1); | 3380 GuardedAlternative alt1 = alternatives_->At(1); |
3329 if (alt1.guards() != NULL && alt1.guards()->length() != 0) { | 3381 if (alt1.guards() != NULL && alt1.guards()->length() != 0) { |
3330 return eats_at_least; | 3382 return eats_at_least; |
3331 } | 3383 } |
3332 RegExpNode* eats_anything_node = alt1.node(); | 3384 RegExpNode* eats_anything_node = alt1.node(); |
3333 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) { | 3385 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) { |
3334 return eats_at_least; | 3386 return eats_at_least; |
3335 } | 3387 } |
3336 | 3388 |
3337 // Really we should be creating a new trace when we execute this function, | 3389 // Really we should be creating a new trace when we execute this function, |
3338 // but there is no need, because the code it generates cannot backtrack, and | 3390 // but there is no need, because the code it generates cannot backtrack, and |
3339 // we always arrive here with a trivial trace (since it's the entry to a | 3391 // we always arrive here with a trivial trace (since it's the entry to a |
3340 // loop. That also implies that there are no preloaded characters, which is | 3392 // loop. That also implies that there are no preloaded characters, which is |
3341 // good, because it means we won't be violating any assumptions by | 3393 // good, because it means we won't be violating any assumptions by |
3342 // overwriting those characters with new load instructions. | 3394 // overwriting those characters with new load instructions. |
3343 DCHECK(trace->is_trivial()); | 3395 ASSERT(trace->is_trivial()); |
3344 | 3396 |
3345 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3397 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
3346 // At this point we know that we are at a non-greedy loop that will eat | 3398 // At this point we know that we are at a non-greedy loop that will eat |
3347 // any character one at a time. Any non-anchored regexp has such a | 3399 // any character one at a time. Any non-anchored regexp has such a |
3348 // loop prepended to it in order to find where it starts. We look for | 3400 // loop prepended to it in order to find where it starts. We look for |
3349 // a pattern of the form ...abc... where we can look 6 characters ahead | 3401 // a pattern of the form ...abc... where we can look 6 characters ahead |
3350 // and step forwards 3 if the character is not one of abc. Abc need | 3402 // and step forwards 3 if the character is not one of abc. Abc need |
3351 // not be atoms, they can be any reasonably limited character class or | 3403 // not be atoms, they can be any reasonably limited character class or |
3352 // small alternation. | 3404 // small alternation. |
3353 BoyerMooreLookahead* bm = bm_info(false); | 3405 BoyerMooreLookahead* bm = bm_info(false); |
3354 if (bm == NULL) { | 3406 if (bm == NULL) { |
3355 eats_at_least = Min(kMaxLookaheadForBoyerMoore, | 3407 eats_at_least = Utils::Minimum(kMaxLookaheadForBoyerMoore, |
3356 EatsAtLeast(kMaxLookaheadForBoyerMoore, | 3408 EatsAtLeast(kMaxLookaheadForBoyerMoore, |
3357 kRecursionBudget, | 3409 kRecursionBudget, |
3358 false)); | 3410 false)); |
3359 if (eats_at_least >= 1) { | 3411 if (eats_at_least >= 1) { |
3360 bm = new(zone()) BoyerMooreLookahead(eats_at_least, | 3412 bm = new(I) BoyerMooreLookahead(eats_at_least, compiler, I); |
3361 compiler, | 3413 GuardedAlternative alt0 = alternatives_->At(0); |
3362 zone()); | |
3363 GuardedAlternative alt0 = alternatives_->at(0); | |
3364 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); | 3414 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); |
3365 } | 3415 } |
3366 } | 3416 } |
3367 if (bm != NULL) { | 3417 if (bm != NULL) { |
3368 bm->EmitSkipInstructions(macro_assembler); | 3418 bm->EmitSkipInstructions(macro_assembler); |
3369 } | 3419 } |
3370 return eats_at_least; | 3420 return eats_at_least; |
3371 } | 3421 } |
3372 | 3422 |
3373 | 3423 |
3374 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, | 3424 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, |
3375 AlternativeGenerationList* alt_gens, | 3425 AlternativeGenerationList* alt_gens, |
3376 int first_choice, | 3426 intptr_t first_choice, |
3377 Trace* trace, | 3427 Trace* trace, |
3378 PreloadState* preload) { | 3428 PreloadState* preload) { |
3379 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3429 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
3380 SetUpPreLoad(compiler, trace, preload); | 3430 SetUpPreLoad(compiler, trace, preload); |
3381 | 3431 |
3382 // For now we just call all choices one after the other. The idea ultimately | 3432 // For now we just call all choices one after the other. The idea ultimately |
3383 // is to use the Dispatch table to try only the relevant ones. | 3433 // is to use the Dispatch table to try only the relevant ones. |
3384 int choice_count = alternatives_->length(); | 3434 intptr_t choice_count = alternatives_->length(); |
3385 | 3435 |
3386 int new_flush_budget = trace->flush_budget() / choice_count; | 3436 intptr_t new_flush_budget = trace->flush_budget() / choice_count; |
3387 | 3437 |
3388 for (int i = first_choice; i < choice_count; i++) { | 3438 for (intptr_t i = first_choice; i < choice_count; i++) { |
3389 bool is_last = i == choice_count - 1; | 3439 bool is_last = i == choice_count - 1; |
3390 bool fall_through_on_failure = !is_last; | 3440 bool fall_through_on_failure = !is_last; |
3391 GuardedAlternative alternative = alternatives_->at(i); | 3441 GuardedAlternative alternative = alternatives_->At(i); |
3392 AlternativeGeneration* alt_gen = alt_gens->at(i); | 3442 AlternativeGeneration* alt_gen = alt_gens->at(i); |
3393 alt_gen->quick_check_details.set_characters(preload->preload_characters_); | 3443 alt_gen->quick_check_details.set_characters(preload->preload_characters_); |
3394 ZoneList<Guard*>* guards = alternative.guards(); | 3444 ZoneGrowableArray<Guard*>* guards = alternative.guards(); |
3395 int guard_count = (guards == NULL) ? 0 : guards->length(); | 3445 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); |
3396 Trace new_trace(*trace); | 3446 Trace new_trace(*trace); |
3397 new_trace.set_characters_preloaded(preload->preload_is_current_ ? | 3447 new_trace.set_characters_preloaded(preload->preload_is_current_ ? |
3398 preload->preload_characters_ : | 3448 preload->preload_characters_ : |
3399 0); | 3449 0); |
3400 if (preload->preload_has_checked_bounds_) { | 3450 if (preload->preload_has_checked_bounds_) { |
3401 new_trace.set_bound_checked_up_to(preload->preload_characters_); | 3451 new_trace.set_bound_checked_up_to(preload->preload_characters_); |
3402 } | 3452 } |
3403 new_trace.quick_check_performed()->Clear(); | 3453 new_trace.quick_check_performed()->Clear(); |
3404 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); | 3454 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); |
3405 if (!is_last) { | 3455 if (!is_last) { |
3406 new_trace.set_backtrack(&alt_gen->after); | 3456 new_trace.set_backtrack(&alt_gen->after); |
3407 } | 3457 } |
3408 alt_gen->expects_preload = preload->preload_is_current_; | 3458 alt_gen->expects_preload = preload->preload_is_current_; |
3409 bool generate_full_check_inline = false; | 3459 bool generate_full_check_inline = false; |
3410 if (FLAG_regexp_optimization && | 3460 if (kRegexpOptimization && |
3411 try_to_emit_quick_check_for_alternative(i == 0) && | 3461 try_to_emit_quick_check_for_alternative(i == 0) && |
3412 alternative.node()->EmitQuickCheck(compiler, | 3462 alternative.node()->EmitQuickCheck(compiler, |
3413 trace, | 3463 trace, |
3414 &new_trace, | 3464 &new_trace, |
3415 preload->preload_has_checked_bounds_, | 3465 preload->preload_has_checked_bounds_, |
3416 &alt_gen->possible_success, | 3466 &alt_gen->possible_success, |
3417 &alt_gen->quick_check_details, | 3467 &alt_gen->quick_check_details, |
3418 fall_through_on_failure)) { | 3468 fall_through_on_failure)) { |
3419 // Quick check was generated for this choice. | 3469 // Quick check was generated for this choice. |
3420 preload->preload_is_current_ = true; | 3470 preload->preload_is_current_ = true; |
3421 preload->preload_has_checked_bounds_ = true; | 3471 preload->preload_has_checked_bounds_ = true; |
3422 // If we generated the quick check to fall through on possible success, | 3472 // If we generated the quick check to fall through on possible success, |
3423 // we now need to generate the full check inline. | 3473 // we now need to generate the full check inline. |
3424 if (!fall_through_on_failure) { | 3474 if (!fall_through_on_failure) { |
3425 macro_assembler->Bind(&alt_gen->possible_success); | 3475 macro_assembler->BindBlock(&alt_gen->possible_success); |
3426 new_trace.set_quick_check_performed(&alt_gen->quick_check_details); | 3476 new_trace.set_quick_check_performed(&alt_gen->quick_check_details); |
3427 new_trace.set_characters_preloaded(preload->preload_characters_); | 3477 new_trace.set_characters_preloaded(preload->preload_characters_); |
3428 new_trace.set_bound_checked_up_to(preload->preload_characters_); | 3478 new_trace.set_bound_checked_up_to(preload->preload_characters_); |
3429 generate_full_check_inline = true; | 3479 generate_full_check_inline = true; |
3430 } | 3480 } |
3431 } else if (alt_gen->quick_check_details.cannot_match()) { | 3481 } else if (alt_gen->quick_check_details.cannot_match()) { |
3432 if (!fall_through_on_failure) { | 3482 if (!fall_through_on_failure) { |
3433 macro_assembler->GoTo(trace->backtrack()); | 3483 macro_assembler->GoTo(trace->backtrack()); |
3434 } | 3484 } |
3435 continue; | 3485 continue; |
3436 } else { | 3486 } else { |
3437 // No quick check was generated. Put the full code here. | 3487 // No quick check was generated. Put the full code here. |
3438 // If this is not the first choice then there could be slow checks from | 3488 // If this is not the first choice then there could be slow checks from |
3439 // previous cases that go here when they fail. There's no reason to | 3489 // previous cases that go here when they fail. There's no reason to |
3440 // insist that they preload characters since the slow check we are about | 3490 // insist that they preload characters since the slow check we are about |
3441 // to generate probably can't use it. | 3491 // to generate probably can't use it. |
3442 if (i != first_choice) { | 3492 if (i != first_choice) { |
3443 alt_gen->expects_preload = false; | 3493 alt_gen->expects_preload = false; |
3444 new_trace.InvalidateCurrentCharacter(); | 3494 new_trace.InvalidateCurrentCharacter(); |
3445 } | 3495 } |
3446 generate_full_check_inline = true; | 3496 generate_full_check_inline = true; |
3447 } | 3497 } |
3448 if (generate_full_check_inline) { | 3498 if (generate_full_check_inline) { |
3449 if (new_trace.actions() != NULL) { | 3499 if (new_trace.actions() != NULL) { |
3450 new_trace.set_flush_budget(new_flush_budget); | 3500 new_trace.set_flush_budget(new_flush_budget); |
3451 } | 3501 } |
3452 for (int j = 0; j < guard_count; j++) { | 3502 for (intptr_t j = 0; j < guard_count; j++) { |
3453 GenerateGuard(macro_assembler, guards->at(j), &new_trace); | 3503 GenerateGuard(macro_assembler, guards->At(j), &new_trace); |
3454 } | 3504 } |
3455 alternative.node()->Emit(compiler, &new_trace); | 3505 alternative.node()->Emit(compiler, &new_trace); |
3456 preload->preload_is_current_ = false; | 3506 preload->preload_is_current_ = false; |
3457 } | 3507 } |
3458 macro_assembler->Bind(&alt_gen->after); | 3508 macro_assembler->BindBlock(&alt_gen->after); |
3459 } | 3509 } |
3460 } | 3510 } |
3461 | 3511 |
3462 | 3512 |
3463 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, | 3513 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, |
3464 Trace* trace, | 3514 Trace* trace, |
3465 GuardedAlternative alternative, | 3515 GuardedAlternative alternative, |
3466 AlternativeGeneration* alt_gen, | 3516 AlternativeGeneration* alt_gen, |
3467 int preload_characters, | 3517 intptr_t preload_characters, |
3468 bool next_expects_preload) { | 3518 bool next_expects_preload) { |
3469 if (!alt_gen->possible_success.is_linked()) return; | 3519 if (!alt_gen->possible_success.IsLinked()) return; |
3470 | 3520 |
3471 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3521 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
3472 macro_assembler->Bind(&alt_gen->possible_success); | 3522 macro_assembler->BindBlock(&alt_gen->possible_success); |
3473 Trace out_of_line_trace(*trace); | 3523 Trace out_of_line_trace(*trace); |
3474 out_of_line_trace.set_characters_preloaded(preload_characters); | 3524 out_of_line_trace.set_characters_preloaded(preload_characters); |
3475 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details); | 3525 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details); |
3476 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE); | 3526 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE); |
3477 ZoneList<Guard*>* guards = alternative.guards(); | 3527 ZoneGrowableArray<Guard*>* guards = alternative.guards(); |
3478 int guard_count = (guards == NULL) ? 0 : guards->length(); | 3528 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); |
3479 if (next_expects_preload) { | 3529 if (next_expects_preload) { |
3480 Label reload_current_char; | 3530 BlockLabel reload_current_char; |
3481 out_of_line_trace.set_backtrack(&reload_current_char); | 3531 out_of_line_trace.set_backtrack(&reload_current_char); |
3482 for (int j = 0; j < guard_count; j++) { | 3532 for (intptr_t j = 0; j < guard_count; j++) { |
3483 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace); | 3533 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); |
3484 } | 3534 } |
3485 alternative.node()->Emit(compiler, &out_of_line_trace); | 3535 alternative.node()->Emit(compiler, &out_of_line_trace); |
3486 macro_assembler->Bind(&reload_current_char); | 3536 macro_assembler->BindBlock(&reload_current_char); |
3487 // Reload the current character, since the next quick check expects that. | 3537 // Reload the current character, since the next quick check expects that. |
3488 // We don't need to check bounds here because we only get into this | 3538 // We don't need to check bounds here because we only get into this |
3489 // code through a quick check which already did the checked load. | 3539 // code through a quick check which already did the checked load. |
3490 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), | 3540 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), |
3491 NULL, | 3541 NULL, |
3492 false, | 3542 false, |
3493 preload_characters); | 3543 preload_characters); |
3494 macro_assembler->GoTo(&(alt_gen->after)); | 3544 macro_assembler->GoTo(&(alt_gen->after)); |
3495 } else { | 3545 } else { |
3496 out_of_line_trace.set_backtrack(&(alt_gen->after)); | 3546 out_of_line_trace.set_backtrack(&(alt_gen->after)); |
3497 for (int j = 0; j < guard_count; j++) { | 3547 for (intptr_t j = 0; j < guard_count; j++) { |
3498 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace); | 3548 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); |
3499 } | 3549 } |
3500 alternative.node()->Emit(compiler, &out_of_line_trace); | 3550 alternative.node()->Emit(compiler, &out_of_line_trace); |
3501 } | 3551 } |
3502 } | 3552 } |
3503 | 3553 |
3504 | 3554 |
3505 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3555 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
3506 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3556 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
3507 LimitResult limit_result = LimitVersions(compiler, trace); | 3557 LimitResult limit_result = LimitVersions(compiler, trace); |
3508 if (limit_result == DONE) return; | 3558 if (limit_result == DONE) return; |
3509 DCHECK(limit_result == CONTINUE); | 3559 ASSERT(limit_result == CONTINUE); |
3510 | 3560 |
3511 RecursionCheck rc(compiler); | 3561 RecursionCheck rc(compiler); |
3512 | 3562 |
3513 switch (action_type_) { | 3563 switch (action_type_) { |
3514 case STORE_POSITION: { | 3564 case STORE_POSITION: { |
3515 Trace::DeferredCapture | 3565 Trace::DeferredCapture |
3516 new_capture(data_.u_position_register.reg, | 3566 new_capture(data_.u_position_register.reg, |
3517 data_.u_position_register.is_capture, | 3567 data_.u_position_register.is_capture, |
3518 trace); | 3568 trace); |
3519 Trace new_trace = *trace; | 3569 Trace new_trace = *trace; |
(...skipping 31 matching lines...)
3551 trace->Flush(compiler, this); | 3601 trace->Flush(compiler, this); |
3552 } else { | 3602 } else { |
3553 assembler->WriteCurrentPositionToRegister( | 3603 assembler->WriteCurrentPositionToRegister( |
3554 data_.u_submatch.current_position_register, 0); | 3604 data_.u_submatch.current_position_register, 0); |
3555 assembler->WriteStackPointerToRegister( | 3605 assembler->WriteStackPointerToRegister( |
3556 data_.u_submatch.stack_pointer_register); | 3606 data_.u_submatch.stack_pointer_register); |
3557 on_success()->Emit(compiler, trace); | 3607 on_success()->Emit(compiler, trace); |
3558 } | 3608 } |
3559 break; | 3609 break; |
3560 case EMPTY_MATCH_CHECK: { | 3610 case EMPTY_MATCH_CHECK: { |
3561 int start_pos_reg = data_.u_empty_match_check.start_register; | 3611 intptr_t start_pos_reg = data_.u_empty_match_check.start_register; |
3562 int stored_pos = 0; | 3612 intptr_t stored_pos = 0; |
3563 int rep_reg = data_.u_empty_match_check.repetition_register; | 3613 intptr_t rep_reg = data_.u_empty_match_check.repetition_register; |
3564 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister); | 3614 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister); |
3565 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos); | 3615 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos); |
3566 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) { | 3616 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) { |
3567 // If we know we haven't advanced and there is no minimum we | 3617 // If we know we haven't advanced and there is no minimum we |
3568 // can just backtrack immediately. | 3618 // can just backtrack immediately. |
3569 assembler->GoTo(trace->backtrack()); | 3619 assembler->GoTo(trace->backtrack()); |
3570 } else if (know_dist && stored_pos < trace->cp_offset()) { | 3620 } else if (know_dist && stored_pos < trace->cp_offset()) { |
3571 // If we know we've advanced we can generate the continuation | 3621 // If we know we've advanced we can generate the continuation |
3572 // immediately. | 3622 // immediately. |
3573 on_success()->Emit(compiler, trace); | 3623 on_success()->Emit(compiler, trace); |
3574 } else if (!trace->is_trivial()) { | 3624 } else if (!trace->is_trivial()) { |
3575 trace->Flush(compiler, this); | 3625 trace->Flush(compiler, this); |
3576 } else { | 3626 } else { |
3577 Label skip_empty_check; | 3627 BlockLabel skip_empty_check; |
3578 // If we have a minimum number of repetitions we check the current | 3628 // If we have a minimum number of repetitions we check the current |
3579 // number first and skip the empty check if it's not enough. | 3629 // number first and skip the empty check if it's not enough. |
3580 if (has_minimum) { | 3630 if (has_minimum) { |
3581 int limit = data_.u_empty_match_check.repetition_limit; | 3631 intptr_t limit = data_.u_empty_match_check.repetition_limit; |
3582 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check); | 3632 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check); |
3583 } | 3633 } |
3584 // If the match is empty we bail out, otherwise we fall through | 3634 // If the match is empty we bail out, otherwise we fall through |
3585 // to the on-success continuation. | 3635 // to the on-success continuation. |
3586 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register, | 3636 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register, |
3587 trace->backtrack()); | 3637 trace->backtrack()); |
3588 assembler->Bind(&skip_empty_check); | 3638 assembler->BindBlock(&skip_empty_check); |
3589 on_success()->Emit(compiler, trace); | 3639 on_success()->Emit(compiler, trace); |
3590 } | 3640 } |
3591 break; | 3641 break; |
3592 } | 3642 } |
3593 case POSITIVE_SUBMATCH_SUCCESS: { | 3643 case POSITIVE_SUBMATCH_SUCCESS: { |
3594 if (!trace->is_trivial()) { | 3644 if (!trace->is_trivial()) { |
3595 trace->Flush(compiler, this); | 3645 trace->Flush(compiler, this); |
3596 return; | 3646 return; |
3597 } | 3647 } |
3598 assembler->ReadCurrentPositionFromRegister( | 3648 assembler->ReadCurrentPositionFromRegister( |
3599 data_.u_submatch.current_position_register); | 3649 data_.u_submatch.current_position_register); |
3600 assembler->ReadStackPointerFromRegister( | 3650 assembler->ReadStackPointerFromRegister( |
3601 data_.u_submatch.stack_pointer_register); | 3651 data_.u_submatch.stack_pointer_register); |
3602 int clear_register_count = data_.u_submatch.clear_register_count; | 3652 intptr_t clear_register_count = data_.u_submatch.clear_register_count; |
3603 if (clear_register_count == 0) { | 3653 if (clear_register_count == 0) { |
3604 on_success()->Emit(compiler, trace); | 3654 on_success()->Emit(compiler, trace); |
3605 return; | 3655 return; |
3606 } | 3656 } |
3607 int clear_registers_from = data_.u_submatch.clear_register_from; | 3657 intptr_t clear_registers_from = data_.u_submatch.clear_register_from; |
3608 Label clear_registers_backtrack; | 3658 BlockLabel clear_registers_backtrack; |
3609 Trace new_trace = *trace; | 3659 Trace new_trace = *trace; |
3610 new_trace.set_backtrack(&clear_registers_backtrack); | 3660 new_trace.set_backtrack(&clear_registers_backtrack); |
3611 on_success()->Emit(compiler, &new_trace); | 3661 on_success()->Emit(compiler, &new_trace); |
3612 | 3662 |
3613 assembler->Bind(&clear_registers_backtrack); | 3663 assembler->BindBlock(&clear_registers_backtrack); |
3614 int clear_registers_to = clear_registers_from + clear_register_count - 1; | 3664 intptr_t clear_registers_to = |
| 3665 clear_registers_from + clear_register_count - 1; |
3615 assembler->ClearRegisters(clear_registers_from, clear_registers_to); | 3666 assembler->ClearRegisters(clear_registers_from, clear_registers_to); |
3616 | 3667 |
3617 DCHECK(trace->backtrack() == NULL); | 3668 ASSERT(trace->backtrack() == NULL); |
3618 assembler->Backtrack(); | 3669 assembler->Backtrack(); |
3619 return; | 3670 return; |
3620 } | 3671 } |
3621 default: | 3672 default: |
3622 UNREACHABLE(); | 3673 UNREACHABLE(); |
3623 } | 3674 } |
3624 } | 3675 } |
3625 | 3676 |
3626 | 3677 |
3627 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3678 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
3628 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3679 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
3629 if (!trace->is_trivial()) { | 3680 if (!trace->is_trivial()) { |
3630 trace->Flush(compiler, this); | 3681 trace->Flush(compiler, this); |
3631 return; | 3682 return; |
3632 } | 3683 } |
3633 | 3684 |
3634 LimitResult limit_result = LimitVersions(compiler, trace); | 3685 LimitResult limit_result = LimitVersions(compiler, trace); |
3635 if (limit_result == DONE) return; | 3686 if (limit_result == DONE) return; |
3636 DCHECK(limit_result == CONTINUE); | 3687 ASSERT(limit_result == CONTINUE); |
3637 | 3688 |
3638 RecursionCheck rc(compiler); | 3689 RecursionCheck rc(compiler); |
3639 | 3690 |
3640 DCHECK_EQ(start_reg_ + 1, end_reg_); | 3691 ASSERT(start_reg_ + 1 == end_reg_); |
3641 if (compiler->ignore_case()) { | 3692 if (compiler->ignore_case()) { |
3642 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, | 3693 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, |
3643 trace->backtrack()); | 3694 trace->backtrack()); |
3644 } else { | 3695 } else { |
3645 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); | 3696 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); |
3646 } | 3697 } |
3647 on_success()->Emit(compiler, trace); | 3698 on_success()->Emit(compiler, trace); |
3648 } | 3699 } |
3649 | 3700 |
3650 | 3701 |
3651 // ------------------------------------------------------------------- | 3702 // ------------------------------------------------------------------- |
3652 // Dot/dotty output | 3703 // Dot/dotty output |
3653 | 3704 |
3654 | 3705 |
3655 #ifdef DEBUG | 3706 #ifdef DEBUG |
3656 | 3707 |
3657 | 3708 |
3658 class DotPrinter: public NodeVisitor { | 3709 class DotPrinter: public NodeVisitor { |
3659 public: | 3710 public: |
3660 DotPrinter(OStream& os, bool ignore_case) // NOLINT | 3711 explicit DotPrinter(bool ignore_case) {} |
3661 : os_(os), | |
3662 ignore_case_(ignore_case) {} | |
3663 void PrintNode(const char* label, RegExpNode* node); | 3712 void PrintNode(const char* label, RegExpNode* node); |
3664 void Visit(RegExpNode* node); | 3713 void Visit(RegExpNode* node); |
3665 void PrintAttributes(RegExpNode* from); | 3714 void PrintAttributes(RegExpNode* from); |
3666 void PrintOnFailure(RegExpNode* from, RegExpNode* to); | 3715 void PrintOnFailure(RegExpNode* from, RegExpNode* to); |
3667 #define DECLARE_VISIT(Type) \ | 3716 #define DECLARE_VISIT(Type) \ |
3668 virtual void Visit##Type(Type##Node* that); | 3717 virtual void Visit##Type(Type##Node* that); |
3669 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | 3718 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
3670 #undef DECLARE_VISIT | 3719 #undef DECLARE_VISIT |
3671 private: | |
3672 OStream& os_; | |
3673 bool ignore_case_; | |
3674 }; | 3720 }; |
3675 | 3721 |
3676 | 3722 |
3677 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { | 3723 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { |
3678 os_ << "digraph G {\n graph [label=\""; | 3724 OS::Print("digraph G {\n graph [label=\""); |
3679 for (int i = 0; label[i]; i++) { | 3725 for (intptr_t i = 0; label[i]; i++) { |
3680 switch (label[i]) { | 3726 switch (label[i]) { |
3681 case '\\': | 3727 case '\\': |
3682 os_ << "\\\\"; | 3728 OS::Print("\\\\"); |
3683 break; | 3729 break; |
3684 case '"': | 3730 case '"': |
3685 os_ << "\""; | 3731 OS::Print("\""); |
3686 break; | 3732 break; |
3687 default: | 3733 default: |
3688 os_ << label[i]; | 3734 OS::Print("%c", label[i]); |
3689 break; | 3735 break; |
3690 } | 3736 } |
3691 } | 3737 } |
3692 os_ << "\"];\n"; | 3738 OS::Print("\"];\n"); |
3693 Visit(node); | 3739 Visit(node); |
3694 os_ << "}" << endl; | 3740 OS::Print("}\n"); |
3695 } | 3741 } |
3696 | 3742 |
3697 | 3743 |
3698 void DotPrinter::Visit(RegExpNode* node) { | 3744 void DotPrinter::Visit(RegExpNode* node) { |
3699 if (node->info()->visited) return; | 3745 if (node->info()->visited) return; |
3700 node->info()->visited = true; | 3746 node->info()->visited = true; |
3701 node->Accept(this); | 3747 node->Accept(this); |
3702 } | 3748 } |
3703 | 3749 |
3704 | 3750 |
3705 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { | 3751 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { |
3706 os_ << " n" << from << " -> n" << on_failure << " [style=dotted];\n"; | 3752 OS::Print(" n%p -> n%p [style=dotted];\n", from, on_failure); |
3707 Visit(on_failure); | 3753 Visit(on_failure); |
3708 } | 3754 } |
3709 | 3755 |
3710 | 3756 |
3711 class TableEntryBodyPrinter { | 3757 class AttributePrinter : public ValueObject { |
3712 public: | 3758 public: |
3713 TableEntryBodyPrinter(OStream& os, ChoiceNode* choice) // NOLINT | 3759 AttributePrinter() : first_(true) {} |
3714 : os_(os), | |
3715 choice_(choice) {} | |
3716 void Call(uc16 from, DispatchTable::Entry entry) { | |
3717 OutSet* out_set = entry.out_set(); | |
3718 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { | |
3719 if (out_set->Get(i)) { | |
3720 os_ << " n" << choice() << ":s" << from << "o" << i << " -> n" | |
3721 << choice()->alternatives()->at(i).node() << ";\n"; | |
3722 } | |
3723 } | |
3724 } | |
3725 private: | |
3726 ChoiceNode* choice() { return choice_; } | |
3727 OStream& os_; | |
3728 ChoiceNode* choice_; | |
3729 }; | |
3730 | |
3731 | |
3732 class TableEntryHeaderPrinter { | |
3733 public: | |
3734 explicit TableEntryHeaderPrinter(OStream& os) // NOLINT | |
3735 : first_(true), | |
3736 os_(os) {} | |
3737 void Call(uc16 from, DispatchTable::Entry entry) { | |
3738 if (first_) { | |
3739 first_ = false; | |
3740 } else { | |
3741 os_ << "|"; | |
3742 } | |
3743 os_ << "{\\" << AsUC16(from) << "-\\" << AsUC16(entry.to()) << "|{"; | |
3744 OutSet* out_set = entry.out_set(); | |
3745 int priority = 0; | |
3746 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { | |
3747 if (out_set->Get(i)) { | |
3748 if (priority > 0) os_ << "|"; | |
3749 os_ << "<s" << from << "o" << i << "> " << priority; | |
3750 priority++; | |
3751 } | |
3752 } | |
3753 os_ << "}}"; | |
3754 } | |
3755 | |
3756 private: | |
3757 bool first_; | |
3758 OStream& os_; | |
3759 }; | |
3760 | |
3761 | |
3762 class AttributePrinter { | |
3763 public: | |
3764 explicit AttributePrinter(OStream& os) // NOLINT | |
3765 : os_(os), | |
3766 first_(true) {} | |
3767 void PrintSeparator() { | 3760 void PrintSeparator() { |
3768 if (first_) { | 3761 if (first_) { |
3769 first_ = false; | 3762 first_ = false; |
3770 } else { | 3763 } else { |
3771 os_ << "|"; | 3764 OS::Print("|"); |
3772 } | 3765 } |
3773 } | 3766 } |
3774 void PrintBit(const char* name, bool value) { | 3767 void PrintBit(const char* name, bool value) { |
3775 if (!value) return; | 3768 if (!value) return; |
3776 PrintSeparator(); | 3769 PrintSeparator(); |
3777 os_ << "{" << name << "}"; | 3770 OS::Print("{%s}", name); |
3778 } | 3771 } |
3779 void PrintPositive(const char* name, int value) { | 3772 void PrintPositive(const char* name, intptr_t value) { |
3780 if (value < 0) return; | 3773 if (value < 0) return; |
3781 PrintSeparator(); | 3774 PrintSeparator(); |
3782 os_ << "{" << name << "|" << value << "}"; | 3775 OS::Print("{%s|%" Pd "}", name, value); |
3783 } | 3776 } |
3784 | 3777 |
3785 private: | 3778 private: |
3786 OStream& os_; | |
3787 bool first_; | 3779 bool first_; |
3788 }; | 3780 }; |
3789 | 3781 |
3790 | 3782 |
3791 void DotPrinter::PrintAttributes(RegExpNode* that) { | 3783 void DotPrinter::PrintAttributes(RegExpNode* that) { |
3792 os_ << " a" << that << " [shape=Mrecord, color=grey, fontcolor=grey, " | 3784 OS::Print(" a%p [shape=Mrecord, color=grey, fontcolor=grey, " |
3793 << "margin=0.1, fontsize=10, label=\"{"; | 3785 "margin=0.1, fontsize=10, label=\"{", that); |
3794 AttributePrinter printer(os_); | 3786 AttributePrinter printer; |
3795 NodeInfo* info = that->info(); | 3787 NodeInfo* info = that->info(); |
3796 printer.PrintBit("NI", info->follows_newline_interest); | 3788 printer.PrintBit("NI", info->follows_newline_interest); |
3797 printer.PrintBit("WI", info->follows_word_interest); | 3789 printer.PrintBit("WI", info->follows_word_interest); |
3798 printer.PrintBit("SI", info->follows_start_interest); | 3790 printer.PrintBit("SI", info->follows_start_interest); |
3799 Label* label = that->label(); | 3791 BlockLabel* label = that->label(); |
3800 if (label->is_bound()) | 3792 if (label->IsBound()) |
3801 printer.PrintPositive("@", label->pos()); | 3793 printer.PrintPositive("@", label->Position()); |
3802 os_ << "}\"];\n" | 3794 OS::Print("}\"];\n" |
3803 << " a" << that << " -> n" << that | 3795 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", |
3804 << " [style=dashed, color=grey, arrowhead=none];\n"; | 3796 that, that); |
3805 } | 3797 } |
3806 | 3798 |
3807 | 3799 |
3808 static const bool kPrintDispatchTable = false; | |
3809 void DotPrinter::VisitChoice(ChoiceNode* that) { | 3800 void DotPrinter::VisitChoice(ChoiceNode* that) { |
3810 if (kPrintDispatchTable) { | 3801 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that); |
3811 os_ << " n" << that << " [shape=Mrecord, label=\""; | 3802 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
3812 TableEntryHeaderPrinter header_printer(os_); | 3803 GuardedAlternative alt = that->alternatives()->At(i); |
3813 that->GetTable(ignore_case_)->ForEach(&header_printer); | 3804 OS::Print(" n%p -> n%p", that, alt.node()); |
3814 os_ << "\"]\n"; | |
3815 PrintAttributes(that); | |
3816 TableEntryBodyPrinter body_printer(os_, that); | |
3817 that->GetTable(ignore_case_)->ForEach(&body_printer); | |
3818 } else { | |
3819 os_ << " n" << that << " [shape=Mrecord, label=\"?\"];\n"; | |
3820 for (int i = 0; i < that->alternatives()->length(); i++) { | |
3821 GuardedAlternative alt = that->alternatives()->at(i); | |
3822 os_ << " n" << that << " -> n" << alt.node(); | |
3823 } | |
3824 } | 3805 } |
3825 for (int i = 0; i < that->alternatives()->length(); i++) { | 3806 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
3826 GuardedAlternative alt = that->alternatives()->at(i); | 3807 GuardedAlternative alt = that->alternatives()->At(i); |
3827 alt.node()->Accept(this); | 3808 alt.node()->Accept(this); |
3828 } | 3809 } |
3829 } | 3810 } |
3830 | 3811 |
3831 | 3812 |
3832 void DotPrinter::VisitText(TextNode* that) { | 3813 void DotPrinter::VisitText(TextNode* that) { |
3833 Zone* zone = that->zone(); | 3814 OS::Print(" n%p [label=\"", that); |
3834 os_ << " n" << that << " [label=\""; | 3815 for (intptr_t i = 0; i < that->elements()->length(); i++) { |
3835 for (int i = 0; i < that->elements()->length(); i++) { | 3816 if (i > 0) OS::Print(" "); |
3836 if (i > 0) os_ << " "; | 3817 TextElement elm = that->elements()->At(i); |
3837 TextElement elm = that->elements()->at(i); | |
3838 switch (elm.text_type()) { | 3818 switch (elm.text_type()) { |
3839 case TextElement::ATOM: { | 3819 case TextElement::ATOM: { |
3840 Vector<const uc16> data = elm.atom()->data(); | 3820 ZoneGrowableArray<uint16_t>* data = elm.atom()->data(); |
3841 for (int i = 0; i < data.length(); i++) { | 3821 for (intptr_t i = 0; i < data->length(); i++) { |
3842 os_ << static_cast<char>(data[i]); | 3822 OS::Print("%c", static_cast<char>(data->At(i))); |
3843 } | 3823 } |
3844 break; | 3824 break; |
3845 } | 3825 } |
3846 case TextElement::CHAR_CLASS: { | 3826 case TextElement::CHAR_CLASS: { |
3847 RegExpCharacterClass* node = elm.char_class(); | 3827 RegExpCharacterClass* node = elm.char_class(); |
3848 os_ << "["; | 3828 OS::Print("["); |
3849 if (node->is_negated()) os_ << "^"; | 3829 if (node->is_negated()) OS::Print("^"); |
3850 for (int j = 0; j < node->ranges(zone)->length(); j++) { | 3830 for (intptr_t j = 0; j < node->ranges()->length(); j++) { |
3851 CharacterRange range = node->ranges(zone)->at(j); | 3831 CharacterRange range = node->ranges()->At(j); |
3852 os_ << AsUC16(range.from()) << "-" << AsUC16(range.to()); | 3832 PrintUtf16(range.from()); |
| 3833 OS::Print("-"); |
| 3834 PrintUtf16(range.to()); |
3853 } | 3835 } |
3854 os_ << "]"; | 3836 OS::Print("]"); |
3855 break; | 3837 break; |
3856 } | 3838 } |
3857 default: | 3839 default: |
3858 UNREACHABLE(); | 3840 UNREACHABLE(); |
3859 } | 3841 } |
3860 } | 3842 } |
3861 os_ << "\", shape=box, peripheries=2];\n"; | 3843 OS::Print("\", shape=box, peripheries=2];\n"); |
3862 PrintAttributes(that); | 3844 PrintAttributes(that); |
3863 os_ << " n" << that << " -> n" << that->on_success() << ";\n"; | 3845 OS::Print(" n%p -> n%p;\n", that, that->on_success()); |
3864 Visit(that->on_success()); | 3846 Visit(that->on_success()); |
3865 } | 3847 } |
3866 | 3848 |
3867 | 3849 |
3868 void DotPrinter::VisitBackReference(BackReferenceNode* that) { | 3850 void DotPrinter::VisitBackReference(BackReferenceNode* that) { |
3869 os_ << " n" << that << " [label=\"$" << that->start_register() << "..$" | 3851 OS::Print(" n%p [label=\"$%" Pd "..$%" Pd "\", shape=doubleoctagon];\n", |
3870 << that->end_register() << "\", shape=doubleoctagon];\n"; | 3852 that, that->start_register(), that->end_register()); |
3871 PrintAttributes(that); | 3853 PrintAttributes(that); |
3872 os_ << " n" << that << " -> n" << that->on_success() << ";\n"; | 3854 OS::Print(" n%p -> n%p;\n", that, that->on_success()); |
3873 Visit(that->on_success()); | 3855 Visit(that->on_success()); |
3874 } | 3856 } |
3875 | 3857 |
3876 | 3858 |
3877 void DotPrinter::VisitEnd(EndNode* that) { | 3859 void DotPrinter::VisitEnd(EndNode* that) { |
3878 os_ << " n" << that << " [style=bold, shape=point];\n"; | 3860 OS::Print(" n%p [style=bold, shape=point];\n", that); |
3879 PrintAttributes(that); | 3861 PrintAttributes(that); |
3880 } | 3862 } |
3881 | 3863 |
3882 | 3864 |
3883 void DotPrinter::VisitAssertion(AssertionNode* that) { | 3865 void DotPrinter::VisitAssertion(AssertionNode* that) { |
3884 os_ << " n" << that << " ["; | 3866 OS::Print(" n%p [", that); |
3885 switch (that->assertion_type()) { | 3867 switch (that->assertion_type()) { |
3886 case AssertionNode::AT_END: | 3868 case AssertionNode::AT_END: |
3887 os_ << "label=\"$\", shape=septagon"; | 3869 OS::Print("label=\"$\", shape=septagon"); |
3888 break; | 3870 break; |
3889 case AssertionNode::AT_START: | 3871 case AssertionNode::AT_START: |
3890 os_ << "label=\"^\", shape=septagon"; | 3872 OS::Print("label=\"^\", shape=septagon"); |
3891 break; | 3873 break; |
3892 case AssertionNode::AT_BOUNDARY: | 3874 case AssertionNode::AT_BOUNDARY: |
3893 os_ << "label=\"\\b\", shape=septagon"; | 3875 OS::Print("label=\"\\b\", shape=septagon"); |
3894 break; | 3876 break; |
3895 case AssertionNode::AT_NON_BOUNDARY: | 3877 case AssertionNode::AT_NON_BOUNDARY: |
3896 os_ << "label=\"\\B\", shape=septagon"; | 3878 OS::Print("label=\"\\B\", shape=septagon"); |
3897 break; | 3879 break; |
3898 case AssertionNode::AFTER_NEWLINE: | 3880 case AssertionNode::AFTER_NEWLINE: |
3899 os_ << "label=\"(?<=\\n)\", shape=septagon"; | 3881 OS::Print("label=\"(?<=\\n)\", shape=septagon"); |
3900 break; | 3882 break; |
3901 } | 3883 } |
3902 os_ << "];\n"; | 3884 OS::Print("];\n"); |
3903 PrintAttributes(that); | 3885 PrintAttributes(that); |
3904 RegExpNode* successor = that->on_success(); | 3886 RegExpNode* successor = that->on_success(); |
3905 os_ << " n" << that << " -> n" << successor << ";\n"; | 3887 OS::Print(" n%p -> n%p;\n", that, successor); |
3906 Visit(successor); | 3888 Visit(successor); |
3907 } | 3889 } |
3908 | 3890 |
3909 | 3891 |
3910 void DotPrinter::VisitAction(ActionNode* that) { | 3892 void DotPrinter::VisitAction(ActionNode* that) { |
3911 os_ << " n" << that << " ["; | 3893 OS::Print(" n%p [", that); |
3912 switch (that->action_type_) { | 3894 switch (that->action_type_) { |
3913 case ActionNode::SET_REGISTER: | 3895 case ActionNode::SET_REGISTER: |
3914 os_ << "label=\"$" << that->data_.u_store_register.reg | 3896 OS::Print("label=\"$%" Pd ":=%" Pd "\", shape=octagon", |
3915 << ":=" << that->data_.u_store_register.value << "\", shape=octagon"; | 3897 that->data_.u_store_register.reg, |
| 3898 that->data_.u_store_register.value); |
3916 break; | 3899 break; |
3917 case ActionNode::INCREMENT_REGISTER: | 3900 case ActionNode::INCREMENT_REGISTER: |
3918 os_ << "label=\"$" << that->data_.u_increment_register.reg | 3901 OS::Print("label=\"$%" Pd "++\", shape=octagon", |
3919 << "++\", shape=octagon"; | 3902 that->data_.u_increment_register.reg); |
3920 break; | 3903 break; |
3921 case ActionNode::STORE_POSITION: | 3904 case ActionNode::STORE_POSITION: |
3922 os_ << "label=\"$" << that->data_.u_position_register.reg | 3905 OS::Print("label=\"$%" Pd ":=$pos\", shape=octagon", |
3923 << ":=$pos\", shape=octagon"; | 3906 that->data_.u_position_register.reg); |
3924 break; | 3907 break; |
3925 case ActionNode::BEGIN_SUBMATCH: | 3908 case ActionNode::BEGIN_SUBMATCH: |
3926 os_ << "label=\"$" << that->data_.u_submatch.current_position_register | 3909 OS::Print("label=\"$%" Pd ":=$pos,begin\", shape=septagon", |
3927 << ":=$pos,begin\", shape=septagon"; | 3910 that->data_.u_submatch.current_position_register); |
3928 break; | 3911 break; |
3929 case ActionNode::POSITIVE_SUBMATCH_SUCCESS: | 3912 case ActionNode::POSITIVE_SUBMATCH_SUCCESS: |
3930 os_ << "label=\"escape\", shape=septagon"; | 3913 OS::Print("label=\"escape\", shape=septagon"); |
3931 break; | 3914 break; |
3932 case ActionNode::EMPTY_MATCH_CHECK: | 3915 case ActionNode::EMPTY_MATCH_CHECK: |
3933 os_ << "label=\"$" << that->data_.u_empty_match_check.start_register | 3916 OS::Print("label=\"$%" Pd "=$pos?,$%" Pd "<%" Pd "?\", shape=septagon", |
3934 << "=$pos?,$" << that->data_.u_empty_match_check.repetition_register | 3917 that->data_.u_empty_match_check.start_register, |
3935 << "<" << that->data_.u_empty_match_check.repetition_limit | 3918 that->data_.u_empty_match_check.repetition_register, |
3936 << "?\", shape=septagon"; | 3919 that->data_.u_empty_match_check.repetition_limit); |
3937 break; | 3920 break; |
3938 case ActionNode::CLEAR_CAPTURES: { | 3921 case ActionNode::CLEAR_CAPTURES: { |
3939 os_ << "label=\"clear $" << that->data_.u_clear_captures.range_from | 3922 OS::Print("label=\"clear $%" Pd " to $%" Pd "\", shape=septagon", |
3940 << " to $" << that->data_.u_clear_captures.range_to | 3923 that->data_.u_clear_captures.range_from, |
3941 << "\", shape=septagon"; | 3924 that->data_.u_clear_captures.range_to); |
3942 break; | 3925 break; |
3943 } | 3926 } |
3944 } | 3927 } |
3945 os_ << "];\n"; | 3928 OS::Print("];\n"); |
3946 PrintAttributes(that); | 3929 PrintAttributes(that); |
3947 RegExpNode* successor = that->on_success(); | 3930 RegExpNode* successor = that->on_success(); |
3948 os_ << " n" << that << " -> n" << successor << ";\n"; | 3931 OS::Print(" n%p -> n%p;\n", that, successor); |
3949 Visit(successor); | 3932 Visit(successor); |
3950 } | 3933 } |
3951 | 3934 |
3952 | 3935 |
3953 class DispatchTableDumper { | |
3954 public: | |
3955 explicit DispatchTableDumper(OStream& os) : os_(os) {} | |
3956 void Call(uc16 key, DispatchTable::Entry entry); | |
3957 private: | |
3958 OStream& os_; | |
3959 }; | |
3960 | |
3961 | |
3962 void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) { | |
3963 os_ << "[" << AsUC16(key) << "-" << AsUC16(entry.to()) << "]: {"; | |
3964 OutSet* set = entry.out_set(); | |
3965 bool first = true; | |
3966 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { | |
3967 if (set->Get(i)) { | |
3968 if (first) { | |
3969 first = false; | |
3970 } else { | |
3971 os_ << ", "; | |
3972 } | |
3973 os_ << i; | |
3974 } | |
3975 } | |
3976 os_ << "}\n"; | |
3977 } | |
3978 | |
3979 | |
3980 void DispatchTable::Dump() { | |
3981 OFStream os(stderr); | |
3982 DispatchTableDumper dumper(os); | |
3983 tree()->ForEach(&dumper); | |
3984 } | |
3985 | |
3986 | |
3987 void RegExpEngine::DotPrint(const char* label, | 3936 void RegExpEngine::DotPrint(const char* label, |
3988 RegExpNode* node, | 3937 RegExpNode* node, |
3989 bool ignore_case) { | 3938 bool ignore_case) { |
3990 OFStream os(stdout); | 3939 DotPrinter printer(ignore_case); |
3991 DotPrinter printer(os, ignore_case); | |
3992 printer.PrintNode(label, node); | 3940 printer.PrintNode(label, node); |
3993 } | 3941 } |
3994 | 3942 |
3995 | 3943 |
3996 #endif // DEBUG | 3944 #endif // DEBUG |
3997 | 3945 |
3998 | 3946 |
3999 // ------------------------------------------------------------------- | 3947 // ------------------------------------------------------------------- |
4000 // Tree to graph conversion | 3948 // Tree to graph conversion |
4001 | 3949 |
4002 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, | 3950 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, |
4003 RegExpNode* on_success) { | 3951 RegExpNode* on_success) { |
4004 ZoneList<TextElement>* elms = | 3952 ZoneGrowableArray<TextElement>* elms = |
4005 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone()); | 3953 new(CI) ZoneGrowableArray<TextElement>(1); |
4006 elms->Add(TextElement::Atom(this), compiler->zone()); | 3954 elms->Add(TextElement::Atom(this)); |
4007 return new(compiler->zone()) TextNode(elms, on_success); | 3955 return new(CI) TextNode(elms, on_success); |
4008 } | 3956 } |
4009 | 3957 |
4010 | 3958 |
4011 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, | 3959 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
4012 RegExpNode* on_success) { | 3960 RegExpNode* on_success) { |
4013 return new(compiler->zone()) TextNode(elements(), on_success); | 3961 ZoneGrowableArray<TextElement>* elms = |
| 3962 new(CI) ZoneGrowableArray<TextElement>(1); |
| 3963 for (intptr_t i = 0; i < elements()->length(); i++) { |
| 3964 elms->Add(elements()->At(i)); |
| 3965 } |
| 3966 return new(CI) TextNode(elms, on_success); |
4014 } | 3967 } |
4015 | 3968 |
4016 | 3969 |
4017 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, | 3970 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges, |
4018 const int* special_class, | 3971 const intptr_t* special_class, |
4019 int length) { | 3972 intptr_t length) { |
4020 length--; // Remove final 0x10000. | 3973 length--; // Remove final 0x10000. |
4021 DCHECK(special_class[length] == 0x10000); | 3974 ASSERT(special_class[length] == 0x10000); |
4022 DCHECK(ranges->length() != 0); | 3975 ASSERT(ranges->length() != 0); |
4023 DCHECK(length != 0); | 3976 ASSERT(length != 0); |
4024 DCHECK(special_class[0] != 0); | 3977 ASSERT(special_class[0] != 0); |
4025 if (ranges->length() != (length >> 1) + 1) { | 3978 if (ranges->length() != (length >> 1) + 1) { |
4026 return false; | 3979 return false; |
4027 } | 3980 } |
4028 CharacterRange range = ranges->at(0); | 3981 CharacterRange range = ranges->At(0); |
4029 if (range.from() != 0) { | 3982 if (range.from() != 0) { |
4030 return false; | 3983 return false; |
4031 } | 3984 } |
4032 for (int i = 0; i < length; i += 2) { | 3985 for (intptr_t i = 0; i < length; i += 2) { |
4033 if (special_class[i] != (range.to() + 1)) { | 3986 if (special_class[i] != (range.to() + 1)) { |
4034 return false; | 3987 return false; |
4035 } | 3988 } |
4036 range = ranges->at((i >> 1) + 1); | 3989 range = ranges->At((i >> 1) + 1); |
4037 if (special_class[i+1] != range.from()) { | 3990 if (special_class[i+1] != range.from()) { |
4038 return false; | 3991 return false; |
4039 } | 3992 } |
4040 } | 3993 } |
4041 if (range.to() != 0xffff) { | 3994 if (range.to() != 0xffff) { |
4042 return false; | 3995 return false; |
4043 } | 3996 } |
4044 return true; | 3997 return true; |
4045 } | 3998 } |
4046 | 3999 |
4047 | 4000 |
4048 static bool CompareRanges(ZoneList<CharacterRange>* ranges, | 4001 static bool CompareRanges(ZoneGrowableArray<CharacterRange>* ranges, |
4049 const int* special_class, | 4002 const intptr_t* special_class, |
4050 int length) { | 4003 intptr_t length) { |
4051 length--; // Remove final 0x10000. | 4004 length--; // Remove final 0x10000. |
4052 DCHECK(special_class[length] == 0x10000); | 4005 ASSERT(special_class[length] == 0x10000); |
4053 if (ranges->length() * 2 != length) { | 4006 if (ranges->length() * 2 != length) { |
4054 return false; | 4007 return false; |
4055 } | 4008 } |
4056 for (int i = 0; i < length; i += 2) { | 4009 for (intptr_t i = 0; i < length; i += 2) { |
4057 CharacterRange range = ranges->at(i >> 1); | 4010 CharacterRange range = ranges->At(i >> 1); |
4058 if (range.from() != special_class[i] || | 4011 if (range.from() != special_class[i] || |
4059 range.to() != special_class[i + 1] - 1) { | 4012 range.to() != special_class[i + 1] - 1) { |
4060 return false; | 4013 return false; |
4061 } | 4014 } |
4062 } | 4015 } |
4063 return true; | 4016 return true; |
4064 } | 4017 } |
4065 | 4018 |
4066 | 4019 |
4067 bool RegExpCharacterClass::is_standard(Zone* zone) { | 4020 bool RegExpCharacterClass::is_standard() { |
4068 // TODO(lrn): Remove need for this function, by not throwing away information | 4021 // TODO(lrn): Remove need for this function, by not throwing away information |
4069 // along the way. | 4022 // along the way. |
4070 if (is_negated_) { | 4023 if (is_negated_) { |
4071 return false; | 4024 return false; |
4072 } | 4025 } |
4073 if (set_.is_standard()) { | 4026 if (set_.is_standard()) { |
4074 return true; | 4027 return true; |
4075 } | 4028 } |
4076 if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) { | 4029 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { |
4077 set_.set_standard_set_type('s'); | 4030 set_.set_standard_set_type('s'); |
4078 return true; | 4031 return true; |
4079 } | 4032 } |
4080 if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) { | 4033 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { |
4081 set_.set_standard_set_type('S'); | 4034 set_.set_standard_set_type('S'); |
4082 return true; | 4035 return true; |
4083 } | 4036 } |
4084 if (CompareInverseRanges(set_.ranges(zone), | 4037 if (CompareInverseRanges(set_.ranges(), |
4085 kLineTerminatorRanges, | 4038 kLineTerminatorRanges, |
4086 kLineTerminatorRangeCount)) { | 4039 kLineTerminatorRangeCount)) { |
4087 set_.set_standard_set_type('.'); | 4040 set_.set_standard_set_type('.'); |
4088 return true; | 4041 return true; |
4089 } | 4042 } |
4090 if (CompareRanges(set_.ranges(zone), | 4043 if (CompareRanges(set_.ranges(), |
4091 kLineTerminatorRanges, | 4044 kLineTerminatorRanges, |
4092 kLineTerminatorRangeCount)) { | 4045 kLineTerminatorRangeCount)) { |
4093 set_.set_standard_set_type('n'); | 4046 set_.set_standard_set_type('n'); |
4094 return true; | 4047 return true; |
4095 } | 4048 } |
4096 if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { | 4049 if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { |
4097 set_.set_standard_set_type('w'); | 4050 set_.set_standard_set_type('w'); |
4098 return true; | 4051 return true; |
4099 } | 4052 } |
4100 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { | 4053 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { |
4101 set_.set_standard_set_type('W'); | 4054 set_.set_standard_set_type('W'); |
4102 return true; | 4055 return true; |
4103 } | 4056 } |
4104 return false; | 4057 return false; |
4105 } | 4058 } |
4106 | 4059 |
4107 | 4060 |
4108 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 4061 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
4109 RegExpNode* on_success) { | 4062 RegExpNode* on_success) { |
4110 return new(compiler->zone()) TextNode(this, on_success); | 4063 return new(CI) TextNode(this, on_success); |
4111 } | 4064 } |
4112 | 4065 |
4113 | 4066 |
4114 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 4067 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
4115 RegExpNode* on_success) { | 4068 RegExpNode* on_success) { |
4116 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | 4069 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); |
4117 int length = alternatives->length(); | 4070 intptr_t length = alternatives->length(); |
4118 ChoiceNode* result = | 4071 ChoiceNode* result = |
4119 new(compiler->zone()) ChoiceNode(length, compiler->zone()); | 4072 new(CI) ChoiceNode(length, CI); |
4120 for (int i = 0; i < length; i++) { | 4073 for (intptr_t i = 0; i < length; i++) { |
4121 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, | 4074 GuardedAlternative alternative(alternatives->At(i)->ToNode(compiler, |
4122 on_success)); | 4075 on_success)); |
4123 result->AddAlternative(alternative); | 4076 result->AddAlternative(alternative); |
4124 } | 4077 } |
4125 return result; | 4078 return result; |
4126 } | 4079 } |
4127 | 4080 |
4128 | 4081 |
4129 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, | 4082 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, |
4130 RegExpNode* on_success) { | 4083 RegExpNode* on_success) { |
4131 return ToNode(min(), | 4084 return ToNode(min(), |
4132 max(), | 4085 max(), |
4133 is_greedy(), | 4086 is_greedy(), |
4134 body(), | 4087 body(), |
4135 compiler, | 4088 compiler, |
4136 on_success); | 4089 on_success); |
4137 } | 4090 } |
4138 | 4091 |
4139 | 4092 |
4140 // Scoped object to keep track of how much we unroll quantifier loops in the | 4093 // Scoped object to keep track of how much we unroll quantifier loops in the |
4141 // regexp graph generator. | 4094 // regexp graph generator. |
4142 class RegExpExpansionLimiter { | 4095 class RegExpExpansionLimiter : public ValueObject { |
4143 public: | 4096 public: |
4144 static const int kMaxExpansionFactor = 6; | 4097 static const intptr_t kMaxExpansionFactor = 6; |
4145 RegExpExpansionLimiter(RegExpCompiler* compiler, int factor) | 4098 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) |
4146 : compiler_(compiler), | 4099 : compiler_(compiler), |
4147 saved_expansion_factor_(compiler->current_expansion_factor()), | 4100 saved_expansion_factor_(compiler->current_expansion_factor()), |
4148 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { | 4101 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { |
4149 DCHECK(factor > 0); | 4102 ASSERT(factor > 0); |
4150 if (ok_to_expand_) { | 4103 if (ok_to_expand_) { |
4151 if (factor > kMaxExpansionFactor) { | 4104 if (factor > kMaxExpansionFactor) { |
4152 // Avoid integer overflow of the current expansion factor. | 4105 // Avoid integer overflow of the current expansion factor. |
4153 ok_to_expand_ = false; | 4106 ok_to_expand_ = false; |
4154 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1); | 4107 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1); |
4155 } else { | 4108 } else { |
4156 int new_factor = saved_expansion_factor_ * factor; | 4109 intptr_t new_factor = saved_expansion_factor_ * factor; |
4157 ok_to_expand_ = (new_factor <= kMaxExpansionFactor); | 4110 ok_to_expand_ = (new_factor <= kMaxExpansionFactor); |
4158 compiler->set_current_expansion_factor(new_factor); | 4111 compiler->set_current_expansion_factor(new_factor); |
4159 } | 4112 } |
4160 } | 4113 } |
4161 } | 4114 } |
4162 | 4115 |
4163 ~RegExpExpansionLimiter() { | 4116 ~RegExpExpansionLimiter() { |
4164 compiler_->set_current_expansion_factor(saved_expansion_factor_); | 4117 compiler_->set_current_expansion_factor(saved_expansion_factor_); |
4165 } | 4118 } |
4166 | 4119 |
4167 bool ok_to_expand() { return ok_to_expand_; } | 4120 bool ok_to_expand() { return ok_to_expand_; } |
4168 | 4121 |
4169 private: | 4122 private: |
4170 RegExpCompiler* compiler_; | 4123 RegExpCompiler* compiler_; |
4171 int saved_expansion_factor_; | 4124 intptr_t saved_expansion_factor_; |
4172 bool ok_to_expand_; | 4125 bool ok_to_expand_; |
4173 | 4126 |
4174 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); | 4127 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); |
4175 }; | 4128 }; |
4176 | 4129 |
4177 | 4130 |
4178 RegExpNode* RegExpQuantifier::ToNode(int min, | 4131 RegExpNode* RegExpQuantifier::ToNode(intptr_t min, |
4179 int max, | 4132 intptr_t max, |
4180 bool is_greedy, | 4133 bool is_greedy, |
4181 RegExpTree* body, | 4134 RegExpTree* body, |
4182 RegExpCompiler* compiler, | 4135 RegExpCompiler* compiler, |
4183 RegExpNode* on_success, | 4136 RegExpNode* on_success, |
4184 bool not_at_start) { | 4137 bool not_at_start) { |
4185 // x{f, t} becomes this: | 4138 // x{f, t} becomes this: |
4186 // | 4139 // |
4187 // (r++)<-. | 4140 // (r++)<-. |
4188 // | ` | 4141 // | ` |
4189 // | (x) | 4142 // | (x) |
4190 // v ^ | 4143 // v ^ |
4191 // (r=0)-->(?)---/ [if r < t] | 4144 // (r=0)-->(?)---/ [if r < t] |
4192 // | | 4145 // | |
4193 // [if r >= f] \----> ... | 4146 // [if r >= f] \----> ... |
4194 // | 4147 // |
4195 | 4148 |
4196 // 15.10.2.5 RepeatMatcher algorithm. | 4149 // 15.10.2.5 RepeatMatcher algorithm. |
4197 // The parser has already eliminated the case where max is 0. In the case | 4150 // The parser has already eliminated the case where max is 0. In the case |
4198 // where max_match is zero the parser has removed the quantifier if min was | 4151 // where max_match is zero the parser has removed the quantifier if min was |
4199 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. | 4152 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. |
4200 | 4153 |
4201 // If we know that we cannot match zero length then things are a little | 4154 // If we know that we cannot match zero length then things are a little |
4202 // simpler since we don't need to make the special zero length match check | 4155 // simpler since we don't need to make the special zero length match check |
4203 // from step 2.1. If the min and max are small we can unroll a little in | 4156 // from step 2.1. If the min and max are small we can unroll a little in |
4204 // this case. | 4157 // this case. |
4205 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,} | 4158 // Unroll (foo)+ and (foo){3,} |
4206 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3} | 4159 static const intptr_t kMaxUnrolledMinMatches = 3; |
| 4160 // Unroll (foo)? and (foo){x,3} |
| 4161 static const intptr_t kMaxUnrolledMaxMatches = 3; |
4207 if (max == 0) return on_success; // This can happen due to recursion. | 4162 if (max == 0) return on_success; // This can happen due to recursion. |
4208 bool body_can_be_empty = (body->min_match() == 0); | 4163 bool body_can_be_empty = (body->min_match() == 0); |
4209 int body_start_reg = RegExpCompiler::kNoRegister; | 4164 intptr_t body_start_reg = RegExpCompiler::kNoRegister; |
4210 Interval capture_registers = body->CaptureRegisters(); | 4165 Interval capture_registers = body->CaptureRegisters(); |
4211 bool needs_capture_clearing = !capture_registers.is_empty(); | 4166 bool needs_capture_clearing = !capture_registers.is_empty(); |
4212 Zone* zone = compiler->zone(); | 4167 Isolate* isolate = compiler->isolate(); |
4213 | 4168 |
4214 if (body_can_be_empty) { | 4169 if (body_can_be_empty) { |
4215 body_start_reg = compiler->AllocateRegister(); | 4170 body_start_reg = compiler->AllocateRegister(); |
4216 } else if (FLAG_regexp_optimization && !needs_capture_clearing) { | 4171 } else if (kRegexpOptimization && !needs_capture_clearing) { |
4217 // Only unroll if there are no captures and the body can't be | 4172 // Only unroll if there are no captures and the body can't be |
4218 // empty. | 4173 // empty. |
4219 { | 4174 { |
4220 RegExpExpansionLimiter limiter( | 4175 RegExpExpansionLimiter limiter( |
4221 compiler, min + ((max != min) ? 1 : 0)); | 4176 compiler, min + ((max != min) ? 1 : 0)); |
4222 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { | 4177 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { |
4223 int new_max = (max == kInfinity) ? max : max - min; | 4178 intptr_t new_max = (max == kInfinity) ? max : max - min; |
4224 // Recurse once to get the loop or optional matches after the fixed | 4179 // Recurse once to get the loop or optional matches after the fixed |
4225 // ones. | 4180 // ones. |
4226 RegExpNode* answer = ToNode( | 4181 RegExpNode* answer = ToNode( |
4227 0, new_max, is_greedy, body, compiler, on_success, true); | 4182 0, new_max, is_greedy, body, compiler, on_success, true); |
4228 // Unroll the forced matches from 0 to min. This can cause chains of | 4183 // Unroll the forced matches from 0 to min. This can cause chains of |
4229 // TextNodes (which the parser does not generate). These should be | 4184 // TextNodes (which the parser does not generate). These should be |
4230 // combined if it turns out they hinder good code generation. | 4185 // combined if it turns out they hinder good code generation. |
4231 for (int i = 0; i < min; i++) { | 4186 for (intptr_t i = 0; i < min; i++) { |
4232 answer = body->ToNode(compiler, answer); | 4187 answer = body->ToNode(compiler, answer); |
4233 } | 4188 } |
4234 return answer; | 4189 return answer; |
4235 } | 4190 } |
4236 } | 4191 } |
4237 if (max <= kMaxUnrolledMaxMatches && min == 0) { | 4192 if (max <= kMaxUnrolledMaxMatches && min == 0) { |
4238 DCHECK(max > 0); // Due to the 'if' above. | 4193 ASSERT(max > 0); // Due to the 'if' above. |
4239 RegExpExpansionLimiter limiter(compiler, max); | 4194 RegExpExpansionLimiter limiter(compiler, max); |
4240 if (limiter.ok_to_expand()) { | 4195 if (limiter.ok_to_expand()) { |
4241 // Unroll the optional matches up to max. | 4196 // Unroll the optional matches up to max. |
4242 RegExpNode* answer = on_success; | 4197 RegExpNode* answer = on_success; |
4243 for (int i = 0; i < max; i++) { | 4198 for (intptr_t i = 0; i < max; i++) { |
4244 ChoiceNode* alternation = new(zone) ChoiceNode(2, zone); | 4199 ChoiceNode* alternation = new(isolate) ChoiceNode(2, isolate); |
4245 if (is_greedy) { | 4200 if (is_greedy) { |
4246 alternation->AddAlternative( | 4201 alternation->AddAlternative( |
4247 GuardedAlternative(body->ToNode(compiler, answer))); | 4202 GuardedAlternative(body->ToNode(compiler, answer))); |
4248 alternation->AddAlternative(GuardedAlternative(on_success)); | 4203 alternation->AddAlternative(GuardedAlternative(on_success)); |
4249 } else { | 4204 } else { |
4250 alternation->AddAlternative(GuardedAlternative(on_success)); | 4205 alternation->AddAlternative(GuardedAlternative(on_success)); |
4251 alternation->AddAlternative( | 4206 alternation->AddAlternative( |
4252 GuardedAlternative(body->ToNode(compiler, answer))); | 4207 GuardedAlternative(body->ToNode(compiler, answer))); |
4253 } | 4208 } |
4254 answer = alternation; | 4209 answer = alternation; |
4255 if (not_at_start) alternation->set_not_at_start(); | 4210 if (not_at_start) alternation->set_not_at_start(); |
4256 } | 4211 } |
4257 return answer; | 4212 return answer; |
4258 } | 4213 } |
4259 } | 4214 } |
4260 } | 4215 } |
4261 bool has_min = min > 0; | 4216 bool has_min = min > 0; |
4262 bool has_max = max < RegExpTree::kInfinity; | 4217 bool has_max = max < RegExpTree::kInfinity; |
4263 bool needs_counter = has_min || has_max; | 4218 bool needs_counter = has_min || has_max; |
4264 int reg_ctr = needs_counter | 4219 intptr_t reg_ctr = needs_counter |
4265 ? compiler->AllocateRegister() | 4220 ? compiler->AllocateRegister() |
4266 : RegExpCompiler::kNoRegister; | 4221 : RegExpCompiler::kNoRegister; |
4267 LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0, | 4222 LoopChoiceNode* center = new(isolate) LoopChoiceNode(body->min_match() == 0, |
4268 zone); | 4223 isolate); |
4269 if (not_at_start) center->set_not_at_start(); | 4224 if (not_at_start) center->set_not_at_start(); |
4270 RegExpNode* loop_return = needs_counter | 4225 RegExpNode* loop_return = needs_counter |
4271 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) | 4226 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) |
4272 : static_cast<RegExpNode*>(center); | 4227 : static_cast<RegExpNode*>(center); |
4273 if (body_can_be_empty) { | 4228 if (body_can_be_empty) { |
4274 // If the body can be empty we need to check if it was and then | 4229 // If the body can be empty we need to check if it was and then |
4275 // backtrack. | 4230 // backtrack. |
4276 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, | 4231 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, |
4277 reg_ctr, | 4232 reg_ctr, |
4278 min, | 4233 min, |
4279 loop_return); | 4234 loop_return); |
4280 } | 4235 } |
4281 RegExpNode* body_node = body->ToNode(compiler, loop_return); | 4236 RegExpNode* body_node = body->ToNode(compiler, loop_return); |
4282 if (body_can_be_empty) { | 4237 if (body_can_be_empty) { |
4283 // If the body can be empty we need to store the start position | 4238 // If the body can be empty we need to store the start position |
4284 // so we can bail out if it was empty. | 4239 // so we can bail out if it was empty. |
4285 body_node = ActionNode::StorePosition(body_start_reg, false, body_node); | 4240 body_node = ActionNode::StorePosition(body_start_reg, false, body_node); |
4286 } | 4241 } |
4287 if (needs_capture_clearing) { | 4242 if (needs_capture_clearing) { |
4288 // Before entering the body of this loop we need to clear captures. | 4243 // Before entering the body of this loop we need to clear captures. |
4289 body_node = ActionNode::ClearCaptures(capture_registers, body_node); | 4244 body_node = ActionNode::ClearCaptures(capture_registers, body_node); |
4290 } | 4245 } |
4291 GuardedAlternative body_alt(body_node); | 4246 GuardedAlternative body_alt(body_node); |
4292 if (has_max) { | 4247 if (has_max) { |
4293 Guard* body_guard = | 4248 Guard* body_guard = |
4294 new(zone) Guard(reg_ctr, Guard::LT, max); | 4249 new(isolate) Guard(reg_ctr, Guard::LT, max); |
4295 body_alt.AddGuard(body_guard, zone); | 4250 body_alt.AddGuard(body_guard, isolate); |
4296 } | 4251 } |
4297 GuardedAlternative rest_alt(on_success); | 4252 GuardedAlternative rest_alt(on_success); |
4298 if (has_min) { | 4253 if (has_min) { |
4299 Guard* rest_guard = new(compiler->zone()) Guard(reg_ctr, Guard::GEQ, min); | 4254 Guard* rest_guard = new(isolate) Guard(reg_ctr, Guard::GEQ, min); |
4300 rest_alt.AddGuard(rest_guard, zone); | 4255 rest_alt.AddGuard(rest_guard, isolate); |
4301 } | 4256 } |
4302 if (is_greedy) { | 4257 if (is_greedy) { |
4303 center->AddLoopAlternative(body_alt); | 4258 center->AddLoopAlternative(body_alt); |
4304 center->AddContinueAlternative(rest_alt); | 4259 center->AddContinueAlternative(rest_alt); |
4305 } else { | 4260 } else { |
4306 center->AddContinueAlternative(rest_alt); | 4261 center->AddContinueAlternative(rest_alt); |
4307 center->AddLoopAlternative(body_alt); | 4262 center->AddLoopAlternative(body_alt); |
4308 } | 4263 } |
4309 if (needs_counter) { | 4264 if (needs_counter) { |
4310 return ActionNode::SetRegister(reg_ctr, 0, center); | 4265 return ActionNode::SetRegister(reg_ctr, 0, center); |
4311 } else { | 4266 } else { |
4312 return center; | 4267 return center; |
4313 } | 4268 } |
4314 } | 4269 } |
4315 | 4270 |
4316 | 4271 |
4317 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, | 4272 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, |
4318 RegExpNode* on_success) { | 4273 RegExpNode* on_success) { |
4319 NodeInfo info; | |
4320 Zone* zone = compiler->zone(); | |
4321 | |
4322 switch (assertion_type()) { | 4274 switch (assertion_type()) { |
4323 case START_OF_LINE: | 4275 case START_OF_LINE: |
4324 return AssertionNode::AfterNewline(on_success); | 4276 return AssertionNode::AfterNewline(on_success); |
4325 case START_OF_INPUT: | 4277 case START_OF_INPUT: |
4326 return AssertionNode::AtStart(on_success); | 4278 return AssertionNode::AtStart(on_success); |
4327 case BOUNDARY: | 4279 case BOUNDARY: |
4328 return AssertionNode::AtBoundary(on_success); | 4280 return AssertionNode::AtBoundary(on_success); |
4329 case NON_BOUNDARY: | 4281 case NON_BOUNDARY: |
4330 return AssertionNode::AtNonBoundary(on_success); | 4282 return AssertionNode::AtNonBoundary(on_success); |
4331 case END_OF_INPUT: | 4283 case END_OF_INPUT: |
4332 return AssertionNode::AtEnd(on_success); | 4284 return AssertionNode::AtEnd(on_success); |
4333 case END_OF_LINE: { | 4285 case END_OF_LINE: { |
4334 // Compile $ in multiline regexps as an alternation with a positive | 4286 // Compile $ in multiline regexps as an alternation with a positive |
4335 // lookahead in one side and an end-of-input on the other side. | 4287 // lookahead in one side and an end-of-input on the other side. |
4336 // We need two registers for the lookahead. | 4288 // We need two registers for the lookahead. |
4337 int stack_pointer_register = compiler->AllocateRegister(); | 4289 intptr_t stack_pointer_register = compiler->AllocateRegister(); |
4338 int position_register = compiler->AllocateRegister(); | 4290 intptr_t position_register = compiler->AllocateRegister(); |
4339 // The ChoiceNode to distinguish between a newline and end-of-input. | 4291 // The ChoiceNode to distinguish between a newline and end-of-input. |
4340 ChoiceNode* result = new(zone) ChoiceNode(2, zone); | 4292 ChoiceNode* result = new ChoiceNode(2, on_success->isolate()); |
4341 // Create a newline atom. | 4293 // Create a newline atom. |
4342 ZoneList<CharacterRange>* newline_ranges = | 4294 ZoneGrowableArray<CharacterRange>* newline_ranges = |
4343 new(zone) ZoneList<CharacterRange>(3, zone); | 4295 new ZoneGrowableArray<CharacterRange>(3); |
4344 CharacterRange::AddClassEscape('n', newline_ranges, zone); | 4296 CharacterRange::AddClassEscape('n', newline_ranges); |
4345 RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n'); | 4297 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n'); |
4346 TextNode* newline_matcher = new(zone) TextNode( | 4298 TextNode* newline_matcher = new TextNode( |
4347 newline_atom, | 4299 newline_atom, |
4348 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, | 4300 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, |
4349 position_register, | 4301 position_register, |
4350 0, // No captures inside. | 4302 0, // No captures inside. |
4351 -1, // Ignored if no captures. | 4303 -1, // Ignored if no captures. |
4352 on_success)); | 4304 on_success)); |
4353 // Create an end-of-input matcher. | 4305 // Create an end-of-input matcher. |
4354 RegExpNode* end_of_line = ActionNode::BeginSubmatch( | 4306 RegExpNode* end_of_line = ActionNode::BeginSubmatch( |
4355 stack_pointer_register, | 4307 stack_pointer_register, |
4356 position_register, | 4308 position_register, |
4357 newline_matcher); | 4309 newline_matcher); |
4358 // Add the two alternatives to the ChoiceNode. | 4310 // Add the two alternatives to the ChoiceNode. |
4359 GuardedAlternative eol_alternative(end_of_line); | 4311 GuardedAlternative eol_alternative(end_of_line); |
4360 result->AddAlternative(eol_alternative); | 4312 result->AddAlternative(eol_alternative); |
4361 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); | 4313 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); |
4362 result->AddAlternative(end_alternative); | 4314 result->AddAlternative(end_alternative); |
4363 return result; | 4315 return result; |
4364 } | 4316 } |
4365 default: | 4317 default: |
4366 UNREACHABLE(); | 4318 UNREACHABLE(); |
4367 } | 4319 } |
4368 return on_success; | 4320 return on_success; |
4369 } | 4321 } |
4370 | 4322 |
4371 | 4323 |
4372 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, | 4324 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, |
4373 RegExpNode* on_success) { | 4325 RegExpNode* on_success) { |
4374 return new(compiler->zone()) | 4326 return new(CI) |
4375 BackReferenceNode(RegExpCapture::StartRegister(index()), | 4327 BackReferenceNode(RegExpCapture::StartRegister(index()), |
4376 RegExpCapture::EndRegister(index()), | 4328 RegExpCapture::EndRegister(index()), |
4377 on_success); | 4329 on_success); |
4378 } | 4330 } |
4379 | 4331 |
4380 | 4332 |
4381 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, | 4333 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, |
4382 RegExpNode* on_success) { | 4334 RegExpNode* on_success) { |
4383 return on_success; | 4335 return on_success; |
4384 } | 4336 } |
4385 | 4337 |
4386 | 4338 |
4387 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, | 4339 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, |
4388 RegExpNode* on_success) { | 4340 RegExpNode* on_success) { |
4389 int stack_pointer_register = compiler->AllocateRegister(); | 4341 intptr_t stack_pointer_register = compiler->AllocateRegister(); |
4390 int position_register = compiler->AllocateRegister(); | 4342 intptr_t position_register = compiler->AllocateRegister(); |
4391 | 4343 |
4392 const int registers_per_capture = 2; | 4344 const intptr_t registers_per_capture = 2; |
4393 const int register_of_first_capture = 2; | 4345 const intptr_t register_of_first_capture = 2; |
4394 int register_count = capture_count_ * registers_per_capture; | 4346 intptr_t register_count = capture_count_ * registers_per_capture; |
4395 int register_start = | 4347 intptr_t register_start = |
4396 register_of_first_capture + capture_from_ * registers_per_capture; | 4348 register_of_first_capture + capture_from_ * registers_per_capture; |
4397 | 4349 |
4398 RegExpNode* success; | 4350 RegExpNode* success; |
4399 if (is_positive()) { | 4351 if (is_positive()) { |
4400 RegExpNode* node = ActionNode::BeginSubmatch( | 4352 RegExpNode* node = ActionNode::BeginSubmatch( |
4401 stack_pointer_register, | 4353 stack_pointer_register, |
4402 position_register, | 4354 position_register, |
4403 body()->ToNode( | 4355 body()->ToNode( |
4404 compiler, | 4356 compiler, |
4405 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, | 4357 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, |
4406 position_register, | 4358 position_register, |
4407 register_count, | 4359 register_count, |
4408 register_start, | 4360 register_start, |
4409 on_success))); | 4361 on_success))); |
4410 return node; | 4362 return node; |
4411 } else { | 4363 } else { |
4412 // We use a ChoiceNode for a negative lookahead because it has most of | 4364 // We use a ChoiceNode for a negative lookahead because it has most of |
4413 // the characteristics we need. It has the body of the lookahead as its | 4365 // the characteristics we need. It has the body of the lookahead as its |
4414 // first alternative and the expression after the lookahead as its second | 4366 // first alternative and the expression after the lookahead as its second |
4415 // alternative. If the first alternative succeeds then the | 4367 // alternative. If the first alternative succeeds then the |
4416 // NegativeSubmatchSuccess will unwind the stack including everything the | 4368 // NegativeSubmatchSuccess will unwind the stack including everything the |
4417 // choice node set up and backtrack. If the first alternative fails then | 4369 // choice node set up and backtrack. If the first alternative fails then |
4418 // the second alternative is tried, which is exactly the desired result | 4370 // the second alternative is tried, which is exactly the desired result |
4419 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special | 4371 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special |
4420 // ChoiceNode that knows to ignore the first exit when calculating quick | 4372 // ChoiceNode that knows to ignore the first exit when calculating quick |
4421 // checks. | 4373 // checks. |
4422 Zone* zone = compiler->zone(); | |
4423 | 4374 |
4424 GuardedAlternative body_alt( | 4375 GuardedAlternative body_alt( |
4425 body()->ToNode( | 4376 body()->ToNode( |
4426 compiler, | 4377 compiler, |
4427 success = new(zone) NegativeSubmatchSuccess(stack_pointer_register, | 4378 success = new(CI) NegativeSubmatchSuccess(stack_pointer_register, |
4428 position_register, | 4379 position_register, |
4429 register_count, | 4380 register_count, |
4430 register_start, | 4381 register_start, |
4431 zone))); | 4382 CI))); |
4432 ChoiceNode* choice_node = | 4383 ChoiceNode* choice_node = |
4433 new(zone) NegativeLookaheadChoiceNode(body_alt, | 4384 new(CI) NegativeLookaheadChoiceNode(body_alt, |
4434 GuardedAlternative(on_success), | 4385 GuardedAlternative(on_success), |
4435 zone); | 4386 CI); |
4436 return ActionNode::BeginSubmatch(stack_pointer_register, | 4387 return ActionNode::BeginSubmatch(stack_pointer_register, |
4437 position_register, | 4388 position_register, |
4438 choice_node); | 4389 choice_node); |
4439 } | 4390 } |
4440 } | 4391 } |
4441 | 4392 |
4442 | 4393 |
4443 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, | 4394 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, |
4444 RegExpNode* on_success) { | 4395 RegExpNode* on_success) { |
4445 return ToNode(body(), index(), compiler, on_success); | 4396 return ToNode(body(), index(), compiler, on_success); |
4446 } | 4397 } |
4447 | 4398 |
4448 | 4399 |
4449 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, | 4400 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, |
4450 int index, | 4401 intptr_t index, |
4451 RegExpCompiler* compiler, | 4402 RegExpCompiler* compiler, |
4452 RegExpNode* on_success) { | 4403 RegExpNode* on_success) { |
4453 int start_reg = RegExpCapture::StartRegister(index); | 4404 intptr_t start_reg = RegExpCapture::StartRegister(index); |
4454 int end_reg = RegExpCapture::EndRegister(index); | 4405 intptr_t end_reg = RegExpCapture::EndRegister(index); |
4455 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); | 4406 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); |
4456 RegExpNode* body_node = body->ToNode(compiler, store_end); | 4407 RegExpNode* body_node = body->ToNode(compiler, store_end); |
4457 return ActionNode::StorePosition(start_reg, true, body_node); | 4408 return ActionNode::StorePosition(start_reg, true, body_node); |
4458 } | 4409 } |
4459 | 4410 |
4460 | 4411 |
4461 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, | 4412 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, |
4462 RegExpNode* on_success) { | 4413 RegExpNode* on_success) { |
4463 ZoneList<RegExpTree*>* children = nodes(); | 4414 ZoneGrowableArray<RegExpTree*>* children = nodes(); |
4464 RegExpNode* current = on_success; | 4415 RegExpNode* current = on_success; |
4465 for (int i = children->length() - 1; i >= 0; i--) { | 4416 for (intptr_t i = children->length() - 1; i >= 0; i--) { |
4466 current = children->at(i)->ToNode(compiler, current); | 4417 current = children->At(i)->ToNode(compiler, current); |
4467 } | 4418 } |
4468 return current; | 4419 return current; |
4469 } | 4420 } |
4470 | 4421 |
4471 | 4422 |
4472 static void AddClass(const int* elmv, | 4423 static void AddClass(const intptr_t* elmv, |
4473 int elmc, | 4424 intptr_t elmc, |
4474 ZoneList<CharacterRange>* ranges, | 4425 ZoneGrowableArray<CharacterRange>* ranges) { |
4475 Zone* zone) { | |
4476 elmc--; | 4426 elmc--; |
4477 DCHECK(elmv[elmc] == 0x10000); | 4427 ASSERT(elmv[elmc] == 0x10000); |
4478 for (int i = 0; i < elmc; i += 2) { | 4428 for (intptr_t i = 0; i < elmc; i += 2) { |
4479 DCHECK(elmv[i] < elmv[i + 1]); | 4429 ASSERT(elmv[i] < elmv[i + 1]); |
4480 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone); | 4430 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); |
4481 } | 4431 } |
4482 } | 4432 } |
4483 | 4433 |
4484 | 4434 |
4485 static void AddClassNegated(const int *elmv, | 4435 static void AddClassNegated(const intptr_t *elmv, |
4486 int elmc, | 4436 intptr_t elmc, |
4487 ZoneList<CharacterRange>* ranges, | 4437 ZoneGrowableArray<CharacterRange>* ranges) { |
4488 Zone* zone) { | |
4489 elmc--; | 4438 elmc--; |
4490 DCHECK(elmv[elmc] == 0x10000); | 4439 ASSERT(elmv[elmc] == 0x10000); |
4491 DCHECK(elmv[0] != 0x0000); | 4440 ASSERT(elmv[0] != 0x0000); |
4492 DCHECK(elmv[elmc-1] != String::kMaxUtf16CodeUnit); | 4441 ASSERT(elmv[elmc-1] != Utf16::kMaxCodeUnit); |
4493 uc16 last = 0x0000; | 4442 uint16_t last = 0x0000; |
4494 for (int i = 0; i < elmc; i += 2) { | 4443 for (intptr_t i = 0; i < elmc; i += 2) { |
4495 DCHECK(last <= elmv[i] - 1); | 4444 ASSERT(last <= elmv[i] - 1); |
4496 DCHECK(elmv[i] < elmv[i + 1]); | 4445 ASSERT(elmv[i] < elmv[i + 1]); |
4497 ranges->Add(CharacterRange(last, elmv[i] - 1), zone); | 4446 ranges->Add(CharacterRange(last, elmv[i] - 1)); |
4498 last = elmv[i + 1]; | 4447 last = elmv[i + 1]; |
4499 } | 4448 } |
4500 ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit), zone); | 4449 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit)); |
4501 } | 4450 } |
4502 | 4451 |
4503 | 4452 |
4504 void CharacterRange::AddClassEscape(uc16 type, | 4453 void CharacterRange::AddClassEscape(uint16_t type, |
4505 ZoneList<CharacterRange>* ranges, | 4454 ZoneGrowableArray<CharacterRange>* ranges) { |
4506 Zone* zone) { | |
4507 switch (type) { | 4455 switch (type) { |
4508 case 's': | 4456 case 's': |
4509 AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone); | 4457 AddClass(kSpaceRanges, kSpaceRangeCount, ranges); |
4510 break; | 4458 break; |
4511 case 'S': | 4459 case 'S': |
4512 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone); | 4460 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges); |
4513 break; | 4461 break; |
4514 case 'w': | 4462 case 'w': |
4515 AddClass(kWordRanges, kWordRangeCount, ranges, zone); | 4463 AddClass(kWordRanges, kWordRangeCount, ranges); |
4516 break; | 4464 break; |
4517 case 'W': | 4465 case 'W': |
4518 AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone); | 4466 AddClassNegated(kWordRanges, kWordRangeCount, ranges); |
4519 break; | 4467 break; |
4520 case 'd': | 4468 case 'd': |
4521 AddClass(kDigitRanges, kDigitRangeCount, ranges, zone); | 4469 AddClass(kDigitRanges, kDigitRangeCount, ranges); |
4522 break; | 4470 break; |
4523 case 'D': | 4471 case 'D': |
4524 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone); | 4472 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges); |
4525 break; | 4473 break; |
4526 case '.': | 4474 case '.': |
4527 AddClassNegated(kLineTerminatorRanges, | 4475 AddClassNegated(kLineTerminatorRanges, |
4528 kLineTerminatorRangeCount, | 4476 kLineTerminatorRangeCount, |
4529 ranges, | 4477 ranges); |
4530 zone); | |
4531 break; | 4478 break; |
4532 // This is not a character range as defined by the spec but a | 4479 // This is not a character range as defined by the spec but a |
4533 // convenient shorthand for a character class that matches any | 4480 // convenient shorthand for a character class that matches any |
4534 // character. | 4481 // character. |
4535 case '*': | 4482 case '*': |
4536 ranges->Add(CharacterRange::Everything(), zone); | 4483 ranges->Add(CharacterRange::Everything()); |
4537 break; | 4484 break; |
4538 // This is the set of characters matched by the $ and ^ symbols | 4485 // This is the set of characters matched by the $ and ^ symbols |
4539 // in multiline mode. | 4486 // in multiline mode. |
4540 case 'n': | 4487 case 'n': |
4541 AddClass(kLineTerminatorRanges, | 4488 AddClass(kLineTerminatorRanges, |
4542 kLineTerminatorRangeCount, | 4489 kLineTerminatorRangeCount, |
4543 ranges, | 4490 ranges); |
4544 zone); | |
4545 break; | 4491 break; |
4546 default: | 4492 default: |
4547 UNREACHABLE(); | 4493 UNREACHABLE(); |
4548 } | 4494 } |
4549 } | 4495 } |
4550 | 4496 |
4551 | 4497 |
4552 Vector<const int> CharacterRange::GetWordBounds() { | 4498 void CharacterRange::AddCaseEquivalents( |
4553 return Vector<const int>(kWordRanges, kWordRangeCount - 1); | 4499 ZoneGrowableArray<CharacterRange>* ranges, |
4554 } | 4500 bool is_one_byte, |
| 4501 Isolate* isolate) { |
| 4502 uint16_t bottom = from(); |
| 4503 uint16_t top = to(); |
| 4504 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { |
| 4505 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; |
| 4506 if (top > Symbols::kMaxOneCharCodeSymbol) { |
| 4507 top = Symbols::kMaxOneCharCodeSymbol; |
| 4508 } |
| 4509 } |
4555 | 4510 |
4556 | 4511 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; |
4557 class CharacterRangeSplitter { | 4512 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange; |
4558 public: | 4513 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
4559 CharacterRangeSplitter(ZoneList<CharacterRange>** included, | |
4560 ZoneList<CharacterRange>** excluded, | |
4561 Zone* zone) | |
4562 : included_(included), | |
4563 excluded_(excluded), | |
4564 zone_(zone) { } | |
4565 void Call(uc16 from, DispatchTable::Entry entry); | |
4566 | |
4567 static const int kInBase = 0; | |
4568 static const int kInOverlay = 1; | |
4569 | |
4570 private: | |
4571 ZoneList<CharacterRange>** included_; | |
4572 ZoneList<CharacterRange>** excluded_; | |
4573 Zone* zone_; | |
4574 }; | |
4575 | |
4576 | |
4577 void CharacterRangeSplitter::Call(uc16 from, DispatchTable::Entry entry) { | |
4578 if (!entry.out_set()->Get(kInBase)) return; | |
4579 ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay) | |
4580 ? included_ | |
4581 : excluded_; | |
4582 if (*target == NULL) *target = new(zone_) ZoneList<CharacterRange>(2, zone_); | |
4583 (*target)->Add(CharacterRange(entry.from(), entry.to()), zone_); | |
4584 } | |
4585 | |
4586 | |
4587 void CharacterRange::Split(ZoneList<CharacterRange>* base, | |
4588 Vector<const int> overlay, | |
4589 ZoneList<CharacterRange>** included, | |
4590 ZoneList<CharacterRange>** excluded, | |
4591 Zone* zone) { | |
4592 DCHECK_EQ(NULL, *included); | |
4593 DCHECK_EQ(NULL, *excluded); | |
4594 DispatchTable table(zone); | |
4595 for (int i = 0; i < base->length(); i++) | |
4596 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase, zone); | |
4597 for (int i = 0; i < overlay.length(); i += 2) { | |
4598 table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1), | |
4599 CharacterRangeSplitter::kInOverlay, zone); | |
4600 } | |
4601 CharacterRangeSplitter callback(included, excluded, zone); | |
4602 table.ForEach(&callback); | |
4603 } | |
4604 | |
4605 | |
4606 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, | |
4607 bool is_one_byte, Zone* zone) { | |
4608 Isolate* isolate = zone->isolate(); | |
4609 uc16 bottom = from(); | |
4610 uc16 top = to(); | |
4611 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { | |
4612 if (bottom > String::kMaxOneByteCharCode) return; | |
4613 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode; | |
4614 } | |
4615 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | |
4616 if (top == bottom) { | 4514 if (top == bottom) { |
4617 // If this is a singleton we just expand the one character. | 4515 // If this is a singleton we just expand the one character. |
4618 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); | 4516 intptr_t length = jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLINT |
4619 for (int i = 0; i < length; i++) { | 4517 for (intptr_t i = 0; i < length; i++) { |
4620 uc32 chr = chars[i]; | 4518 uint32_t chr = chars[i]; |
4621 if (chr != bottom) { | 4519 if (chr != bottom) { |
4622 ranges->Add(CharacterRange::Singleton(chars[i]), zone); | 4520 ranges->Add(CharacterRange::Singleton(chars[i])); |
4623 } | 4521 } |
4624 } | 4522 } |
4625 } else { | 4523 } else { |
4626 // If this is a range we expand the characters block by block, | 4524 // If this is a range we expand the characters block by block, |
4627 // expanding contiguous subranges (blocks) one at a time. | 4525 // expanding contiguous subranges (blocks) one at a time. |
4628 // The approach is as follows. For a given start character we | 4526 // The approach is as follows. For a given start character we |
4629 // look up the remainder of the block that contains it (represented | 4527 // look up the remainder of the block that contains it (represented |
4630 // by the end point), for instance we find 'z' if the character | 4528 // by the end point), for instance we find 'z' if the character |
4631 // is 'c'. A block is characterized by the property | 4529 // is 'c'. A block is characterized by the property |
4632 // that all characters uncanonicalize in the same way, except that | 4530 // that all characters uncanonicalize in the same way, except that |
4633 // each entry in the result is incremented by the distance from the first | 4531 // each entry in the result is incremented by the distance from the first |
4634 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and | 4532 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and |
4635 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. | 4533 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. |
4636 // Once we've found the end point we look up its uncanonicalization | 4534 // Once we've found the end point we look up its uncanonicalization |
4637 // and produce a range for each element. For instance for [c-f] | 4535 // and produce a range for each element. For instance for [c-f] |
4638 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only | 4536 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only |
4639 // add a range if it is not already contained in the input, so [c-f] | 4537 // add a range if it is not already contained in the input, so [c-f] |
4640 // will be skipped but [C-F] will be added. If this range is not | 4538 // will be skipped but [C-F] will be added. If this range is not |
4641 // completely contained in a block we do this for all the blocks | 4539 // completely contained in a block we do this for all the blocks |
4642 // covered by the range (handling any character that is not in a block | 4540 // covered by the range (handling any character that is not in a block |
4643 // as a "singleton block"). | 4541 // as a "singleton block"). |
4644 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4542 int32_t range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
4645 int pos = bottom; | 4543 intptr_t pos = bottom; |
4646 while (pos <= top) { | 4544 while (pos <= top) { |
4647 int length = isolate->jsregexp_canonrange()->get(pos, '\0', range); | 4545 intptr_t length = jsregexp_canonrange.get(pos, '\0', range); |
4648 uc16 block_end; | 4546 uint16_t block_end; |
4649 if (length == 0) { | 4547 if (length == 0) { |
4650 block_end = pos; | 4548 block_end = pos; |
4651 } else { | 4549 } else { |
4652 DCHECK_EQ(1, length); | 4550 ASSERT(length == 1); |
4653 block_end = range[0]; | 4551 block_end = range[0]; |
4654 } | 4552 } |
4655 int end = (block_end > top) ? top : block_end; | 4553 intptr_t end = (block_end > top) ? top : block_end; |
4656 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range); | 4554 length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT |
4657 for (int i = 0; i < length; i++) { | 4555 for (intptr_t i = 0; i < length; i++) { |
4658 uc32 c = range[i]; | 4556 uint32_t c = range[i]; |
4659 uc16 range_from = c - (block_end - pos); | 4557 uint16_t range_from = c - (block_end - pos); |
4660 uc16 range_to = c - (block_end - end); | 4558 uint16_t range_to = c - (block_end - end); |
4661 if (!(bottom <= range_from && range_to <= top)) { | 4559 if (!(bottom <= range_from && range_to <= top)) { |
4662 ranges->Add(CharacterRange(range_from, range_to), zone); | 4560 ranges->Add(CharacterRange(range_from, range_to)); |
4663 } | 4561 } |
4664 } | 4562 } |
4665 pos = end + 1; | 4563 pos = end + 1; |
4666 } | 4564 } |
4667 } | 4565 } |
4668 } | 4566 } |
4669 | 4567 |
4670 | 4568 |
4671 bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) { | 4569 bool CharacterRange::IsCanonical(ZoneGrowableArray<CharacterRange>* ranges) { |
4672 DCHECK_NOT_NULL(ranges); | 4570 ASSERT(ranges != NULL); |
4673 int n = ranges->length(); | 4571 intptr_t n = ranges->length(); |
4674 if (n <= 1) return true; | 4572 if (n <= 1) return true; |
4675 int max = ranges->at(0).to(); | 4573 intptr_t max = ranges->At(0).to(); |
4676 for (int i = 1; i < n; i++) { | 4574 for (intptr_t i = 1; i < n; i++) { |
4677 CharacterRange next_range = ranges->at(i); | 4575 CharacterRange next_range = ranges->At(i); |
4678 if (next_range.from() <= max + 1) return false; | 4576 if (next_range.from() <= max + 1) return false; |
4679 max = next_range.to(); | 4577 max = next_range.to(); |
4680 } | 4578 } |
4681 return true; | 4579 return true; |
4682 } | 4580 } |
4683 | 4581 |
4684 | 4582 |
4685 ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) { | 4583 ZoneGrowableArray<CharacterRange>* CharacterSet::ranges() { |
4686 if (ranges_ == NULL) { | 4584 if (ranges_ == NULL) { |
4687 ranges_ = new(zone) ZoneList<CharacterRange>(2, zone); | 4585 ranges_ = new ZoneGrowableArray<CharacterRange>(2); |
4688 CharacterRange::AddClassEscape(standard_set_type_, ranges_, zone); | 4586 CharacterRange::AddClassEscape(standard_set_type_, ranges_); |
4689 } | 4587 } |
4690 return ranges_; | 4588 return ranges_; |
4691 } | 4589 } |
4692 | 4590 |
4693 | 4591 |
4694 // Move a number of elements in a zonelist to another position | 4592 // Move a number of elements in a zone array to another position |
4695 // in the same list. Handles overlapping source and target areas. | 4593 // in the same array. Handles overlapping source and target areas. |
4696 static void MoveRanges(ZoneList<CharacterRange>* list, | 4594 static void MoveRanges(ZoneGrowableArray<CharacterRange>* list, |
4697 int from, | 4595 intptr_t from, |
4698 int to, | 4596 intptr_t to, |
4699 int count) { | 4597 intptr_t count) { |
4700 // Ranges are potentially overlapping. | 4598 // Ranges are potentially overlapping. |
4701 if (from < to) { | 4599 if (from < to) { |
4702 for (int i = count - 1; i >= 0; i--) { | 4600 for (intptr_t i = count - 1; i >= 0; i--) { |
4703 list->at(to + i) = list->at(from + i); | 4601 (*list)[to + i] = list->At(from + i); |
4704 } | 4602 } |
4705 } else { | 4603 } else { |
4706 for (int i = 0; i < count; i++) { | 4604 for (intptr_t i = 0; i < count; i++) { |
4707 list->at(to + i) = list->at(from + i); | 4605 (*list)[to + i] = list->At(from + i); |
4708 } | 4606 } |
4709 } | 4607 } |
4710 } | 4608 } |
4711 | 4609 |
4712 | 4610 |
4713 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, | 4611 static intptr_t InsertRangeInCanonicalList( |
4714 int count, | 4612 ZoneGrowableArray<CharacterRange>* list, |
4715 CharacterRange insert) { | 4613 intptr_t count, |
| 4614 CharacterRange insert) { |
4716 // Inserts a range into list[0..count[, which must be sorted | 4615 // Inserts a range into list[0..count[, which must be sorted |
4717 // by from value and non-overlapping and non-adjacent, using at most | 4616 // by from value and non-overlapping and non-adjacent, using at most |
4718 // list[0..count] for the result. Returns the number of resulting | 4617 // list[0..count] for the result. Returns the number of resulting |
4719 // canonicalized ranges. Inserting a range may collapse existing ranges into | 4618 // canonicalized ranges. Inserting a range may collapse existing ranges into |
4720 // fewer ranges, so the return value can be anything in the range 1..count+1. | 4619 // fewer ranges, so the return value can be anything in the range 1..count+1. |
4721 uc16 from = insert.from(); | 4620 uint16_t from = insert.from(); |
4722 uc16 to = insert.to(); | 4621 uint16_t to = insert.to(); |
4723 int start_pos = 0; | 4622 intptr_t start_pos = 0; |
4724 int end_pos = count; | 4623 intptr_t end_pos = count; |
4725 for (int i = count - 1; i >= 0; i--) { | 4624 for (intptr_t i = count - 1; i >= 0; i--) { |
4726 CharacterRange current = list->at(i); | 4625 CharacterRange current = list->At(i); |
4727 if (current.from() > to + 1) { | 4626 if (current.from() > to + 1) { |
4728 end_pos = i; | 4627 end_pos = i; |
4729 } else if (current.to() + 1 < from) { | 4628 } else if (current.to() + 1 < from) { |
4730 start_pos = i + 1; | 4629 start_pos = i + 1; |
4731 break; | 4630 break; |
4732 } | 4631 } |
4733 } | 4632 } |
4734 | 4633 |
4735 // Inserted range overlaps, or is adjacent to, ranges at positions | 4634 // Inserted range overlaps, or is adjacent to, ranges at positions |
4736 // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are | 4635 // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are |
4737 // not affected by the insertion. | 4636 // not affected by the insertion. |
4738 // If start_pos == end_pos, the range must be inserted before start_pos. | 4637 // If start_pos == end_pos, the range must be inserted before start_pos. |
4739 // If start_pos < end_pos, the entire range from start_pos to end_pos | 4638 // If start_pos < end_pos, the entire range from start_pos to end_pos |
4740 // must be merged with the insert range. | 4639 // must be merged with the insert range. |
4741 | 4640 |
4742 if (start_pos == end_pos) { | 4641 if (start_pos == end_pos) { |
4743 // Insert between existing ranges at position start_pos. | 4642 // Insert between existing ranges at position start_pos. |
4744 if (start_pos < count) { | 4643 if (start_pos < count) { |
4745 MoveRanges(list, start_pos, start_pos + 1, count - start_pos); | 4644 MoveRanges(list, start_pos, start_pos + 1, count - start_pos); |
4746 } | 4645 } |
4747 list->at(start_pos) = insert; | 4646 (*list)[start_pos] = insert; |
4748 return count + 1; | 4647 return count + 1; |
4749 } | 4648 } |
4750 if (start_pos + 1 == end_pos) { | 4649 if (start_pos + 1 == end_pos) { |
4751 // Replace single existing range at position start_pos. | 4650 // Replace single existing range at position start_pos. |
4752 CharacterRange to_replace = list->at(start_pos); | 4651 CharacterRange to_replace = list->At(start_pos); |
4753 int new_from = Min(to_replace.from(), from); | 4652 intptr_t new_from = Utils::Minimum(to_replace.from(), from); |
4754 int new_to = Max(to_replace.to(), to); | 4653 intptr_t new_to = Utils::Maximum(to_replace.to(), to); |
4755 list->at(start_pos) = CharacterRange(new_from, new_to); | 4654 (*list)[start_pos] = CharacterRange(new_from, new_to); |
4756 return count; | 4655 return count; |
4757 } | 4656 } |
4758 // Replace a number of existing ranges from start_pos to end_pos - 1. | 4657 // Replace a number of existing ranges from start_pos to end_pos - 1. |
4759 // Move the remaining ranges down. | 4658 // Move the remaining ranges down. |
4760 | 4659 |
4761 int new_from = Min(list->at(start_pos).from(), from); | 4660 intptr_t new_from = Utils::Minimum(list->At(start_pos).from(), from); |
4762 int new_to = Max(list->at(end_pos - 1).to(), to); | 4661 intptr_t new_to = Utils::Maximum(list->At(end_pos - 1).to(), to); |
4763 if (end_pos < count) { | 4662 if (end_pos < count) { |
4764 MoveRanges(list, end_pos, start_pos + 1, count - end_pos); | 4663 MoveRanges(list, end_pos, start_pos + 1, count - end_pos); |
4765 } | 4664 } |
4766 list->at(start_pos) = CharacterRange(new_from, new_to); | 4665 (*list)[start_pos] = CharacterRange(new_from, new_to); |
4767 return count - (end_pos - start_pos) + 1; | 4666 return count - (end_pos - start_pos) + 1; |
4768 } | 4667 } |
4769 | 4668 |
4770 | 4669 |
4771 void CharacterSet::Canonicalize() { | 4670 void CharacterSet::Canonicalize() { |
4772 // Special/default classes are always considered canonical. The result | 4671 // Special/default classes are always considered canonical. The result |
4773 // of calling ranges() will be sorted. | 4672 // of calling ranges() will be sorted. |
4774 if (ranges_ == NULL) return; | 4673 if (ranges_ == NULL) return; |
4775 CharacterRange::Canonicalize(ranges_); | 4674 CharacterRange::Canonicalize(ranges_); |
4776 } | 4675 } |
4777 | 4676 |
4778 | 4677 |
4779 void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) { | 4678 void CharacterRange::Canonicalize( |
| 4679 ZoneGrowableArray<CharacterRange>* character_ranges) { |
4780 if (character_ranges->length() <= 1) return; | 4680 if (character_ranges->length() <= 1) return; |
4781 // Check whether ranges are already canonical (increasing, non-overlapping, | 4681 // Check whether ranges are already canonical (increasing, non-overlapping, |
4782 // non-adjacent). | 4682 // non-adjacent). |
4783 int n = character_ranges->length(); | 4683 intptr_t n = character_ranges->length(); |
4784 int max = character_ranges->at(0).to(); | 4684 intptr_t max = character_ranges->At(0).to(); |
4785 int i = 1; | 4685 intptr_t i = 1; |
4786 while (i < n) { | 4686 while (i < n) { |
4787 CharacterRange current = character_ranges->at(i); | 4687 CharacterRange current = character_ranges->At(i); |
4788 if (current.from() <= max + 1) { | 4688 if (current.from() <= max + 1) { |
4789 break; | 4689 break; |
4790 } | 4690 } |
4791 max = current.to(); | 4691 max = current.to(); |
4792 i++; | 4692 i++; |
4793 } | 4693 } |
4794 // Canonical until the i'th range. If that's all of them, we are done. | 4694 // Canonical until the i'th range. If that's all of them, we are done. |
4795 if (i == n) return; | 4695 if (i == n) return; |
4796 | 4696 |
4797 // The ranges at index i and forward are not canonicalized. Make them so by | 4697 // The ranges at index i and forward are not canonicalized. Make them so by |
4798 // doing the equivalent of insertion sort (inserting each into the previous | 4698 // doing the equivalent of insertion sort (inserting each into the previous |
4799 // list, in order). | 4699 // list, in order). |
4800 // Notice that inserting a range can reduce the number of ranges in the | 4700 // Notice that inserting a range can reduce the number of ranges in the |
4801 // result due to combining of adjacent and overlapping ranges. | 4701 // result due to combining of adjacent and overlapping ranges. |
4802 int read = i; // Range to insert. | 4702 intptr_t read = i; // Range to insert. |
4803 int num_canonical = i; // Length of canonicalized part of list. | 4703 intptr_t num_canonical = i; // Length of canonicalized part of list. |
4804 do { | 4704 do { |
4805 num_canonical = InsertRangeInCanonicalList(character_ranges, | 4705 num_canonical = InsertRangeInCanonicalList(character_ranges, |
4806 num_canonical, | 4706 num_canonical, |
4807 character_ranges->at(read)); | 4707 character_ranges->At(read)); |
4808 read++; | 4708 read++; |
4809 } while (read < n); | 4709 } while (read < n); |
4810 character_ranges->Rewind(num_canonical); | 4710 character_ranges->TruncateTo(num_canonical); |
4811 | 4711 |
4812 DCHECK(CharacterRange::IsCanonical(character_ranges)); | 4712 ASSERT(CharacterRange::IsCanonical(character_ranges)); |
4813 } | 4713 } |
4814 | 4714 |
4815 | 4715 |
4816 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges, | 4716 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges, |
4817 ZoneList<CharacterRange>* negated_ranges, | 4717 ZoneGrowableArray<CharacterRange>* negated_ranges) { |
4818 Zone* zone) { | 4718 ASSERT(CharacterRange::IsCanonical(ranges)); |
4819 DCHECK(CharacterRange::IsCanonical(ranges)); | 4719 ASSERT(negated_ranges->length() == 0); |
4820 DCHECK_EQ(0, negated_ranges->length()); | 4720 intptr_t range_count = ranges->length(); |
4821 int range_count = ranges->length(); | 4721 uint16_t from = 0; |
4822 uc16 from = 0; | 4722 intptr_t i = 0; |
4823 int i = 0; | 4723 if (range_count > 0 && ranges->At(0).from() == 0) { |
4824 if (range_count > 0 && ranges->at(0).from() == 0) { | 4724 from = ranges->At(0).to(); |
4825 from = ranges->at(0).to(); | |
4826 i = 1; | 4725 i = 1; |
4827 } | 4726 } |
4828 while (i < range_count) { | 4727 while (i < range_count) { |
4829 CharacterRange range = ranges->at(i); | 4728 CharacterRange range = ranges->At(i); |
4830 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone); | 4729 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1)); |
4831 from = range.to(); | 4730 from = range.to(); |
4832 i++; | 4731 i++; |
4833 } | 4732 } |
4834 if (from < String::kMaxUtf16CodeUnit) { | 4733 if (from < Utf16::kMaxCodeUnit) { |
4835 negated_ranges->Add(CharacterRange(from + 1, String::kMaxUtf16CodeUnit), | 4734 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit)); |
4836 zone); | |
4837 } | 4735 } |
4838 } | 4736 } |
4839 | 4737 |
4840 | 4738 |
4841 // ------------------------------------------------------------------- | 4739 // ------------------------------------------------------------------- |
4842 // Splay tree | 4740 // Splay tree |
4843 | 4741 |
4844 | 4742 |
4845 OutSet* OutSet::Extend(unsigned value, Zone* zone) { | 4743 // Workaround for the fact that ZoneGrowableArray does not have contains(). |
4846 if (Get(value)) | 4744 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, |
4847 return this; | 4745 unsigned value) { |
4848 if (successors(zone) != NULL) { | 4746 for (intptr_t i = 0; i < array->length(); i++) { |
4849 for (int i = 0; i < successors(zone)->length(); i++) { | 4747 if (array->At(i) == value) { |
4850 OutSet* successor = successors(zone)->at(i); | 4748 return true; |
4851 if (successor->Get(value)) | |
4852 return successor; | |
4853 } | 4749 } |
4854 } else { | |
4855 successors_ = new(zone) ZoneList<OutSet*>(2, zone); | |
4856 } | 4750 } |
4857 OutSet* result = new(zone) OutSet(first_, remaining_); | 4751 return false; |
4858 result->Set(value, zone); | |
4859 successors(zone)->Add(result, zone); | |
4860 return result; | |
4861 } | 4752 } |
4862 | 4753 |
4863 | 4754 |
4864 void OutSet::Set(unsigned value, Zone *zone) { | 4755 void OutSet::Set(unsigned value, Isolate* isolate) { |
4865 if (value < kFirstLimit) { | 4756 if (value < kFirstLimit) { |
4866 first_ |= (1 << value); | 4757 first_ |= (1 << value); |
4867 } else { | 4758 } else { |
4868 if (remaining_ == NULL) | 4759 if (remaining_ == NULL) |
4869 remaining_ = new(zone) ZoneList<unsigned>(1, zone); | 4760 remaining_ = new(isolate) ZoneGrowableArray<unsigned>(1); |
4870 if (remaining_->is_empty() || !remaining_->Contains(value)) | 4761 |
4871 remaining_->Add(value, zone); | 4762 bool remaining_contains_value = ArrayContains(remaining_, value); |
| 4763 if (remaining_->is_empty() || !remaining_contains_value) { |
| 4764 remaining_->Add(value); |
| 4765 } |
4872 } | 4766 } |
4873 } | 4767 } |
4874 | 4768 |
4875 | 4769 |
4876 bool OutSet::Get(unsigned value) const { | 4770 bool OutSet::Get(unsigned value) const { |
4877 if (value < kFirstLimit) { | 4771 if (value < kFirstLimit) { |
4878 return (first_ & (1 << value)) != 0; | 4772 return (first_ & (1 << value)) != 0; |
4879 } else if (remaining_ == NULL) { | 4773 } else if (remaining_ == NULL) { |
4880 return false; | 4774 return false; |
4881 } else { | 4775 } else { |
4882 return remaining_->Contains(value); | 4776 return ArrayContains(remaining_, value); |
4883 } | 4777 } |
4884 } | 4778 } |
4885 | 4779 |
4886 | 4780 |
4887 const uc16 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar; | |
4888 | |
4889 | |
4890 void DispatchTable::AddRange(CharacterRange full_range, int value, | |
4891 Zone* zone) { | |
4892 CharacterRange current = full_range; | |
4893 if (tree()->is_empty()) { | |
4894 // If this is the first range we just insert into the table. | |
4895 ZoneSplayTree<Config>::Locator loc; | |
4896 DCHECK_RESULT(tree()->Insert(current.from(), &loc)); | |
4897 loc.set_value(Entry(current.from(), current.to(), | |
4898 empty()->Extend(value, zone))); | |
4899 return; | |
4900 } | |
4901 // First see if there is a range to the left of this one that | |
4902 // overlaps. | |
4903 ZoneSplayTree<Config>::Locator loc; | |
4904 if (tree()->FindGreatestLessThan(current.from(), &loc)) { | |
4905 Entry* entry = &loc.value(); | |
4906 // If we've found a range that overlaps with this one, and it | |
4907 // starts strictly to the left of this one, we have to fix it | |
4908 // because the following code only handles ranges that start on | |
4909 // or after the start point of the range we're adding. | |
4910 if (entry->from() < current.from() && entry->to() >= current.from()) { | |
4911 // Snap the overlapping range in half around the start point of | |
4912 // the range we're adding. | |
4913 CharacterRange left(entry->from(), current.from() - 1); | |
4914 CharacterRange right(current.from(), entry->to()); | |
4915 // The left part of the overlapping range doesn't overlap. | |
4916 // Truncate the whole entry to be just the left part. | |
4917 entry->set_to(left.to()); | |
4918 // The right part is the one that overlaps. We add this part | |
4919 // to the map and let the next step deal with merging it with | |
4920 // the range we're adding. | |
4921 ZoneSplayTree<Config>::Locator loc; | |
4922 DCHECK_RESULT(tree()->Insert(right.from(), &loc)); | |
4923 loc.set_value(Entry(right.from(), | |
4924 right.to(), | |
4925 entry->out_set())); | |
4926 } | |
4927 } | |
4928 while (current.is_valid()) { | |
4929 if (tree()->FindLeastGreaterThan(current.from(), &loc) && | |
4930 (loc.value().from() <= current.to()) && | |
4931 (loc.value().to() >= current.from())) { | |
4932 Entry* entry = &loc.value(); | |
4933 // We have overlap. If there is space between the start point of | |
4934 // the range we're adding and where the overlapping range starts | |
4935 // then we have to add a range covering just that space. | |
4936 if (current.from() < entry->from()) { | |
4937 ZoneSplayTree<Config>::Locator ins; | |
4938 DCHECK_RESULT(tree()->Insert(current.from(), &ins)); | |
4939 ins.set_value(Entry(current.from(), | |
4940 entry->from() - 1, | |
4941 empty()->Extend(value, zone))); | |
4942 current.set_from(entry->from()); | |
4943 } | |
4944 DCHECK_EQ(current.from(), entry->from()); | |
4945 // If the overlapping range extends beyond the one we want to add | |
4946 // we have to snap the right part off and add it separately. | |
4947 if (entry->to() > current.to()) { | |
4948 ZoneSplayTree<Config>::Locator ins; | |
4949 DCHECK_RESULT(tree()->Insert(current.to() + 1, &ins)); | |
4950 ins.set_value(Entry(current.to() + 1, | |
4951 entry->to(), | |
4952 entry->out_set())); | |
4953 entry->set_to(current.to()); | |
4954 } | |
4955 DCHECK(entry->to() <= current.to()); | |
4956 // The overlapping range is now completely contained by the range | |
4957 // we're adding so we can just update it and move the start point | |
4958 // of the range we're adding just past it. | |
4959 entry->AddValue(value, zone); | |
4960 // Bail out if the last interval ended at 0xFFFF since otherwise | |
4961 // adding 1 will wrap around to 0. | |
4962 if (entry->to() == String::kMaxUtf16CodeUnit) | |
4963 break; | |
4964 DCHECK(entry->to() + 1 > current.from()); | |
4965 current.set_from(entry->to() + 1); | |
4966 } else { | |
4967 // There is no overlap so we can just add the range | |
4968 ZoneSplayTree<Config>::Locator ins; | |
4969 DCHECK_RESULT(tree()->Insert(current.from(), &ins)); | |
4970 ins.set_value(Entry(current.from(), | |
4971 current.to(), | |
4972 empty()->Extend(value, zone))); | |
4973 break; | |
4974 } | |
4975 } | |
4976 } | |
4977 | |
4978 | |
4979 OutSet* DispatchTable::Get(uc16 value) { | |
4980 ZoneSplayTree<Config>::Locator loc; | |
4981 if (!tree()->FindGreatestLessThan(value, &loc)) | |
4982 return empty(); | |
4983 Entry* entry = &loc.value(); | |
4984 if (value <= entry->to()) | |
4985 return entry->out_set(); | |
4986 else | |
4987 return empty(); | |
4988 } | |
4989 | |
4990 | |
4991 // ------------------------------------------------------------------- | 4781 // ------------------------------------------------------------------- |
4992 // Analysis | 4782 // Analysis |
4993 | 4783 |
4994 | 4784 |
4995 void Analysis::EnsureAnalyzed(RegExpNode* that) { | 4785 void Analysis::EnsureAnalyzed(RegExpNode* that) { |
4996 StackLimitCheck check(that->zone()->isolate()); | |
4997 if (check.HasOverflowed()) { | |
4998 fail("Stack overflow"); | |
4999 return; | |
5000 } | |
5001 if (that->info()->been_analyzed || that->info()->being_analyzed) | 4786 if (that->info()->been_analyzed || that->info()->being_analyzed) |
5002 return; | 4787 return; |
5003 that->info()->being_analyzed = true; | 4788 that->info()->being_analyzed = true; |
5004 that->Accept(this); | 4789 that->Accept(this); |
5005 that->info()->being_analyzed = false; | 4790 that->info()->being_analyzed = false; |
5006 that->info()->been_analyzed = true; | 4791 that->info()->been_analyzed = true; |
5007 } | 4792 } |
5008 | 4793 |
5009 | 4794 |
5010 void Analysis::VisitEnd(EndNode* that) { | 4795 void Analysis::VisitEnd(EndNode* that) { |
5011 // nothing to do | 4796 // nothing to do |
5012 } | 4797 } |
5013 | 4798 |
5014 | 4799 |
5015 void TextNode::CalculateOffsets() { | 4800 void TextNode::CalculateOffsets() { |
5016 int element_count = elements()->length(); | 4801 intptr_t element_count = elements()->length(); |
5017 // Set up the offsets of the elements relative to the start. This is a fixed | 4802 // Set up the offsets of the elements relative to the start. This is a fixed |
5018 // quantity since a TextNode can only contain fixed-width things. | 4803 // quantity since a TextNode can only contain fixed-width things. |
5019 int cp_offset = 0; | 4804 intptr_t cp_offset = 0; |
5020 for (int i = 0; i < element_count; i++) { | 4805 for (intptr_t i = 0; i < element_count; i++) { |
5021 TextElement& elm = elements()->at(i); | 4806 TextElement& elm = (*elements())[i]; |
5022 elm.set_cp_offset(cp_offset); | 4807 elm.set_cp_offset(cp_offset); |
5023 cp_offset += elm.length(); | 4808 cp_offset += elm.length(); |
5024 } | 4809 } |
5025 } | 4810 } |
5026 | 4811 |
5027 | 4812 |
5028 void Analysis::VisitText(TextNode* that) { | 4813 void Analysis::VisitText(TextNode* that) { |
5029 if (ignore_case_) { | 4814 if (ignore_case_) { |
5030 that->MakeCaseIndependent(is_one_byte_); | 4815 that->MakeCaseIndependent(is_one_byte_); |
5031 } | 4816 } |
(...skipping 10 matching lines...) Expand all Loading... |
5042 if (!has_failed()) { | 4827 if (!has_failed()) { |
5043 // If the next node is interested in what it follows then this node | 4828 // If the next node is interested in what it follows then this node |
5044 // has to be interested too so it can pass the information on. | 4829 // has to be interested too so it can pass the information on. |
5045 that->info()->AddFromFollowing(target->info()); | 4830 that->info()->AddFromFollowing(target->info()); |
5046 } | 4831 } |
5047 } | 4832 } |
5048 | 4833 |
5049 | 4834 |
5050 void Analysis::VisitChoice(ChoiceNode* that) { | 4835 void Analysis::VisitChoice(ChoiceNode* that) { |
5051 NodeInfo* info = that->info(); | 4836 NodeInfo* info = that->info(); |
5052 for (int i = 0; i < that->alternatives()->length(); i++) { | 4837 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
5053 RegExpNode* node = that->alternatives()->at(i).node(); | 4838 RegExpNode* node = (*that->alternatives())[i].node(); |
5054 EnsureAnalyzed(node); | 4839 EnsureAnalyzed(node); |
5055 if (has_failed()) return; | 4840 if (has_failed()) return; |
5056 // Anything the following nodes need to know has to be known by | 4841 // Anything the following nodes need to know has to be known by |
5057 // this node also, so it can pass it on. | 4842 // this node also, so it can pass it on. |
5058 info->AddFromFollowing(node->info()); | 4843 info->AddFromFollowing(node->info()); |
5059 } | 4844 } |
5060 } | 4845 } |
5061 | 4846 |
5062 | 4847 |
5063 void Analysis::VisitLoopChoice(LoopChoiceNode* that) { | 4848 void Analysis::VisitLoopChoice(LoopChoiceNode* that) { |
5064 NodeInfo* info = that->info(); | 4849 NodeInfo* info = that->info(); |
5065 for (int i = 0; i < that->alternatives()->length(); i++) { | 4850 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
5066 RegExpNode* node = that->alternatives()->at(i).node(); | 4851 RegExpNode* node = (*that->alternatives())[i].node(); |
5067 if (node != that->loop_node()) { | 4852 if (node != that->loop_node()) { |
5068 EnsureAnalyzed(node); | 4853 EnsureAnalyzed(node); |
5069 if (has_failed()) return; | 4854 if (has_failed()) return; |
5070 info->AddFromFollowing(node->info()); | 4855 info->AddFromFollowing(node->info()); |
5071 } | 4856 } |
5072 } | 4857 } |
5073 // Check the loop last since it may need the value of this node | 4858 // Check the loop last since it may need the value of this node |
5074 // to get a correct result. | 4859 // to get a correct result. |
5075 EnsureAnalyzed(that->loop_node()); | 4860 EnsureAnalyzed(that->loop_node()); |
5076 if (!has_failed()) { | 4861 if (!has_failed()) { |
5077 info->AddFromFollowing(that->loop_node()->info()); | 4862 info->AddFromFollowing(that->loop_node()->info()); |
5078 } | 4863 } |
5079 } | 4864 } |
5080 | 4865 |
5081 | 4866 |
5082 void Analysis::VisitBackReference(BackReferenceNode* that) { | 4867 void Analysis::VisitBackReference(BackReferenceNode* that) { |
5083 EnsureAnalyzed(that->on_success()); | 4868 EnsureAnalyzed(that->on_success()); |
5084 } | 4869 } |
5085 | 4870 |
5086 | 4871 |
5087 void Analysis::VisitAssertion(AssertionNode* that) { | 4872 void Analysis::VisitAssertion(AssertionNode* that) { |
5088 EnsureAnalyzed(that->on_success()); | 4873 EnsureAnalyzed(that->on_success()); |
5089 } | 4874 } |
5090 | 4875 |
5091 | 4876 |
5092 void BackReferenceNode::FillInBMInfo(int offset, | 4877 void BackReferenceNode::FillInBMInfo(intptr_t offset, |
5093 int budget, | 4878 intptr_t budget, |
5094 BoyerMooreLookahead* bm, | 4879 BoyerMooreLookahead* bm, |
5095 bool not_at_start) { | 4880 bool not_at_start) { |
5096 // Working out the set of characters that a backreference can match is too | 4881 // Working out the set of characters that a backreference can match is too |
5097 // hard, so we just say that any character can match. | 4882 // hard, so we just say that any character can match. |
5098 bm->SetRest(offset); | 4883 bm->SetRest(offset); |
5099 SaveBMInfo(bm, not_at_start, offset); | 4884 SaveBMInfo(bm, not_at_start, offset); |
5100 } | 4885 } |
5101 | 4886 |
5102 | 4887 |
5103 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == | 4888 COMPILE_ASSERT(BoyerMoorePositionInfo::kMapSize == |
5104 RegExpMacroAssembler::kTableSize); | 4889 RegExpMacroAssembler::kTableSize); |
5105 | 4890 |
5106 | 4891 |
5107 void ChoiceNode::FillInBMInfo(int offset, | 4892 void ChoiceNode::FillInBMInfo(intptr_t offset, |
5108 int budget, | 4893 intptr_t budget, |
5109 BoyerMooreLookahead* bm, | 4894 BoyerMooreLookahead* bm, |
5110 bool not_at_start) { | 4895 bool not_at_start) { |
5111 ZoneList<GuardedAlternative>* alts = alternatives(); | 4896 ZoneGrowableArray<GuardedAlternative>* alts = alternatives(); |
5112 budget = (budget - 1) / alts->length(); | 4897 budget = (budget - 1) / alts->length(); |
5113 for (int i = 0; i < alts->length(); i++) { | 4898 for (intptr_t i = 0; i < alts->length(); i++) { |
5114 GuardedAlternative& alt = alts->at(i); | 4899 GuardedAlternative& alt = (*alts)[i]; |
5115 if (alt.guards() != NULL && alt.guards()->length() != 0) { | 4900 if (alt.guards() != NULL && alt.guards()->length() != 0) { |
5116 bm->SetRest(offset); // Give up trying to fill in info. | 4901 bm->SetRest(offset); // Give up trying to fill in info. |
5117 SaveBMInfo(bm, not_at_start, offset); | 4902 SaveBMInfo(bm, not_at_start, offset); |
5118 return; | 4903 return; |
5119 } | 4904 } |
5120 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start); | 4905 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start); |
5121 } | 4906 } |
5122 SaveBMInfo(bm, not_at_start, offset); | 4907 SaveBMInfo(bm, not_at_start, offset); |
5123 } | 4908 } |
5124 | 4909 |
5125 | 4910 |
5126 void TextNode::FillInBMInfo(int initial_offset, | 4911 void TextNode::FillInBMInfo(intptr_t initial_offset, |
5127 int budget, | 4912 intptr_t budget, |
5128 BoyerMooreLookahead* bm, | 4913 BoyerMooreLookahead* bm, |
5129 bool not_at_start) { | 4914 bool not_at_start) { |
5130 if (initial_offset >= bm->length()) return; | 4915 if (initial_offset >= bm->length()) return; |
5131 int offset = initial_offset; | 4916 intptr_t offset = initial_offset; |
5132 int max_char = bm->max_char(); | 4917 intptr_t max_char = bm->max_char(); |
5133 for (int i = 0; i < elements()->length(); i++) { | 4918 for (intptr_t i = 0; i < elements()->length(); i++) { |
5134 if (offset >= bm->length()) { | 4919 if (offset >= bm->length()) { |
5135 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4920 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
5136 return; | 4921 return; |
5137 } | 4922 } |
5138 TextElement text = elements()->at(i); | 4923 TextElement text = elements()->At(i); |
5139 if (text.text_type() == TextElement::ATOM) { | 4924 if (text.text_type() == TextElement::ATOM) { |
5140 RegExpAtom* atom = text.atom(); | 4925 RegExpAtom* atom = text.atom(); |
5141 for (int j = 0; j < atom->length(); j++, offset++) { | 4926 for (intptr_t j = 0; j < atom->length(); j++, offset++) { |
5142 if (offset >= bm->length()) { | 4927 if (offset >= bm->length()) { |
5143 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4928 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
5144 return; | 4929 return; |
5145 } | 4930 } |
5146 uc16 character = atom->data()[j]; | 4931 uint16_t character = atom->data()->At(j); |
5147 if (bm->compiler()->ignore_case()) { | 4932 if (bm->compiler()->ignore_case()) { |
5148 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4933 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
5149 int length = GetCaseIndependentLetters( | 4934 intptr_t length = GetCaseIndependentLetters( |
5150 Isolate::Current(), | |
5151 character, | 4935 character, |
5152 bm->max_char() == String::kMaxOneByteCharCode, | 4936 bm->max_char() == Symbols::kMaxOneCharCodeSymbol, |
5153 chars); | 4937 chars); |
5154 for (int j = 0; j < length; j++) { | 4938 for (intptr_t j = 0; j < length; j++) { |
5155 bm->Set(offset, chars[j]); | 4939 bm->Set(offset, chars[j]); |
5156 } | 4940 } |
5157 } else { | 4941 } else { |
5158 if (character <= max_char) bm->Set(offset, character); | 4942 if (character <= max_char) bm->Set(offset, character); |
5159 } | 4943 } |
5160 } | 4944 } |
5161 } else { | 4945 } else { |
5162 DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type()); | 4946 ASSERT(text.text_type() == TextElement::CHAR_CLASS); |
5163 RegExpCharacterClass* char_class = text.char_class(); | 4947 RegExpCharacterClass* char_class = text.char_class(); |
5164 ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); | 4948 ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges(); |
5165 if (char_class->is_negated()) { | 4949 if (char_class->is_negated()) { |
5166 bm->SetAll(offset); | 4950 bm->SetAll(offset); |
5167 } else { | 4951 } else { |
5168 for (int k = 0; k < ranges->length(); k++) { | 4952 for (intptr_t k = 0; k < ranges->length(); k++) { |
5169 CharacterRange& range = ranges->at(k); | 4953 CharacterRange& range = (*ranges)[k]; |
5170 if (range.from() > max_char) continue; | 4954 if (range.from() > max_char) continue; |
5171 int to = Min(max_char, static_cast<int>(range.to())); | 4955 intptr_t to = Utils::Minimum(max_char, |
| 4956 static_cast<intptr_t>(range.to())); |
5172 bm->SetInterval(offset, Interval(range.from(), to)); | 4957 bm->SetInterval(offset, Interval(range.from(), to)); |
5173 } | 4958 } |
5174 } | 4959 } |
5175 offset++; | 4960 offset++; |
5176 } | 4961 } |
5177 } | 4962 } |
5178 if (offset >= bm->length()) { | 4963 if (offset >= bm->length()) { |
5179 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4964 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
5180 return; | 4965 return; |
5181 } | 4966 } |
5182 on_success()->FillInBMInfo(offset, | 4967 on_success()->FillInBMInfo(offset, |
5183 budget - 1, | 4968 budget - 1, |
5184 bm, | 4969 bm, |
5185 true); // Not at start after a text node. | 4970 true); // Not at start after a text node. |
5186 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4971 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
5187 } | 4972 } |
5188 | 4973 |
5189 | 4974 |
5190 // ------------------------------------------------------------------- | |
5191 // Dispatch table construction | |
5192 | |
5193 | |
5194 void DispatchTableConstructor::VisitEnd(EndNode* that) { | |
5195 AddRange(CharacterRange::Everything()); | |
5196 } | |
5197 | |
5198 | |
5199 void DispatchTableConstructor::BuildTable(ChoiceNode* node) { | |
5200 node->set_being_calculated(true); | |
5201 ZoneList<GuardedAlternative>* alternatives = node->alternatives(); | |
5202 for (int i = 0; i < alternatives->length(); i++) { | |
5203 set_choice_index(i); | |
5204 alternatives->at(i).node()->Accept(this); | |
5205 } | |
5206 node->set_being_calculated(false); | |
5207 } | |
5208 | |
5209 | |
5210 class AddDispatchRange { | |
5211 public: | |
5212 explicit AddDispatchRange(DispatchTableConstructor* constructor) | |
5213 : constructor_(constructor) { } | |
5214 void Call(uc32 from, DispatchTable::Entry entry); | |
5215 private: | |
5216 DispatchTableConstructor* constructor_; | |
5217 }; | |
5218 | |
5219 | |
5220 void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) { | |
5221 CharacterRange range(from, entry.to()); | |
5222 constructor_->AddRange(range); | |
5223 } | |
5224 | |
5225 | |
5226 void DispatchTableConstructor::VisitChoice(ChoiceNode* node) { | |
5227 if (node->being_calculated()) | |
5228 return; | |
5229 DispatchTable* table = node->GetTable(ignore_case_); | |
5230 AddDispatchRange adder(this); | |
5231 table->ForEach(&adder); | |
5232 } | |
5233 | |
5234 | |
5235 void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) { | |
5236 // TODO(160): Find the node that we refer back to and propagate its start | |
5237 // set back to here. For now we just accept anything. | |
5238 AddRange(CharacterRange::Everything()); | |
5239 } | |
5240 | |
5241 | |
5242 void DispatchTableConstructor::VisitAssertion(AssertionNode* that) { | |
5243 RegExpNode* target = that->on_success(); | |
5244 target->Accept(this); | |
5245 } | |
5246 | |
5247 | |
5248 static int CompareRangeByFrom(const CharacterRange* a, | |
5249 const CharacterRange* b) { | |
5250 return Compare<uc16>(a->from(), b->from()); | |
5251 } | |
5252 | |
5253 | |
5254 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) { | |
5255 ranges->Sort(CompareRangeByFrom); | |
5256 uc16 last = 0; | |
5257 for (int i = 0; i < ranges->length(); i++) { | |
5258 CharacterRange range = ranges->at(i); | |
5259 if (last < range.from()) | |
5260 AddRange(CharacterRange(last, range.from() - 1)); | |
5261 if (range.to() >= last) { | |
5262 if (range.to() == String::kMaxUtf16CodeUnit) { | |
5263 return; | |
5264 } else { | |
5265 last = range.to() + 1; | |
5266 } | |
5267 } | |
5268 } | |
5269 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit)); | |
5270 } | |
5271 | |
5272 | |
5273 void DispatchTableConstructor::VisitText(TextNode* that) { | |
5274 TextElement elm = that->elements()->at(0); | |
5275 switch (elm.text_type()) { | |
5276 case TextElement::ATOM: { | |
5277 uc16 c = elm.atom()->data()[0]; | |
5278 AddRange(CharacterRange(c, c)); | |
5279 break; | |
5280 } | |
5281 case TextElement::CHAR_CLASS: { | |
5282 RegExpCharacterClass* tree = elm.char_class(); | |
5283 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone()); | |
5284 if (tree->is_negated()) { | |
5285 AddInverse(ranges); | |
5286 } else { | |
5287 for (int i = 0; i < ranges->length(); i++) | |
5288 AddRange(ranges->at(i)); | |
5289 } | |
5290 break; | |
5291 } | |
5292 default: { | |
5293 UNIMPLEMENTED(); | |
5294 } | |
5295 } | |
5296 } | |
5297 | |
5298 | |
5299 void DispatchTableConstructor::VisitAction(ActionNode* that) { | |
5300 RegExpNode* target = that->on_success(); | |
5301 target->Accept(this); | |
5302 } | |
5303 | |
5304 | |
5305 RegExpEngine::CompilationResult RegExpEngine::Compile( | 4975 RegExpEngine::CompilationResult RegExpEngine::Compile( |
5306 RegExpCompileData* data, bool ignore_case, bool is_global, | 4976 RegExpCompileData* data, |
5307 bool is_multiline, bool is_sticky, Handle<String> pattern, | 4977 const ParsedFunction* parsed_function, |
5308 Handle<String> sample_subject, bool is_one_byte, Zone* zone) { | 4978 const ZoneGrowableArray<const ICData*>& ic_data_array) { |
5309 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { | 4979 Isolate* isolate = Isolate::Current(); |
5310 return IrregexpRegExpTooBig(zone->isolate()); | |
5311 } | |
5312 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone); | |
5313 | 4980 |
5314 // Sample some characters from the middle of the string. | 4981 const Function& function = parsed_function->function(); |
5315 static const int kSampleSize = 128; | 4982 const intptr_t specialization_cid = function.regexp_cid(); |
| 4983 const bool is_one_byte = (specialization_cid == kOneByteStringCid || |
| 4984 specialization_cid == kExternalOneByteStringCid); |
| 4985 JSRegExp& regexp = JSRegExp::Handle(isolate, function.regexp()); |
| 4986 const String& pattern = String::Handle(isolate, regexp.pattern()); |
5316 | 4987 |
5317 sample_subject = String::Flatten(sample_subject); | 4988 ASSERT(!regexp.IsNull()); |
5318 int chars_sampled = 0; | 4989 ASSERT(!pattern.IsNull()); |
5319 int half_way = (sample_subject->length() - kSampleSize) / 2; | 4990 |
5320 for (int i = Max(0, half_way); | 4991 const bool ignore_case = regexp.is_ignore_case(); |
5321 i < sample_subject->length() && chars_sampled < kSampleSize; | 4992 const bool is_global = regexp.is_global(); |
5322 i++, chars_sampled++) { | 4993 |
5323 compiler.frequency_collator()->CountCharacter(sample_subject->Get(i)); | 4994 RegExpCompiler compiler(data->capture_count, ignore_case, specialization_cid); |
5324 } | 4995 |
| 4996 // TODO(zerny): Frequency sampling is currently disabled because of several |
| 4997 // issues. We do not want to store subject strings in the regexp object since |
| 4998 // they might be long and we should not prevent their garbage collection. |
| 4999 // Passing them to this function explicitly does not help, since we must |
| 5000 // generate exactly the same IR for both the unoptimizing and optimizing |
| 5001 // pipelines (otherwise it gets confused when e.g. deopt ids differ). |
| 5002 // An option would be to store sampling results in the regexp object, but |
| 5003 // I'm not sure the performance gains are relevant enough. |
5325 | 5004 |
5326 // Wrap the body of the regexp in capture #0. | 5005 // Wrap the body of the regexp in capture #0. |
5327 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, | 5006 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, |
5328 0, | 5007 0, |
5329 &compiler, | 5008 &compiler, |
5330 compiler.accept()); | 5009 compiler.accept()); |
| 5010 |
5331 RegExpNode* node = captured_body; | 5011 RegExpNode* node = captured_body; |
5332 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 5012 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); |
5333 bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 5013 bool is_start_anchored = data->tree->IsAnchoredAtStart(); |
5334 int max_length = data->tree->max_match(); | 5014 intptr_t max_length = data->tree->max_match(); |
5335 if (!is_start_anchored && !is_sticky) { | 5015 if (!is_start_anchored) { |
5336 // Add a .*? at the beginning, outside the body capture, unless | 5016 // Add a .*? at the beginning, outside the body capture, unless |
5337 // this expression is anchored at the beginning or sticky. | 5017 // this expression is anchored at the beginning. |
5338 RegExpNode* loop_node = | 5018 RegExpNode* loop_node = |
5339 RegExpQuantifier::ToNode(0, | 5019 RegExpQuantifier::ToNode(0, |
5340 RegExpTree::kInfinity, | 5020 RegExpTree::kInfinity, |
5341 false, | 5021 false, |
5342 new(zone) RegExpCharacterClass('*'), | 5022 new(isolate) RegExpCharacterClass('*'), |
5343 &compiler, | 5023 &compiler, |
5344 captured_body, | 5024 captured_body, |
5345 data->contains_anchor); | 5025 data->contains_anchor); |
5346 | 5026 |
5347 if (data->contains_anchor) { | 5027 if (data->contains_anchor) { |
5348 // Unroll loop once, to take care of the case that might start | 5028 // Unroll loop once, to take care of the case that might start |
5349 // at the start of input. | 5029 // at the start of input. |
5350 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); | 5030 ChoiceNode* first_step_node = new(isolate) ChoiceNode(2, isolate); |
5351 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 5031 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
5352 first_step_node->AddAlternative(GuardedAlternative( | 5032 first_step_node->AddAlternative(GuardedAlternative( |
5353 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node))); | 5033 new(isolate) TextNode( |
| 5034 new(isolate) RegExpCharacterClass('*'), loop_node))); |
5354 node = first_step_node; | 5035 node = first_step_node; |
5355 } else { | 5036 } else { |
5356 node = loop_node; | 5037 node = loop_node; |
5357 } | 5038 } |
5358 } | 5039 } |
5359 if (is_one_byte) { | 5040 if (is_one_byte) { |
5360 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 5041 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
5361 // Do it again to propagate the new nodes to places where they were not | 5042 // Do it again to propagate the new nodes to places where they were not |
5362 // put because they had not been calculated yet. | 5043 // put because they had not been calculated yet. |
5363 if (node != NULL) { | 5044 if (node != NULL) { |
5364 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 5045 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
5365 } | 5046 } |
5366 } | 5047 } |
5367 | 5048 |
5368 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); | 5049 if (node == NULL) node = new(isolate) EndNode(EndNode::BACKTRACK, isolate); |
5369 data->node = node; | 5050 data->node = node; |
5370 Analysis analysis(ignore_case, is_one_byte); | 5051 Analysis analysis(ignore_case, is_one_byte); |
5371 analysis.EnsureAnalyzed(node); | 5052 analysis.EnsureAnalyzed(node); |
5372 if (analysis.has_failed()) { | 5053 if (analysis.has_failed()) { |
5373 const char* error_message = analysis.error_message(); | 5054 const char* error_message = analysis.error_message(); |
5374 return CompilationResult(zone->isolate(), error_message); | 5055 return CompilationResult(error_message); |
5375 } | 5056 } |
5376 | 5057 |
5377 // Create the correct assembler for the architecture. | |
5378 #ifndef V8_INTERPRETED_REGEXP | |
5379 // Native regexp implementation. | 5058 // Native regexp implementation. |
5380 | 5059 |
5381 NativeRegExpMacroAssembler::Mode mode = | 5060 IRRegExpMacroAssembler* macro_assembler = |
5382 is_one_byte ? NativeRegExpMacroAssembler::LATIN1 | 5061 new(isolate) IRRegExpMacroAssembler(specialization_cid, |
5383 : NativeRegExpMacroAssembler::UC16; | 5062 data->capture_count, |
5384 | 5063 parsed_function, |
5385 #if V8_TARGET_ARCH_IA32 | 5064 ic_data_array, |
5386 RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2, | 5065 isolate); |
5387 zone); | |
5388 #elif V8_TARGET_ARCH_X64 | |
5389 RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2, | |
5390 zone); | |
5391 #elif V8_TARGET_ARCH_ARM | |
5392 RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2, | |
5393 zone); | |
5394 #elif V8_TARGET_ARCH_ARM64 | |
5395 RegExpMacroAssemblerARM64 macro_assembler(mode, (data->capture_count + 1) * 2, | |
5396 zone); | |
5397 #elif V8_TARGET_ARCH_MIPS | |
5398 RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2, | |
5399 zone); | |
5400 #elif V8_TARGET_ARCH_MIPS64 | |
5401 RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2, | |
5402 zone); | |
5403 #elif V8_TARGET_ARCH_X87 | |
5404 RegExpMacroAssemblerX87 macro_assembler(mode, (data->capture_count + 1) * 2, | |
5405 zone); | |
5406 #else | |
5407 #error "Unsupported architecture" | |
5408 #endif | |
5409 | |
5410 #else // V8_INTERPRETED_REGEXP | |
5411 // Interpreted regexp implementation. | |
5412 EmbeddedVector<byte, 1024> codes; | |
5413 RegExpMacroAssemblerIrregexp macro_assembler(codes, zone); | |
5414 #endif // V8_INTERPRETED_REGEXP | |
5415 | 5066 |
5416 // Inserted here, instead of in Assembler, because it depends on information | 5067 // Inserted here, instead of in Assembler, because it depends on information |
5417 // in the AST that isn't replicated in the Node structure. | 5068 // in the AST that isn't replicated in the Node structure. |
5418 static const int kMaxBacksearchLimit = 1024; | 5069 static const intptr_t kMaxBacksearchLimit = 1024; |
5419 if (is_end_anchored && | 5070 if (is_end_anchored && |
5420 !is_start_anchored && | 5071 !is_start_anchored && |
5421 max_length < kMaxBacksearchLimit) { | 5072 max_length < kMaxBacksearchLimit) { |
5422 macro_assembler.SetCurrentPositionFromEnd(max_length); | 5073 macro_assembler->SetCurrentPositionFromEnd(max_length); |
5423 } | 5074 } |
5424 | 5075 |
5425 if (is_global) { | 5076 if (is_global) { |
5426 macro_assembler.set_global_mode( | 5077 macro_assembler->set_global_mode( |
5427 (data->tree->min_match() > 0) | 5078 (data->tree->min_match() > 0) |
5428 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK | 5079 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK |
5429 : RegExpMacroAssembler::GLOBAL); | 5080 : RegExpMacroAssembler::GLOBAL); |
5430 } | 5081 } |
5431 | 5082 |
5432 return compiler.Assemble(&macro_assembler, | 5083 RegExpEngine::CompilationResult result = |
5433 node, | 5084 compiler.Assemble(macro_assembler, |
5434 data->capture_count, | 5085 node, |
5435 pattern); | 5086 data->capture_count, |
| 5087 pattern); |
| 5088 |
| 5089 if (FLAG_trace_irregexp) { |
| 5090 macro_assembler->PrintBlocks(); |
| 5091 } |
| 5092 |
| 5093 return result; |
5436 } | 5094 } |
5437 | 5095 |
| 5096 |
| 5097 static void CreateSpecializedFunction(Isolate* isolate, |
| 5098 const JSRegExp& regexp, |
| 5099 intptr_t specialization_cid, |
| 5100 const Object& owner) { |
| 5101 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; |
| 5102 |
| 5103 Function& fn = Function::Handle(isolate, Function::New( |
| 5104 Symbols::IrregExp(), |
| 5105 RawFunction::kIrregexpFunction, |
| 5106 true, // Static. |
| 5107 false, // Not const. |
| 5108 false, // Not abstract. |
| 5109 false, // Not external. |
| 5110 false, // Not native. |
| 5111 owner, |
| 5112 0)); // No token position. |
| 5113 |
| 5114 // TODO(zerny): Share these arrays between all irregexp functions. |
| 5115 fn.set_num_fixed_parameters(kParamCount); |
| 5116 fn.set_parameter_types(Array::Handle(isolate, Array::New(kParamCount, |
| 5117 Heap::kOld))); |
| 5118 fn.set_parameter_names(Array::Handle(isolate, Array::New(kParamCount, |
| 5119 Heap::kOld))); |
| 5120 fn.SetParameterTypeAt(0, Type::Handle(isolate, Type::DynamicType())); |
| 5121 fn.SetParameterNameAt(0, Symbols::string_param()); |
| 5122 fn.SetParameterTypeAt(1, Type::Handle(isolate, Type::DynamicType())); |
| 5123 fn.SetParameterNameAt(1, Symbols::start_index_param()); |
| 5124 fn.set_result_type(Type::Handle(isolate, Type::ArrayType())); |
| 5125 |
| 5126 // Cache the result. |
| 5127 regexp.set_function(specialization_cid, fn); |
| 5128 |
| 5129 fn.set_regexp(regexp); |
| 5130 fn.set_regexp_cid(specialization_cid); |
| 5131 |
| 5132 // The function is compiled lazily during the first call. |
| 5133 } |
| 5134 |
| 5135 |
| 5136 RawJSRegExp* RegExpEngine::CreateJSRegExp(Isolate* isolate, |
| 5137 const String& pattern, |
| 5138 bool multi_line, |
| 5139 bool ignore_case) { |
| 5140 const JSRegExp& regexp = JSRegExp::Handle(JSRegExp::New(0)); |
| 5141 |
| 5142 regexp.set_pattern(pattern); |
| 5143 |
| 5144 if (multi_line) { |
| 5145 regexp.set_is_multi_line(); |
| 5146 } |
| 5147 if (ignore_case) { |
| 5148 regexp.set_is_ignore_case(); |
| 5149 } |
| 5150 |
| 5151 // TODO(zerny): We might want to use normal string searching algorithms |
| 5152 // for simple patterns. |
| 5153 regexp.set_is_complex(); |
| 5154 regexp.set_is_global(); // All dart regexps are global. |
| 5155 |
| 5156 const Library& lib = Library::Handle(isolate, Library::CoreLibrary()); |
| 5157 const Class& owner = Class::Handle( |
| 5158 isolate, lib.LookupClass(Symbols::RegExp())); |
| 5159 |
| 5160 CreateSpecializedFunction(isolate, regexp, kOneByteStringCid, owner); |
| 5161 CreateSpecializedFunction(isolate, regexp, kTwoByteStringCid, owner); |
| 5162 CreateSpecializedFunction(isolate, regexp, kExternalOneByteStringCid, owner); |
| 5163 CreateSpecializedFunction(isolate, regexp, kExternalTwoByteStringCid, owner); |
| 5164 |
| 5165 return regexp.raw(); |
| 5166 } |
| 5167 |
| 5168 |
5438 } // namespace dart | 5169 } // namespace dart |
OLD | NEW |