| OLD | NEW |
| 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 120 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags); | 120 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags); |
| 121 bool in_cache = !cached.is_null(); | 121 bool in_cache = !cached.is_null(); |
| 122 LOG(isolate, RegExpCompileEvent(re, in_cache)); | 122 LOG(isolate, RegExpCompileEvent(re, in_cache)); |
| 123 | 123 |
| 124 Handle<Object> result; | 124 Handle<Object> result; |
| 125 if (in_cache) { | 125 if (in_cache) { |
| 126 re->set_data(*cached); | 126 re->set_data(*cached); |
| 127 return re; | 127 return re; |
| 128 } | 128 } |
| 129 pattern = FlattenGetString(pattern); | 129 pattern = FlattenGetString(pattern); |
| 130 CompilationZoneScope zone_scope(isolate, DELETE_ON_EXIT); | 130 ZoneScope zone_scope(isolate, DELETE_ON_EXIT); |
| 131 PostponeInterruptsScope postpone(isolate); | 131 PostponeInterruptsScope postpone(isolate); |
| 132 RegExpCompileData parse_result; | 132 RegExpCompileData parse_result; |
| 133 FlatStringReader reader(isolate, pattern); | 133 FlatStringReader reader(isolate, pattern); |
| 134 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), | 134 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), |
| 135 &parse_result)) { | 135 &parse_result)) { |
| 136 // Throw an exception if we fail to parse the pattern. | 136 // Throw an exception if we fail to parse the pattern. |
| 137 ThrowRegExpException(re, | 137 ThrowRegExpException(re, |
| 138 pattern, | 138 pattern, |
| 139 parse_result.error, | 139 parse_result.error, |
| 140 "malformed_regexp"); | 140 "malformed_regexp"); |
| (...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 288 // from the source pattern. | 288 // from the source pattern. |
| 289 // If compilation fails, an exception is thrown and this function | 289 // If compilation fails, an exception is thrown and this function |
| 290 // returns false. | 290 // returns false. |
| 291 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) { | 291 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) { |
| 292 Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii)); | 292 Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii)); |
| 293 #ifdef V8_INTERPRETED_REGEXP | 293 #ifdef V8_INTERPRETED_REGEXP |
| 294 if (compiled_code->IsByteArray()) return true; | 294 if (compiled_code->IsByteArray()) return true; |
| 295 #else // V8_INTERPRETED_REGEXP (RegExp native code) | 295 #else // V8_INTERPRETED_REGEXP (RegExp native code) |
| 296 if (compiled_code->IsCode()) return true; | 296 if (compiled_code->IsCode()) return true; |
| 297 #endif | 297 #endif |
| 298 // We could potentially have marked this as flushable, but have kept |
| 299 // a saved version if we did not flush it yet. |
| 300 Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii)); |
| 301 if (saved_code->IsCode()) { |
| 302 // Reinstate the code in the original place. |
| 303 re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code); |
| 304 ASSERT(compiled_code->IsSmi()); |
| 305 return true; |
| 306 } |
| 298 return CompileIrregexp(re, is_ascii); | 307 return CompileIrregexp(re, is_ascii); |
| 299 } | 308 } |
| 300 | 309 |
| 301 | 310 |
| 311 static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re, |
| 312 bool is_ascii, |
| 313 Handle<String> error_message, |
| 314 Isolate* isolate) { |
| 315 Factory* factory = isolate->factory(); |
| 316 Handle<FixedArray> elements = factory->NewFixedArray(2); |
| 317 elements->set(0, re->Pattern()); |
| 318 elements->set(1, *error_message); |
| 319 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); |
| 320 Handle<Object> regexp_err = |
| 321 factory->NewSyntaxError("malformed_regexp", array); |
| 322 isolate->Throw(*regexp_err); |
| 323 return false; |
| 324 } |
| 325 |
| 326 |
| 302 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) { | 327 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) { |
| 303 // Compile the RegExp. | 328 // Compile the RegExp. |
| 304 Isolate* isolate = re->GetIsolate(); | 329 Isolate* isolate = re->GetIsolate(); |
| 305 CompilationZoneScope zone_scope(isolate, DELETE_ON_EXIT); | 330 ZoneScope zone_scope(isolate, DELETE_ON_EXIT); |
| 306 PostponeInterruptsScope postpone(isolate); | 331 PostponeInterruptsScope postpone(isolate); |
| 332 // If we had a compilation error the last time this is saved at the |
| 333 // saved code index. |
| 307 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii)); | 334 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii)); |
| 308 if (entry->IsJSObject()) { | 335 // When arriving here entry can only be a smi, either representing an |
| 309 // If it's a JSObject, a previous compilation failed and threw this object. | 336 // uncompiled regexp, a previous compilation error, or code that has |
| 310 // Re-throw the object without trying again. | 337 // been flushed. |
| 311 isolate->Throw(entry); | 338 ASSERT(entry->IsSmi()); |
| 339 int entry_value = Smi::cast(entry)->value(); |
| 340 ASSERT(entry_value == JSRegExp::kUninitializedValue || |
| 341 entry_value == JSRegExp::kCompilationErrorValue || |
| 342 (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0)); |
| 343 |
| 344 if (entry_value == JSRegExp::kCompilationErrorValue) { |
| 345 // A previous compilation failed and threw an error which we store in |
| 346 // the saved code index (we store the error message, not the actual |
| 347 // error). Recreate the error object and throw it. |
| 348 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii)); |
| 349 ASSERT(error_string->IsString()); |
| 350 Handle<String> error_message(String::cast(error_string)); |
| 351 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); |
| 312 return false; | 352 return false; |
| 313 } | 353 } |
| 314 ASSERT(entry->IsTheHole()); | |
| 315 | 354 |
| 316 JSRegExp::Flags flags = re->GetFlags(); | 355 JSRegExp::Flags flags = re->GetFlags(); |
| 317 | 356 |
| 318 Handle<String> pattern(re->Pattern()); | 357 Handle<String> pattern(re->Pattern()); |
| 319 if (!pattern->IsFlat()) { | 358 if (!pattern->IsFlat()) { |
| 320 FlattenString(pattern); | 359 FlattenString(pattern); |
| 321 } | 360 } |
| 322 | 361 |
| 323 RegExpCompileData compile_data; | 362 RegExpCompileData compile_data; |
| 324 FlatStringReader reader(isolate, pattern); | 363 FlatStringReader reader(isolate, pattern); |
| 325 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), | 364 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), |
| 326 &compile_data)) { | 365 &compile_data)) { |
| 327 // Throw an exception if we fail to parse the pattern. | 366 // Throw an exception if we fail to parse the pattern. |
| 328 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. | 367 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. |
| 329 ThrowRegExpException(re, | 368 ThrowRegExpException(re, |
| 330 pattern, | 369 pattern, |
| 331 compile_data.error, | 370 compile_data.error, |
| 332 "malformed_regexp"); | 371 "malformed_regexp"); |
| 333 return false; | 372 return false; |
| 334 } | 373 } |
| 335 RegExpEngine::CompilationResult result = | 374 RegExpEngine::CompilationResult result = |
| 336 RegExpEngine::Compile(&compile_data, | 375 RegExpEngine::Compile(&compile_data, |
| 337 flags.is_ignore_case(), | 376 flags.is_ignore_case(), |
| 338 flags.is_multiline(), | 377 flags.is_multiline(), |
| 339 pattern, | 378 pattern, |
| 340 is_ascii); | 379 is_ascii); |
| 341 if (result.error_message != NULL) { | 380 if (result.error_message != NULL) { |
| 342 // Unable to compile regexp. | 381 // Unable to compile regexp. |
| 343 Factory* factory = isolate->factory(); | |
| 344 Handle<FixedArray> elements = factory->NewFixedArray(2); | |
| 345 elements->set(0, *pattern); | |
| 346 Handle<String> error_message = | 382 Handle<String> error_message = |
| 347 factory->NewStringFromUtf8(CStrVector(result.error_message)); | 383 isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message)); |
| 348 elements->set(1, *error_message); | 384 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); |
| 349 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); | |
| 350 Handle<Object> regexp_err = | |
| 351 factory->NewSyntaxError("malformed_regexp", array); | |
| 352 isolate->Throw(*regexp_err); | |
| 353 re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err); | |
| 354 return false; | 385 return false; |
| 355 } | 386 } |
| 356 | 387 |
| 357 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); | 388 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); |
| 358 data->set(JSRegExp::code_index(is_ascii), result.code); | 389 data->set(JSRegExp::code_index(is_ascii), result.code); |
| 359 int register_max = IrregexpMaxRegisterCount(*data); | 390 int register_max = IrregexpMaxRegisterCount(*data); |
| 360 if (result.num_registers > register_max) { | 391 if (result.num_registers > register_max) { |
| 361 SetIrregexpMaxRegisterCount(*data, result.num_registers); | 392 SetIrregexpMaxRegisterCount(*data, result.num_registers); |
| 362 } | 393 } |
| 363 | 394 |
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 453 | 484 |
| 454 // A flat ASCII string might have a two-byte first part. | 485 // A flat ASCII string might have a two-byte first part. |
| 455 if (subject->IsConsString()) { | 486 if (subject->IsConsString()) { |
| 456 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate); | 487 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate); |
| 457 } | 488 } |
| 458 | 489 |
| 459 #ifndef V8_INTERPRETED_REGEXP | 490 #ifndef V8_INTERPRETED_REGEXP |
| 460 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); | 491 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
| 461 do { | 492 do { |
| 462 bool is_ascii = subject->IsAsciiRepresentation(); | 493 bool is_ascii = subject->IsAsciiRepresentation(); |
| 494 EnsureCompiledIrregexp(regexp, is_ascii); |
| 463 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); | 495 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); |
| 464 NativeRegExpMacroAssembler::Result res = | 496 NativeRegExpMacroAssembler::Result res = |
| 465 NativeRegExpMacroAssembler::Match(code, | 497 NativeRegExpMacroAssembler::Match(code, |
| 466 subject, | 498 subject, |
| 467 output.start(), | 499 output.start(), |
| 468 output.length(), | 500 output.length(), |
| 469 index, | 501 index, |
| 470 isolate); | 502 isolate); |
| 471 if (res != NativeRegExpMacroAssembler::RETRY) { | 503 if (res != NativeRegExpMacroAssembler::RETRY) { |
| 472 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || | 504 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || |
| (...skipping 330 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 803 static const int kMaxRecursion = 100; | 835 static const int kMaxRecursion = 100; |
| 804 inline int recursion_depth() { return recursion_depth_; } | 836 inline int recursion_depth() { return recursion_depth_; } |
| 805 inline void IncrementRecursionDepth() { recursion_depth_++; } | 837 inline void IncrementRecursionDepth() { recursion_depth_++; } |
| 806 inline void DecrementRecursionDepth() { recursion_depth_--; } | 838 inline void DecrementRecursionDepth() { recursion_depth_--; } |
| 807 | 839 |
| 808 void SetRegExpTooBig() { reg_exp_too_big_ = true; } | 840 void SetRegExpTooBig() { reg_exp_too_big_ = true; } |
| 809 | 841 |
| 810 inline bool ignore_case() { return ignore_case_; } | 842 inline bool ignore_case() { return ignore_case_; } |
| 811 inline bool ascii() { return ascii_; } | 843 inline bool ascii() { return ascii_; } |
| 812 | 844 |
| 845 int current_expansion_factor() { return current_expansion_factor_; } |
| 846 void set_current_expansion_factor(int value) { |
| 847 current_expansion_factor_ = value; |
| 848 } |
| 849 |
| 813 static const int kNoRegister = -1; | 850 static const int kNoRegister = -1; |
| 851 |
| 814 private: | 852 private: |
| 815 EndNode* accept_; | 853 EndNode* accept_; |
| 816 int next_register_; | 854 int next_register_; |
| 817 List<RegExpNode*>* work_list_; | 855 List<RegExpNode*>* work_list_; |
| 818 int recursion_depth_; | 856 int recursion_depth_; |
| 819 RegExpMacroAssembler* macro_assembler_; | 857 RegExpMacroAssembler* macro_assembler_; |
| 820 bool ignore_case_; | 858 bool ignore_case_; |
| 821 bool ascii_; | 859 bool ascii_; |
| 822 bool reg_exp_too_big_; | 860 bool reg_exp_too_big_; |
| 861 int current_expansion_factor_; |
| 823 }; | 862 }; |
| 824 | 863 |
| 825 | 864 |
| 826 class RecursionCheck { | 865 class RecursionCheck { |
| 827 public: | 866 public: |
| 828 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 867 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
| 829 compiler->IncrementRecursionDepth(); | 868 compiler->IncrementRecursionDepth(); |
| 830 } | 869 } |
| 831 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 870 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
| 832 private: | 871 private: |
| 833 RegExpCompiler* compiler_; | 872 RegExpCompiler* compiler_; |
| 834 }; | 873 }; |
| 835 | 874 |
| 836 | 875 |
| 837 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { | 876 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { |
| 838 return RegExpEngine::CompilationResult("RegExp too big"); | 877 return RegExpEngine::CompilationResult("RegExp too big"); |
| 839 } | 878 } |
| 840 | 879 |
| 841 | 880 |
| 842 // Attempts to compile the regexp using an Irregexp code generator. Returns | 881 // Attempts to compile the regexp using an Irregexp code generator. Returns |
| 843 // a fixed array or a null handle depending on whether it succeeded. | 882 // a fixed array or a null handle depending on whether it succeeded. |
| 844 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) | 883 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) |
| 845 : next_register_(2 * (capture_count + 1)), | 884 : next_register_(2 * (capture_count + 1)), |
| 846 work_list_(NULL), | 885 work_list_(NULL), |
| 847 recursion_depth_(0), | 886 recursion_depth_(0), |
| 848 ignore_case_(ignore_case), | 887 ignore_case_(ignore_case), |
| 849 ascii_(ascii), | 888 ascii_(ascii), |
| 850 reg_exp_too_big_(false) { | 889 reg_exp_too_big_(false), |
| 890 current_expansion_factor_(1) { |
| 851 accept_ = new EndNode(EndNode::ACCEPT); | 891 accept_ = new EndNode(EndNode::ACCEPT); |
| 852 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); | 892 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); |
| 853 } | 893 } |
| 854 | 894 |
| 855 | 895 |
| 856 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 896 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
| 857 RegExpMacroAssembler* macro_assembler, | 897 RegExpMacroAssembler* macro_assembler, |
| 858 RegExpNode* start, | 898 RegExpNode* start, |
| 859 int capture_count, | 899 int capture_count, |
| 860 Handle<String> pattern) { | 900 Handle<String> pattern) { |
| (...skipping 1899 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2760 ~AlternativeGenerationList() { | 2800 ~AlternativeGenerationList() { |
| 2761 for (int i = kAFew; i < alt_gens_.length(); i++) { | 2801 for (int i = kAFew; i < alt_gens_.length(); i++) { |
| 2762 delete alt_gens_[i]; | 2802 delete alt_gens_[i]; |
| 2763 alt_gens_[i] = NULL; | 2803 alt_gens_[i] = NULL; |
| 2764 } | 2804 } |
| 2765 } | 2805 } |
| 2766 | 2806 |
| 2767 AlternativeGeneration* at(int i) { | 2807 AlternativeGeneration* at(int i) { |
| 2768 return alt_gens_[i]; | 2808 return alt_gens_[i]; |
| 2769 } | 2809 } |
| 2810 |
| 2770 private: | 2811 private: |
| 2771 static const int kAFew = 10; | 2812 static const int kAFew = 10; |
| 2772 ZoneList<AlternativeGeneration*> alt_gens_; | 2813 ZoneList<AlternativeGeneration*> alt_gens_; |
| 2773 AlternativeGeneration a_few_alt_gens_[kAFew]; | 2814 AlternativeGeneration a_few_alt_gens_[kAFew]; |
| 2774 }; | 2815 }; |
| 2775 | 2816 |
| 2776 | 2817 |
| 2777 /* Code generation for choice nodes. | 2818 /* Code generation for choice nodes. |
| 2778 * | 2819 * |
| 2779 * We generate quick checks that do a mask and compare to eliminate a | 2820 * We generate quick checks that do a mask and compare to eliminate a |
| (...skipping 539 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3319 int priority = 0; | 3360 int priority = 0; |
| 3320 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { | 3361 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { |
| 3321 if (out_set->Get(i)) { | 3362 if (out_set->Get(i)) { |
| 3322 if (priority > 0) stream()->Add("|"); | 3363 if (priority > 0) stream()->Add("|"); |
| 3323 stream()->Add("<s%io%i> %i", from, i, priority); | 3364 stream()->Add("<s%io%i> %i", from, i, priority); |
| 3324 priority++; | 3365 priority++; |
| 3325 } | 3366 } |
| 3326 } | 3367 } |
| 3327 stream()->Add("}}"); | 3368 stream()->Add("}}"); |
| 3328 } | 3369 } |
| 3370 |
| 3329 private: | 3371 private: |
| 3330 bool first_; | 3372 bool first_; |
| 3331 StringStream* stream() { return stream_; } | 3373 StringStream* stream() { return stream_; } |
| 3332 StringStream* stream_; | 3374 StringStream* stream_; |
| 3333 }; | 3375 }; |
| 3334 | 3376 |
| 3335 | 3377 |
| 3336 class AttributePrinter { | 3378 class AttributePrinter { |
| 3337 public: | 3379 public: |
| 3338 explicit AttributePrinter(DotPrinter* out) | 3380 explicit AttributePrinter(DotPrinter* out) |
| (...skipping 381 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3720 RegExpNode* on_success) { | 3762 RegExpNode* on_success) { |
| 3721 return ToNode(min(), | 3763 return ToNode(min(), |
| 3722 max(), | 3764 max(), |
| 3723 is_greedy(), | 3765 is_greedy(), |
| 3724 body(), | 3766 body(), |
| 3725 compiler, | 3767 compiler, |
| 3726 on_success); | 3768 on_success); |
| 3727 } | 3769 } |
| 3728 | 3770 |
| 3729 | 3771 |
| 3772 // Scoped object to keep track of how much we unroll quantifier loops in the |
| 3773 // regexp graph generator. |
| 3774 class RegExpExpansionLimiter { |
| 3775 public: |
| 3776 static const int kMaxExpansionFactor = 6; |
| 3777 RegExpExpansionLimiter(RegExpCompiler* compiler, int factor) |
| 3778 : compiler_(compiler), |
| 3779 saved_expansion_factor_(compiler->current_expansion_factor()), |
| 3780 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { |
| 3781 ASSERT(factor > 0); |
| 3782 if (ok_to_expand_) { |
| 3783 if (factor > kMaxExpansionFactor) { |
| 3784 // Avoid integer overflow of the current expansion factor. |
| 3785 ok_to_expand_ = false; |
| 3786 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1); |
| 3787 } else { |
| 3788 int new_factor = saved_expansion_factor_ * factor; |
| 3789 ok_to_expand_ = (new_factor <= kMaxExpansionFactor); |
| 3790 compiler->set_current_expansion_factor(new_factor); |
| 3791 } |
| 3792 } |
| 3793 } |
| 3794 |
| 3795 ~RegExpExpansionLimiter() { |
| 3796 compiler_->set_current_expansion_factor(saved_expansion_factor_); |
| 3797 } |
| 3798 |
| 3799 bool ok_to_expand() { return ok_to_expand_; } |
| 3800 |
| 3801 private: |
| 3802 RegExpCompiler* compiler_; |
| 3803 int saved_expansion_factor_; |
| 3804 bool ok_to_expand_; |
| 3805 |
| 3806 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); |
| 3807 }; |
| 3808 |
| 3809 |
| 3730 RegExpNode* RegExpQuantifier::ToNode(int min, | 3810 RegExpNode* RegExpQuantifier::ToNode(int min, |
| 3731 int max, | 3811 int max, |
| 3732 bool is_greedy, | 3812 bool is_greedy, |
| 3733 RegExpTree* body, | 3813 RegExpTree* body, |
| 3734 RegExpCompiler* compiler, | 3814 RegExpCompiler* compiler, |
| 3735 RegExpNode* on_success, | 3815 RegExpNode* on_success, |
| 3736 bool not_at_start) { | 3816 bool not_at_start) { |
| 3737 // x{f, t} becomes this: | 3817 // x{f, t} becomes this: |
| 3738 // | 3818 // |
| 3739 // (r++)<-. | 3819 // (r++)<-. |
| (...skipping 19 matching lines...) Expand all Loading... |
| 3759 if (max == 0) return on_success; // This can happen due to recursion. | 3839 if (max == 0) return on_success; // This can happen due to recursion. |
| 3760 bool body_can_be_empty = (body->min_match() == 0); | 3840 bool body_can_be_empty = (body->min_match() == 0); |
| 3761 int body_start_reg = RegExpCompiler::kNoRegister; | 3841 int body_start_reg = RegExpCompiler::kNoRegister; |
| 3762 Interval capture_registers = body->CaptureRegisters(); | 3842 Interval capture_registers = body->CaptureRegisters(); |
| 3763 bool needs_capture_clearing = !capture_registers.is_empty(); | 3843 bool needs_capture_clearing = !capture_registers.is_empty(); |
| 3764 if (body_can_be_empty) { | 3844 if (body_can_be_empty) { |
| 3765 body_start_reg = compiler->AllocateRegister(); | 3845 body_start_reg = compiler->AllocateRegister(); |
| 3766 } else if (FLAG_regexp_optimization && !needs_capture_clearing) { | 3846 } else if (FLAG_regexp_optimization && !needs_capture_clearing) { |
| 3767 // Only unroll if there are no captures and the body can't be | 3847 // Only unroll if there are no captures and the body can't be |
| 3768 // empty. | 3848 // empty. |
| 3769 if (min > 0 && min <= kMaxUnrolledMinMatches) { | 3849 { |
| 3770 int new_max = (max == kInfinity) ? max : max - min; | 3850 RegExpExpansionLimiter limiter( |
| 3771 // Recurse once to get the loop or optional matches after the fixed ones. | 3851 compiler, min + ((max != min) ? 1 : 0)); |
| 3772 RegExpNode* answer = ToNode( | 3852 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { |
| 3773 0, new_max, is_greedy, body, compiler, on_success, true); | 3853 int new_max = (max == kInfinity) ? max : max - min; |
| 3774 // Unroll the forced matches from 0 to min. This can cause chains of | 3854 // Recurse once to get the loop or optional matches after the fixed |
| 3775 // TextNodes (which the parser does not generate). These should be | 3855 // ones. |
| 3776 // combined if it turns out they hinder good code generation. | 3856 RegExpNode* answer = ToNode( |
| 3777 for (int i = 0; i < min; i++) { | 3857 0, new_max, is_greedy, body, compiler, on_success, true); |
| 3778 answer = body->ToNode(compiler, answer); | 3858 // Unroll the forced matches from 0 to min. This can cause chains of |
| 3859 // TextNodes (which the parser does not generate). These should be |
| 3860 // combined if it turns out they hinder good code generation. |
| 3861 for (int i = 0; i < min; i++) { |
| 3862 answer = body->ToNode(compiler, answer); |
| 3863 } |
| 3864 return answer; |
| 3779 } | 3865 } |
| 3780 return answer; | |
| 3781 } | 3866 } |
| 3782 if (max <= kMaxUnrolledMaxMatches) { | 3867 if (max <= kMaxUnrolledMaxMatches && min == 0) { |
| 3783 ASSERT(min == 0); | 3868 ASSERT(max > 0); // Due to the 'if' above. |
| 3784 // Unroll the optional matches up to max. | 3869 RegExpExpansionLimiter limiter(compiler, max); |
| 3785 RegExpNode* answer = on_success; | 3870 if (limiter.ok_to_expand()) { |
| 3786 for (int i = 0; i < max; i++) { | 3871 // Unroll the optional matches up to max. |
| 3787 ChoiceNode* alternation = new ChoiceNode(2); | 3872 RegExpNode* answer = on_success; |
| 3788 if (is_greedy) { | 3873 for (int i = 0; i < max; i++) { |
| 3789 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, | 3874 ChoiceNode* alternation = new ChoiceNode(2); |
| 3790 answer))); | 3875 if (is_greedy) { |
| 3791 alternation->AddAlternative(GuardedAlternative(on_success)); | 3876 alternation->AddAlternative( |
| 3792 } else { | 3877 GuardedAlternative(body->ToNode(compiler, answer))); |
| 3793 alternation->AddAlternative(GuardedAlternative(on_success)); | 3878 alternation->AddAlternative(GuardedAlternative(on_success)); |
| 3794 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, | 3879 } else { |
| 3795 answer))); | 3880 alternation->AddAlternative(GuardedAlternative(on_success)); |
| 3881 alternation->AddAlternative( |
| 3882 GuardedAlternative(body->ToNode(compiler, answer))); |
| 3883 } |
| 3884 answer = alternation; |
| 3885 if (not_at_start) alternation->set_not_at_start(); |
| 3796 } | 3886 } |
| 3797 answer = alternation; | 3887 return answer; |
| 3798 if (not_at_start) alternation->set_not_at_start(); | |
| 3799 } | 3888 } |
| 3800 return answer; | |
| 3801 } | 3889 } |
| 3802 } | 3890 } |
| 3803 bool has_min = min > 0; | 3891 bool has_min = min > 0; |
| 3804 bool has_max = max < RegExpTree::kInfinity; | 3892 bool has_max = max < RegExpTree::kInfinity; |
| 3805 bool needs_counter = has_min || has_max; | 3893 bool needs_counter = has_min || has_max; |
| 3806 int reg_ctr = needs_counter | 3894 int reg_ctr = needs_counter |
| 3807 ? compiler->AllocateRegister() | 3895 ? compiler->AllocateRegister() |
| 3808 : RegExpCompiler::kNoRegister; | 3896 : RegExpCompiler::kNoRegister; |
| 3809 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0); | 3897 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0); |
| 3810 if (not_at_start) center->set_not_at_start(); | 3898 if (not_at_start) center->set_not_at_start(); |
| (...skipping 305 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4116 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); | 4204 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); |
| 4117 for (int i = 0; i < overlay.length(); i += 2) { | 4205 for (int i = 0; i < overlay.length(); i += 2) { |
| 4118 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), | 4206 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), |
| 4119 CharacterRangeSplitter::kInOverlay); | 4207 CharacterRangeSplitter::kInOverlay); |
| 4120 } | 4208 } |
| 4121 CharacterRangeSplitter callback(included, excluded); | 4209 CharacterRangeSplitter callback(included, excluded); |
| 4122 table.ForEach(&callback); | 4210 table.ForEach(&callback); |
| 4123 } | 4211 } |
| 4124 | 4212 |
| 4125 | 4213 |
| 4126 static void AddUncanonicals(Isolate* isolate, | |
| 4127 ZoneList<CharacterRange>* ranges, | |
| 4128 int bottom, | |
| 4129 int top); | |
| 4130 | |
| 4131 | |
| 4132 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, | 4214 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, |
| 4133 bool is_ascii) { | 4215 bool is_ascii) { |
| 4134 Isolate* isolate = Isolate::Current(); | 4216 Isolate* isolate = Isolate::Current(); |
| 4135 uc16 bottom = from(); | 4217 uc16 bottom = from(); |
| 4136 uc16 top = to(); | 4218 uc16 top = to(); |
| 4137 if (is_ascii) { | 4219 if (is_ascii) { |
| 4138 if (bottom > String::kMaxAsciiCharCode) return; | 4220 if (bottom > String::kMaxAsciiCharCode) return; |
| 4139 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; | 4221 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; |
| 4140 } | 4222 } |
| 4141 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4223 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| (...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4282 result.SetElementsInSecondSet(); | 4364 result.SetElementsInSecondSet(); |
| 4283 } else if (j < range->length()) { | 4365 } else if (j < range->length()) { |
| 4284 // Argument range contains something not in word range. | 4366 // Argument range contains something not in word range. |
| 4285 result.SetElementsInFirstSet(); | 4367 result.SetElementsInFirstSet(); |
| 4286 } | 4368 } |
| 4287 | 4369 |
| 4288 return result; | 4370 return result; |
| 4289 } | 4371 } |
| 4290 | 4372 |
| 4291 | 4373 |
| 4292 static void AddUncanonicals(Isolate* isolate, | |
| 4293 ZoneList<CharacterRange>* ranges, | |
| 4294 int bottom, | |
| 4295 int top) { | |
| 4296 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | |
| 4297 // Zones with no case mappings. There is a DEBUG-mode loop to assert that | |
| 4298 // this table is correct. | |
| 4299 // 0x0600 - 0x0fff | |
| 4300 // 0x1100 - 0x1cff | |
| 4301 // 0x2000 - 0x20ff | |
| 4302 // 0x2200 - 0x23ff | |
| 4303 // 0x2500 - 0x2bff | |
| 4304 // 0x2e00 - 0xa5ff | |
| 4305 // 0xa800 - 0xfaff | |
| 4306 // 0xfc00 - 0xfeff | |
| 4307 const int boundary_count = 18; | |
| 4308 int boundaries[] = { | |
| 4309 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500, | |
| 4310 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00}; | |
| 4311 | |
| 4312 // Special ASCII rule from spec can save us some work here. | |
| 4313 if (bottom == 0x80 && top == 0xffff) return; | |
| 4314 | |
| 4315 if (top <= boundaries[0]) { | |
| 4316 CharacterRange range(bottom, top); | |
| 4317 range.AddCaseEquivalents(ranges, false); | |
| 4318 return; | |
| 4319 } | |
| 4320 | |
| 4321 // Split up very large ranges. This helps remove ranges where there are no | |
| 4322 // case mappings. | |
| 4323 for (int i = 0; i < boundary_count; i++) { | |
| 4324 if (bottom < boundaries[i] && top >= boundaries[i]) { | |
| 4325 AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1); | |
| 4326 AddUncanonicals(isolate, ranges, boundaries[i], top); | |
| 4327 return; | |
| 4328 } | |
| 4329 } | |
| 4330 | |
| 4331 // If we are completely in a zone with no case mappings then we are done. | |
| 4332 for (int i = 0; i < boundary_count; i += 2) { | |
| 4333 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { | |
| 4334 #ifdef DEBUG | |
| 4335 for (int j = bottom; j <= top; j++) { | |
| 4336 unsigned current_char = j; | |
| 4337 int length = isolate->jsregexp_uncanonicalize()->get(current_char, | |
| 4338 '\0', chars); | |
| 4339 for (int k = 0; k < length; k++) { | |
| 4340 ASSERT(chars[k] == current_char); | |
| 4341 } | |
| 4342 } | |
| 4343 #endif | |
| 4344 return; | |
| 4345 } | |
| 4346 } | |
| 4347 | |
| 4348 // Step through the range finding equivalent characters. | |
| 4349 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); | |
| 4350 for (int i = bottom; i <= top; i++) { | |
| 4351 int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars); | |
| 4352 for (int j = 0; j < length; j++) { | |
| 4353 uc32 chr = chars[j]; | |
| 4354 if (chr != i && (chr < bottom || chr > top)) { | |
| 4355 characters->Add(chr); | |
| 4356 } | |
| 4357 } | |
| 4358 } | |
| 4359 | |
| 4360 // Step through the equivalent characters finding simple ranges and | |
| 4361 // adding ranges to the character class. | |
| 4362 if (characters->length() > 0) { | |
| 4363 int new_from = characters->at(0); | |
| 4364 int new_to = new_from; | |
| 4365 for (int i = 1; i < characters->length(); i++) { | |
| 4366 int chr = characters->at(i); | |
| 4367 if (chr == new_to + 1) { | |
| 4368 new_to++; | |
| 4369 } else { | |
| 4370 if (new_to == new_from) { | |
| 4371 ranges->Add(CharacterRange::Singleton(new_from)); | |
| 4372 } else { | |
| 4373 ranges->Add(CharacterRange(new_from, new_to)); | |
| 4374 } | |
| 4375 new_from = new_to = chr; | |
| 4376 } | |
| 4377 } | |
| 4378 if (new_to == new_from) { | |
| 4379 ranges->Add(CharacterRange::Singleton(new_from)); | |
| 4380 } else { | |
| 4381 ranges->Add(CharacterRange(new_from, new_to)); | |
| 4382 } | |
| 4383 } | |
| 4384 } | |
| 4385 | |
| 4386 | |
| 4387 ZoneList<CharacterRange>* CharacterSet::ranges() { | 4374 ZoneList<CharacterRange>* CharacterSet::ranges() { |
| 4388 if (ranges_ == NULL) { | 4375 if (ranges_ == NULL) { |
| 4389 ranges_ = new ZoneList<CharacterRange>(2); | 4376 ranges_ = new ZoneList<CharacterRange>(2); |
| 4390 CharacterRange::AddClassEscape(standard_set_type_, ranges_); | 4377 CharacterRange::AddClassEscape(standard_set_type_, ranges_); |
| 4391 } | 4378 } |
| 4392 return ranges_; | 4379 return ranges_; |
| 4393 } | 4380 } |
| 4394 | 4381 |
| 4395 | 4382 |
| 4396 // Move a number of elements in a zonelist to another position | 4383 // Move a number of elements in a zonelist to another position |
| (...skipping 979 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5376 } | 5363 } |
| 5377 | 5364 |
| 5378 return compiler.Assemble(¯o_assembler, | 5365 return compiler.Assemble(¯o_assembler, |
| 5379 node, | 5366 node, |
| 5380 data->capture_count, | 5367 data->capture_count, |
| 5381 pattern); | 5368 pattern); |
| 5382 } | 5369 } |
| 5383 | 5370 |
| 5384 | 5371 |
| 5385 }} // namespace v8::internal | 5372 }} // namespace v8::internal |
| OLD | NEW |