OLD | NEW |
---|---|
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 792 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
803 static const int kMaxRecursion = 100; | 803 static const int kMaxRecursion = 100; |
804 inline int recursion_depth() { return recursion_depth_; } | 804 inline int recursion_depth() { return recursion_depth_; } |
805 inline void IncrementRecursionDepth() { recursion_depth_++; } | 805 inline void IncrementRecursionDepth() { recursion_depth_++; } |
806 inline void DecrementRecursionDepth() { recursion_depth_--; } | 806 inline void DecrementRecursionDepth() { recursion_depth_--; } |
807 | 807 |
808 void SetRegExpTooBig() { reg_exp_too_big_ = true; } | 808 void SetRegExpTooBig() { reg_exp_too_big_ = true; } |
809 | 809 |
810 inline bool ignore_case() { return ignore_case_; } | 810 inline bool ignore_case() { return ignore_case_; } |
811 inline bool ascii() { return ascii_; } | 811 inline bool ascii() { return ascii_; } |
812 | 812 |
813 int current_expansion_factor() { return current_expansion_factor_; } | |
814 void set_current_expansion_factor(int value) { | |
815 current_expansion_factor_ = value; | |
816 } | |
817 | |
813 static const int kNoRegister = -1; | 818 static const int kNoRegister = -1; |
814 | 819 |
815 private: | 820 private: |
816 EndNode* accept_; | 821 EndNode* accept_; |
817 int next_register_; | 822 int next_register_; |
818 List<RegExpNode*>* work_list_; | 823 List<RegExpNode*>* work_list_; |
819 int recursion_depth_; | 824 int recursion_depth_; |
820 RegExpMacroAssembler* macro_assembler_; | 825 RegExpMacroAssembler* macro_assembler_; |
821 bool ignore_case_; | 826 bool ignore_case_; |
822 bool ascii_; | 827 bool ascii_; |
823 bool reg_exp_too_big_; | 828 bool reg_exp_too_big_; |
829 int current_expansion_factor_; | |
824 }; | 830 }; |
825 | 831 |
826 | 832 |
827 class RecursionCheck { | 833 class RecursionCheck { |
828 public: | 834 public: |
829 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 835 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
830 compiler->IncrementRecursionDepth(); | 836 compiler->IncrementRecursionDepth(); |
831 } | 837 } |
832 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 838 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
833 private: | 839 private: |
834 RegExpCompiler* compiler_; | 840 RegExpCompiler* compiler_; |
835 }; | 841 }; |
836 | 842 |
837 | 843 |
838 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { | 844 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { |
839 return RegExpEngine::CompilationResult("RegExp too big"); | 845 return RegExpEngine::CompilationResult("RegExp too big"); |
840 } | 846 } |
841 | 847 |
842 | 848 |
843 // Attempts to compile the regexp using an Irregexp code generator. Returns | 849 // Attempts to compile the regexp using an Irregexp code generator. Returns |
844 // a fixed array or a null handle depending on whether it succeeded. | 850 // a fixed array or a null handle depending on whether it succeeded. |
845 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) | 851 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) |
846 : next_register_(2 * (capture_count + 1)), | 852 : next_register_(2 * (capture_count + 1)), |
847 work_list_(NULL), | 853 work_list_(NULL), |
848 recursion_depth_(0), | 854 recursion_depth_(0), |
849 ignore_case_(ignore_case), | 855 ignore_case_(ignore_case), |
850 ascii_(ascii), | 856 ascii_(ascii), |
851 reg_exp_too_big_(false) { | 857 reg_exp_too_big_(false), |
858 current_expansion_factor_(1) { | |
852 accept_ = new EndNode(EndNode::ACCEPT); | 859 accept_ = new EndNode(EndNode::ACCEPT); |
853 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); | 860 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); |
854 } | 861 } |
855 | 862 |
856 | 863 |
857 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 864 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
858 RegExpMacroAssembler* macro_assembler, | 865 RegExpMacroAssembler* macro_assembler, |
859 RegExpNode* start, | 866 RegExpNode* start, |
860 int capture_count, | 867 int capture_count, |
861 Handle<String> pattern) { | 868 Handle<String> pattern) { |
(...skipping 2861 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3723 RegExpNode* on_success) { | 3730 RegExpNode* on_success) { |
3724 return ToNode(min(), | 3731 return ToNode(min(), |
3725 max(), | 3732 max(), |
3726 is_greedy(), | 3733 is_greedy(), |
3727 body(), | 3734 body(), |
3728 compiler, | 3735 compiler, |
3729 on_success); | 3736 on_success); |
3730 } | 3737 } |
3731 | 3738 |
3732 | 3739 |
3740 // Scoped object to keep track of how much we unroll quantifier loops in the | |
3741 // regexp graph generator. | |
3742 class RegExpExpansionLimiter { | |
3743 public: | |
3744 static const int kMaxExpansionFactor = 6; | |
3745 RegExpExpansionLimiter(RegExpCompiler* compiler, int factor) | |
3746 : compiler_(compiler), | |
3747 saved_expansion_factor_(compiler->current_expansion_factor()), | |
3748 ok_to_expand_(true) { | |
Lasse Reichstein
2011/06/17 07:18:49
How about initializing this as:
ok_to_expand_(sa
Erik Corry
2011/06/17 07:31:14
Done.
| |
3749 ASSERT(factor > 0); | |
3750 if (factor > kMaxExpansionFactor) { | |
Lasse Reichstein
2011/06/17 07:18:49
Add comment saying that this prevents overflow in
Erik Corry
2011/06/17 07:31:14
Done.
| |
3751 ok_to_expand_ = false; | |
3752 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1); | |
3753 } else { | |
3754 int new_factor = saved_expansion_factor_ * factor; | |
3755 if (new_factor > kMaxExpansionFactor) { | |
3756 ok_to_expand_ = false; | |
3757 } | |
Lasse Reichstein
2011/06/17 07:18:49
Remove the if and just always assing:
ok_to_expan
Erik Corry
2011/06/17 07:31:14
Done.
| |
3758 compiler->set_current_expansion_factor(new_factor); | |
3759 } | |
3760 } | |
3761 | |
3762 ~RegExpExpansionLimiter() { | |
3763 compiler_->set_current_expansion_factor(saved_expansion_factor_); | |
3764 } | |
3765 | |
3766 bool ok_to_expand() { return ok_to_expand_; } | |
3767 | |
3768 private: | |
3769 RegExpCompiler* compiler_; | |
3770 int saved_expansion_factor_; | |
3771 bool ok_to_expand_; | |
3772 | |
3773 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); | |
3774 }; | |
3775 | |
3776 | |
3733 RegExpNode* RegExpQuantifier::ToNode(int min, | 3777 RegExpNode* RegExpQuantifier::ToNode(int min, |
3734 int max, | 3778 int max, |
3735 bool is_greedy, | 3779 bool is_greedy, |
3736 RegExpTree* body, | 3780 RegExpTree* body, |
3737 RegExpCompiler* compiler, | 3781 RegExpCompiler* compiler, |
3738 RegExpNode* on_success, | 3782 RegExpNode* on_success, |
3739 bool not_at_start) { | 3783 bool not_at_start) { |
3740 // x{f, t} becomes this: | 3784 // x{f, t} becomes this: |
3741 // | 3785 // |
3742 // (r++)<-. | 3786 // (r++)<-. |
(...skipping 16 matching lines...) Expand all Loading... | |
3759 // this case. | 3803 // this case. |
3760 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,} | 3804 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,} |
3761 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3} | 3805 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3} |
3762 if (max == 0) return on_success; // This can happen due to recursion. | 3806 if (max == 0) return on_success; // This can happen due to recursion. |
3763 bool body_can_be_empty = (body->min_match() == 0); | 3807 bool body_can_be_empty = (body->min_match() == 0); |
3764 int body_start_reg = RegExpCompiler::kNoRegister; | 3808 int body_start_reg = RegExpCompiler::kNoRegister; |
3765 Interval capture_registers = body->CaptureRegisters(); | 3809 Interval capture_registers = body->CaptureRegisters(); |
3766 bool needs_capture_clearing = !capture_registers.is_empty(); | 3810 bool needs_capture_clearing = !capture_registers.is_empty(); |
3767 if (body_can_be_empty) { | 3811 if (body_can_be_empty) { |
3768 body_start_reg = compiler->AllocateRegister(); | 3812 body_start_reg = compiler->AllocateRegister(); |
3769 } else if (FLAG_regexp_optimization && | 3813 } else if (FLAG_regexp_optimization && !needs_capture_clearing) { |
3770 !body->ContainsExpandedQuantifier() && | |
3771 !needs_capture_clearing) { | |
3772 // Only unroll if there are no captures and the body can't be | 3814 // Only unroll if there are no captures and the body can't be |
3773 // empty. | 3815 // empty. |
3774 if (min > 0 && min <= kMaxUnrolledMinMatches) { | 3816 { |
3775 int new_max = (max == kInfinity) ? max : max - min; | 3817 RegExpExpansionLimiter limiter( |
3776 // Recurse once to get the loop or optional matches after the fixed ones. | 3818 compiler, min + ((max - min) ? 1 : 0)); |
Lasse Reichstein
2011/06/17 07:18:49
I just noticed the using of (max - min) as a boole
Erik Corry
2011/06/17 07:31:14
max - min replaced with max != min
| |
3777 RegExpNode* answer = ToNode( | 3819 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { |
3778 0, new_max, is_greedy, body, compiler, on_success, true); | 3820 int new_max = (max == kInfinity) ? max : max - min; |
3779 // Unroll the forced matches from 0 to min. This can cause chains of | 3821 // Recurse once to get the loop or optional matches after the fixed |
3780 // TextNodes (which the parser does not generate). These should be | 3822 // ones. |
3781 // combined if it turns out they hinder good code generation. | 3823 RegExpNode* answer = ToNode( |
3782 for (int i = 0; i < min; i++) { | 3824 0, new_max, is_greedy, body, compiler, on_success, true); |
3783 answer = body->ToNode(compiler, answer); | 3825 // Unroll the forced matches from 0 to min. This can cause chains of |
3826 // TextNodes (which the parser does not generate). These should be | |
3827 // combined if it turns out they hinder good code generation. | |
3828 for (int i = 0; i < min; i++) { | |
3829 answer = body->ToNode(compiler, answer); | |
3830 } | |
3831 return answer; | |
3784 } | 3832 } |
3785 if (min > 1) body->set_contains_expanded_quantifier(true); | |
3786 return answer; | |
3787 } | 3833 } |
3788 if (max <= kMaxUnrolledMaxMatches) { | 3834 if (max <= kMaxUnrolledMaxMatches && min == 0) { |
3789 ASSERT(min == 0); | 3835 ASSERT(max > 0); // Due to the 'if' above. |
3790 // Unroll the optional matches up to max. | 3836 RegExpExpansionLimiter limiter(compiler, max); |
3791 RegExpNode* answer = on_success; | 3837 if (limiter.ok_to_expand()) { |
3792 for (int i = 0; i < max; i++) { | 3838 // Unroll the optional matches up to max. |
3793 ChoiceNode* alternation = new ChoiceNode(2); | 3839 RegExpNode* answer = on_success; |
3794 if (is_greedy) { | 3840 for (int i = 0; i < max; i++) { |
3795 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, | 3841 ChoiceNode* alternation = new ChoiceNode(2); |
3796 answer))); | 3842 if (is_greedy) { |
3797 alternation->AddAlternative(GuardedAlternative(on_success)); | 3843 alternation->AddAlternative( |
3798 } else { | 3844 GuardedAlternative(body->ToNode(compiler, answer))); |
3799 alternation->AddAlternative(GuardedAlternative(on_success)); | 3845 alternation->AddAlternative(GuardedAlternative(on_success)); |
3800 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, | 3846 } else { |
3801 answer))); | 3847 alternation->AddAlternative(GuardedAlternative(on_success)); |
3848 alternation->AddAlternative( | |
3849 GuardedAlternative(body->ToNode(compiler, answer))); | |
3850 } | |
3851 answer = alternation; | |
3852 if (not_at_start) alternation->set_not_at_start(); | |
3802 } | 3853 } |
3803 answer = alternation; | 3854 return answer; |
3804 if (not_at_start) alternation->set_not_at_start(); | |
3805 } | 3855 } |
3806 if (max > 1) body->set_contains_expanded_quantifier(true); | |
3807 return answer; | |
3808 } | 3856 } |
3809 } | 3857 } |
3810 bool has_min = min > 0; | 3858 bool has_min = min > 0; |
3811 bool has_max = max < RegExpTree::kInfinity; | 3859 bool has_max = max < RegExpTree::kInfinity; |
3812 bool needs_counter = has_min || has_max; | 3860 bool needs_counter = has_min || has_max; |
3813 int reg_ctr = needs_counter | 3861 int reg_ctr = needs_counter |
3814 ? compiler->AllocateRegister() | 3862 ? compiler->AllocateRegister() |
3815 : RegExpCompiler::kNoRegister; | 3863 : RegExpCompiler::kNoRegister; |
3816 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0); | 3864 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0); |
3817 if (not_at_start) center->set_not_at_start(); | 3865 if (not_at_start) center->set_not_at_start(); |
(...skipping 305 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4123 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); | 4171 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); |
4124 for (int i = 0; i < overlay.length(); i += 2) { | 4172 for (int i = 0; i < overlay.length(); i += 2) { |
4125 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), | 4173 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), |
4126 CharacterRangeSplitter::kInOverlay); | 4174 CharacterRangeSplitter::kInOverlay); |
4127 } | 4175 } |
4128 CharacterRangeSplitter callback(included, excluded); | 4176 CharacterRangeSplitter callback(included, excluded); |
4129 table.ForEach(&callback); | 4177 table.ForEach(&callback); |
4130 } | 4178 } |
4131 | 4179 |
4132 | 4180 |
4133 static void AddUncanonicals(Isolate* isolate, | |
4134 ZoneList<CharacterRange>* ranges, | |
4135 int bottom, | |
4136 int top); | |
4137 | |
4138 | |
4139 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, | 4181 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, |
4140 bool is_ascii) { | 4182 bool is_ascii) { |
4141 Isolate* isolate = Isolate::Current(); | 4183 Isolate* isolate = Isolate::Current(); |
4142 uc16 bottom = from(); | 4184 uc16 bottom = from(); |
4143 uc16 top = to(); | 4185 uc16 top = to(); |
4144 if (is_ascii) { | 4186 if (is_ascii) { |
4145 if (bottom > String::kMaxAsciiCharCode) return; | 4187 if (bottom > String::kMaxAsciiCharCode) return; |
4146 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; | 4188 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; |
4147 } | 4189 } |
4148 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4190 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4289 result.SetElementsInSecondSet(); | 4331 result.SetElementsInSecondSet(); |
4290 } else if (j < range->length()) { | 4332 } else if (j < range->length()) { |
4291 // Argument range contains something not in word range. | 4333 // Argument range contains something not in word range. |
4292 result.SetElementsInFirstSet(); | 4334 result.SetElementsInFirstSet(); |
4293 } | 4335 } |
4294 | 4336 |
4295 return result; | 4337 return result; |
4296 } | 4338 } |
4297 | 4339 |
4298 | 4340 |
4299 static void AddUncanonicals(Isolate* isolate, | |
4300 ZoneList<CharacterRange>* ranges, | |
4301 int bottom, | |
4302 int top) { | |
4303 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | |
4304 // Zones with no case mappings. There is a DEBUG-mode loop to assert that | |
4305 // this table is correct. | |
4306 // 0x0600 - 0x0fff | |
4307 // 0x1100 - 0x1cff | |
4308 // 0x2000 - 0x20ff | |
4309 // 0x2200 - 0x23ff | |
4310 // 0x2500 - 0x2bff | |
4311 // 0x2e00 - 0xa5ff | |
4312 // 0xa800 - 0xfaff | |
4313 // 0xfc00 - 0xfeff | |
4314 const int boundary_count = 18; | |
4315 int boundaries[] = { | |
4316 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500, | |
4317 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00}; | |
4318 | |
4319 // Special ASCII rule from spec can save us some work here. | |
4320 if (bottom == 0x80 && top == 0xffff) return; | |
4321 | |
4322 if (top <= boundaries[0]) { | |
4323 CharacterRange range(bottom, top); | |
4324 range.AddCaseEquivalents(ranges, false); | |
4325 return; | |
4326 } | |
4327 | |
4328 // Split up very large ranges. This helps remove ranges where there are no | |
4329 // case mappings. | |
4330 for (int i = 0; i < boundary_count; i++) { | |
4331 if (bottom < boundaries[i] && top >= boundaries[i]) { | |
4332 AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1); | |
4333 AddUncanonicals(isolate, ranges, boundaries[i], top); | |
4334 return; | |
4335 } | |
4336 } | |
4337 | |
4338 // If we are completely in a zone with no case mappings then we are done. | |
4339 for (int i = 0; i < boundary_count; i += 2) { | |
4340 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { | |
4341 #ifdef DEBUG | |
4342 for (int j = bottom; j <= top; j++) { | |
4343 unsigned current_char = j; | |
4344 int length = isolate->jsregexp_uncanonicalize()->get(current_char, | |
4345 '\0', chars); | |
4346 for (int k = 0; k < length; k++) { | |
4347 ASSERT(chars[k] == current_char); | |
4348 } | |
4349 } | |
4350 #endif | |
4351 return; | |
4352 } | |
4353 } | |
4354 | |
4355 // Step through the range finding equivalent characters. | |
4356 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); | |
4357 for (int i = bottom; i <= top; i++) { | |
4358 int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars); | |
4359 for (int j = 0; j < length; j++) { | |
4360 uc32 chr = chars[j]; | |
4361 if (chr != i && (chr < bottom || chr > top)) { | |
4362 characters->Add(chr); | |
4363 } | |
4364 } | |
4365 } | |
4366 | |
4367 // Step through the equivalent characters finding simple ranges and | |
4368 // adding ranges to the character class. | |
4369 if (characters->length() > 0) { | |
4370 int new_from = characters->at(0); | |
4371 int new_to = new_from; | |
4372 for (int i = 1; i < characters->length(); i++) { | |
4373 int chr = characters->at(i); | |
4374 if (chr == new_to + 1) { | |
4375 new_to++; | |
4376 } else { | |
4377 if (new_to == new_from) { | |
4378 ranges->Add(CharacterRange::Singleton(new_from)); | |
4379 } else { | |
4380 ranges->Add(CharacterRange(new_from, new_to)); | |
4381 } | |
4382 new_from = new_to = chr; | |
4383 } | |
4384 } | |
4385 if (new_to == new_from) { | |
4386 ranges->Add(CharacterRange::Singleton(new_from)); | |
4387 } else { | |
4388 ranges->Add(CharacterRange(new_from, new_to)); | |
4389 } | |
4390 } | |
4391 } | |
4392 | |
4393 | |
4394 ZoneList<CharacterRange>* CharacterSet::ranges() { | 4341 ZoneList<CharacterRange>* CharacterSet::ranges() { |
4395 if (ranges_ == NULL) { | 4342 if (ranges_ == NULL) { |
4396 ranges_ = new ZoneList<CharacterRange>(2); | 4343 ranges_ = new ZoneList<CharacterRange>(2); |
4397 CharacterRange::AddClassEscape(standard_set_type_, ranges_); | 4344 CharacterRange::AddClassEscape(standard_set_type_, ranges_); |
4398 } | 4345 } |
4399 return ranges_; | 4346 return ranges_; |
4400 } | 4347 } |
4401 | 4348 |
4402 | 4349 |
4403 // Move a number of elements in a zonelist to another position | 4350 // Move a number of elements in a zonelist to another position |
(...skipping 979 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5383 } | 5330 } |
5384 | 5331 |
5385 return compiler.Assemble(¯o_assembler, | 5332 return compiler.Assemble(¯o_assembler, |
5386 node, | 5333 node, |
5387 data->capture_count, | 5334 data->capture_count, |
5388 pattern); | 5335 pattern); |
5389 } | 5336 } |
5390 | 5337 |
5391 | 5338 |
5392 }} // namespace v8::internal | 5339 }} // namespace v8::internal |
OLD | NEW |