OLD | NEW |
---|---|
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 3712 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3723 RegExpNode* on_success) { | 3723 RegExpNode* on_success) { |
3724 return ToNode(min(), | 3724 return ToNode(min(), |
3725 max(), | 3725 max(), |
3726 is_greedy(), | 3726 is_greedy(), |
3727 body(), | 3727 body(), |
3728 compiler, | 3728 compiler, |
3729 on_success); | 3729 on_success); |
3730 } | 3730 } |
3731 | 3731 |
3732 | 3732 |
3733 class RegExpExpansionLimiter { | |
Lasse Reichstein
2011/06/16 12:12:47
Some documentation would be nice.
What problem is
| |
3734 public: | |
3735 static const int kMaxExpansionFactor = 6; | |
3736 RegExpExpansionLimiter(Isolate* isolate, int factor) | |
3737 : isolate_(isolate), | |
3738 saved_regexp_expansion_factor_(isolate->regexp_expansion_factor()), | |
3739 ok_to_expand_(true) { | |
3740 ASSERT(factor > 0); | |
3741 int new_factor = saved_regexp_expansion_factor_ * factor; | |
3742 if (new_factor < saved_regexp_expansion_factor_) { | |
Lasse Reichstein
2011/06/16 12:12:47
Dangerous assumption. Multiplication, if it's supp
| |
3743 // Integer overflow. | |
3744 new_factor = kMaxInt; | |
3745 ok_to_expand_ = false; | |
3746 } else if (new_factor > kMaxExpansionFactor) { | |
3747 ok_to_expand_ = false; | |
3748 } | |
3749 isolate->set_regexp_expansion_factor(new_factor); | |
3750 } | |
3751 | |
3752 ~RegExpExpansionLimiter() { | |
3753 isolate_->set_regexp_expansion_factor(saved_regexp_expansion_factor_); | |
3754 } | |
3755 | |
3756 bool ok_to_expand() { return ok_to_expand_; } | |
3757 | |
3758 private: | |
3759 Isolate* isolate_; | |
3760 int saved_regexp_expansion_factor_; | |
3761 bool ok_to_expand_; | |
3762 | |
3763 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); | |
3764 }; | |
3765 | |
3766 | |
3733 RegExpNode* RegExpQuantifier::ToNode(int min, | 3767 RegExpNode* RegExpQuantifier::ToNode(int min, |
3734 int max, | 3768 int max, |
3735 bool is_greedy, | 3769 bool is_greedy, |
3736 RegExpTree* body, | 3770 RegExpTree* body, |
3737 RegExpCompiler* compiler, | 3771 RegExpCompiler* compiler, |
3738 RegExpNode* on_success, | 3772 RegExpNode* on_success, |
3739 bool not_at_start) { | 3773 bool not_at_start) { |
3740 // x{f, t} becomes this: | 3774 // x{f, t} becomes this: |
3741 // | 3775 // |
3742 // (r++)<-. | 3776 // (r++)<-. |
(...skipping 16 matching lines...) Expand all Loading... | |
3759 // this case. | 3793 // this case. |
3760 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,} | 3794 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,} |
3761 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3} | 3795 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3} |
3762 if (max == 0) return on_success; // This can happen due to recursion. | 3796 if (max == 0) return on_success; // This can happen due to recursion. |
3763 bool body_can_be_empty = (body->min_match() == 0); | 3797 bool body_can_be_empty = (body->min_match() == 0); |
3764 int body_start_reg = RegExpCompiler::kNoRegister; | 3798 int body_start_reg = RegExpCompiler::kNoRegister; |
3765 Interval capture_registers = body->CaptureRegisters(); | 3799 Interval capture_registers = body->CaptureRegisters(); |
3766 bool needs_capture_clearing = !capture_registers.is_empty(); | 3800 bool needs_capture_clearing = !capture_registers.is_empty(); |
3767 if (body_can_be_empty) { | 3801 if (body_can_be_empty) { |
3768 body_start_reg = compiler->AllocateRegister(); | 3802 body_start_reg = compiler->AllocateRegister(); |
3769 } else if (FLAG_regexp_optimization && | 3803 } else if (FLAG_regexp_optimization && !needs_capture_clearing) { |
3770 !body->ContainsExpandedQuantifier() && | |
3771 !needs_capture_clearing) { | |
3772 // Only unroll if there are no captures and the body can't be | 3804 // Only unroll if there are no captures and the body can't be |
3773 // empty. | 3805 // empty. |
3774 if (min > 0 && min <= kMaxUnrolledMinMatches) { | 3806 { |
3775 int new_max = (max == kInfinity) ? max : max - min; | 3807 RegExpExpansionLimiter limiter( |
3776 // Recurse once to get the loop or optional matches after the fixed ones. | 3808 Isolate::Current(), min + ((max - min) ? 1 : 0)); |
Lasse Reichstein
2011/06/16 12:12:47
Do we really not have a faster way to get the isol
| |
3777 RegExpNode* answer = ToNode( | 3809 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { |
3778 0, new_max, is_greedy, body, compiler, on_success, true); | 3810 int new_max = (max == kInfinity) ? max : max - min; |
3779 // Unroll the forced matches from 0 to min. This can cause chains of | 3811 // Recurse once to get the loop or optional matches after the fixed |
3780 // TextNodes (which the parser does not generate). These should be | 3812 // ones. |
3781 // combined if it turns out they hinder good code generation. | 3813 RegExpNode* answer = ToNode( |
3782 for (int i = 0; i < min; i++) { | 3814 0, new_max, is_greedy, body, compiler, on_success, true); |
3783 answer = body->ToNode(compiler, answer); | 3815 // Unroll the forced matches from 0 to min. This can cause chains of |
3816 // TextNodes (which the parser does not generate). These should be | |
3817 // combined if it turns out they hinder good code generation. | |
3818 for (int i = 0; i < min; i++) { | |
3819 answer = body->ToNode(compiler, answer); | |
3820 } | |
3821 return answer; | |
3784 } | 3822 } |
3785 if (min > 1) body->set_contains_expanded_quantifier(true); | |
3786 return answer; | |
3787 } | 3823 } |
3788 if (max <= kMaxUnrolledMaxMatches) { | 3824 if (max <= kMaxUnrolledMaxMatches && min == 0) { |
3789 ASSERT(min == 0); | 3825 RegExpExpansionLimiter limiter(Isolate::Current(), max); |
3790 // Unroll the optional matches up to max. | 3826 if (limiter.ok_to_expand()) { |
3791 RegExpNode* answer = on_success; | 3827 // Unroll the optional matches up to max. |
3792 for (int i = 0; i < max; i++) { | 3828 RegExpNode* answer = on_success; |
3793 ChoiceNode* alternation = new ChoiceNode(2); | 3829 for (int i = 0; i < max; i++) { |
3794 if (is_greedy) { | 3830 ChoiceNode* alternation = new ChoiceNode(2); |
3795 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, | 3831 if (is_greedy) { |
3796 answer))); | 3832 alternation->AddAlternative( |
3797 alternation->AddAlternative(GuardedAlternative(on_success)); | 3833 GuardedAlternative(body->ToNode(compiler, answer))); |
3798 } else { | 3834 alternation->AddAlternative(GuardedAlternative(on_success)); |
3799 alternation->AddAlternative(GuardedAlternative(on_success)); | 3835 } else { |
3800 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, | 3836 alternation->AddAlternative(GuardedAlternative(on_success)); |
3801 answer))); | 3837 alternation->AddAlternative( |
3838 GuardedAlternative(body->ToNode(compiler, answer))); | |
3839 } | |
3840 answer = alternation; | |
3841 if (not_at_start) alternation->set_not_at_start(); | |
3802 } | 3842 } |
3803 answer = alternation; | 3843 return answer; |
3804 if (not_at_start) alternation->set_not_at_start(); | |
3805 } | 3844 } |
3806 if (max > 1) body->set_contains_expanded_quantifier(true); | |
3807 return answer; | |
3808 } | 3845 } |
3809 } | 3846 } |
3810 bool has_min = min > 0; | 3847 bool has_min = min > 0; |
3811 bool has_max = max < RegExpTree::kInfinity; | 3848 bool has_max = max < RegExpTree::kInfinity; |
3812 bool needs_counter = has_min || has_max; | 3849 bool needs_counter = has_min || has_max; |
3813 int reg_ctr = needs_counter | 3850 int reg_ctr = needs_counter |
3814 ? compiler->AllocateRegister() | 3851 ? compiler->AllocateRegister() |
3815 : RegExpCompiler::kNoRegister; | 3852 : RegExpCompiler::kNoRegister; |
3816 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0); | 3853 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0); |
3817 if (not_at_start) center->set_not_at_start(); | 3854 if (not_at_start) center->set_not_at_start(); |
(...skipping 305 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4123 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); | 4160 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); |
4124 for (int i = 0; i < overlay.length(); i += 2) { | 4161 for (int i = 0; i < overlay.length(); i += 2) { |
4125 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), | 4162 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), |
4126 CharacterRangeSplitter::kInOverlay); | 4163 CharacterRangeSplitter::kInOverlay); |
4127 } | 4164 } |
4128 CharacterRangeSplitter callback(included, excluded); | 4165 CharacterRangeSplitter callback(included, excluded); |
4129 table.ForEach(&callback); | 4166 table.ForEach(&callback); |
4130 } | 4167 } |
4131 | 4168 |
4132 | 4169 |
4133 static void AddUncanonicals(Isolate* isolate, | |
4134 ZoneList<CharacterRange>* ranges, | |
4135 int bottom, | |
4136 int top); | |
4137 | |
4138 | |
4139 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, | 4170 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, |
4140 bool is_ascii) { | 4171 bool is_ascii) { |
4141 Isolate* isolate = Isolate::Current(); | 4172 Isolate* isolate = Isolate::Current(); |
4142 uc16 bottom = from(); | 4173 uc16 bottom = from(); |
4143 uc16 top = to(); | 4174 uc16 top = to(); |
4144 if (is_ascii) { | 4175 if (is_ascii) { |
4145 if (bottom > String::kMaxAsciiCharCode) return; | 4176 if (bottom > String::kMaxAsciiCharCode) return; |
4146 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; | 4177 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; |
4147 } | 4178 } |
4148 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4179 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4289 result.SetElementsInSecondSet(); | 4320 result.SetElementsInSecondSet(); |
4290 } else if (j < range->length()) { | 4321 } else if (j < range->length()) { |
4291 // Argument range contains something not in word range. | 4322 // Argument range contains something not in word range. |
4292 result.SetElementsInFirstSet(); | 4323 result.SetElementsInFirstSet(); |
4293 } | 4324 } |
4294 | 4325 |
4295 return result; | 4326 return result; |
4296 } | 4327 } |
4297 | 4328 |
4298 | 4329 |
4299 static void AddUncanonicals(Isolate* isolate, | |
4300 ZoneList<CharacterRange>* ranges, | |
4301 int bottom, | |
4302 int top) { | |
4303 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | |
4304 // Zones with no case mappings. There is a DEBUG-mode loop to assert that | |
4305 // this table is correct. | |
4306 // 0x0600 - 0x0fff | |
4307 // 0x1100 - 0x1cff | |
4308 // 0x2000 - 0x20ff | |
4309 // 0x2200 - 0x23ff | |
4310 // 0x2500 - 0x2bff | |
4311 // 0x2e00 - 0xa5ff | |
4312 // 0xa800 - 0xfaff | |
4313 // 0xfc00 - 0xfeff | |
4314 const int boundary_count = 18; | |
4315 int boundaries[] = { | |
4316 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500, | |
4317 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00}; | |
4318 | |
4319 // Special ASCII rule from spec can save us some work here. | |
4320 if (bottom == 0x80 && top == 0xffff) return; | |
4321 | |
4322 if (top <= boundaries[0]) { | |
4323 CharacterRange range(bottom, top); | |
4324 range.AddCaseEquivalents(ranges, false); | |
4325 return; | |
4326 } | |
4327 | |
4328 // Split up very large ranges. This helps remove ranges where there are no | |
4329 // case mappings. | |
4330 for (int i = 0; i < boundary_count; i++) { | |
4331 if (bottom < boundaries[i] && top >= boundaries[i]) { | |
4332 AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1); | |
4333 AddUncanonicals(isolate, ranges, boundaries[i], top); | |
4334 return; | |
4335 } | |
4336 } | |
4337 | |
4338 // If we are completely in a zone with no case mappings then we are done. | |
4339 for (int i = 0; i < boundary_count; i += 2) { | |
4340 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { | |
4341 #ifdef DEBUG | |
4342 for (int j = bottom; j <= top; j++) { | |
4343 unsigned current_char = j; | |
4344 int length = isolate->jsregexp_uncanonicalize()->get(current_char, | |
4345 '\0', chars); | |
4346 for (int k = 0; k < length; k++) { | |
4347 ASSERT(chars[k] == current_char); | |
4348 } | |
4349 } | |
4350 #endif | |
4351 return; | |
4352 } | |
4353 } | |
4354 | |
4355 // Step through the range finding equivalent characters. | |
4356 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); | |
4357 for (int i = bottom; i <= top; i++) { | |
4358 int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars); | |
4359 for (int j = 0; j < length; j++) { | |
4360 uc32 chr = chars[j]; | |
4361 if (chr != i && (chr < bottom || chr > top)) { | |
4362 characters->Add(chr); | |
4363 } | |
4364 } | |
4365 } | |
4366 | |
4367 // Step through the equivalent characters finding simple ranges and | |
4368 // adding ranges to the character class. | |
4369 if (characters->length() > 0) { | |
4370 int new_from = characters->at(0); | |
4371 int new_to = new_from; | |
4372 for (int i = 1; i < characters->length(); i++) { | |
4373 int chr = characters->at(i); | |
4374 if (chr == new_to + 1) { | |
4375 new_to++; | |
4376 } else { | |
4377 if (new_to == new_from) { | |
4378 ranges->Add(CharacterRange::Singleton(new_from)); | |
4379 } else { | |
4380 ranges->Add(CharacterRange(new_from, new_to)); | |
4381 } | |
4382 new_from = new_to = chr; | |
4383 } | |
4384 } | |
4385 if (new_to == new_from) { | |
4386 ranges->Add(CharacterRange::Singleton(new_from)); | |
4387 } else { | |
4388 ranges->Add(CharacterRange(new_from, new_to)); | |
4389 } | |
4390 } | |
4391 } | |
4392 | |
4393 | |
4394 ZoneList<CharacterRange>* CharacterSet::ranges() { | 4330 ZoneList<CharacterRange>* CharacterSet::ranges() { |
4395 if (ranges_ == NULL) { | 4331 if (ranges_ == NULL) { |
4396 ranges_ = new ZoneList<CharacterRange>(2); | 4332 ranges_ = new ZoneList<CharacterRange>(2); |
4397 CharacterRange::AddClassEscape(standard_set_type_, ranges_); | 4333 CharacterRange::AddClassEscape(standard_set_type_, ranges_); |
4398 } | 4334 } |
4399 return ranges_; | 4335 return ranges_; |
4400 } | 4336 } |
4401 | 4337 |
4402 | 4338 |
4403 // Move a number of elements in a zonelist to another position | 4339 // Move a number of elements in a zonelist to another position |
(...skipping 979 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5383 } | 5319 } |
5384 | 5320 |
5385 return compiler.Assemble(¯o_assembler, | 5321 return compiler.Assemble(¯o_assembler, |
5386 node, | 5322 node, |
5387 data->capture_count, | 5323 data->capture_count, |
5388 pattern); | 5324 pattern); |
5389 } | 5325 } |
5390 | 5326 |
5391 | 5327 |
5392 }} // namespace v8::internal | 5328 }} // namespace v8::internal |
OLD | NEW |