Chromium Code Reviews| OLD | NEW | 
|---|---|
| 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without | 
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are | 
| 4 // met: | 4 // met: | 
| 5 // | 5 // | 
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright | 
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. | 
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above | 
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following | 
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided | 
| (...skipping 3712 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3723 RegExpNode* on_success) { | 3723 RegExpNode* on_success) { | 
| 3724 return ToNode(min(), | 3724 return ToNode(min(), | 
| 3725 max(), | 3725 max(), | 
| 3726 is_greedy(), | 3726 is_greedy(), | 
| 3727 body(), | 3727 body(), | 
| 3728 compiler, | 3728 compiler, | 
| 3729 on_success); | 3729 on_success); | 
| 3730 } | 3730 } | 
| 3731 | 3731 | 
| 3732 | 3732 | 
| 3733 class RegExpExpansionLimiter { | |
| 
 
Lasse Reichstein
2011/06/16 12:12:47
Some documentation would be nice.
What problem is
 
 | |
| 3734 public: | |
| 3735 static const int kMaxExpansionFactor = 6; | |
| 3736 RegExpExpansionLimiter(Isolate* isolate, int factor) | |
| 3737 : isolate_(isolate), | |
| 3738 saved_regexp_expansion_factor_(isolate->regexp_expansion_factor()), | |
| 3739 ok_to_expand_(true) { | |
| 3740 ASSERT(factor > 0); | |
| 3741 int new_factor = saved_regexp_expansion_factor_ * factor; | |
| 3742 if (new_factor < saved_regexp_expansion_factor_) { | |
| 
 
Lasse Reichstein
2011/06/16 12:12:47
Dangerous assumption. Multiplication, if it's supp
 
 | |
| 3743 // Integer overflow. | |
| 3744 new_factor = kMaxInt; | |
| 3745 ok_to_expand_ = false; | |
| 3746 } else if (new_factor > kMaxExpansionFactor) { | |
| 3747 ok_to_expand_ = false; | |
| 3748 } | |
| 3749 isolate->set_regexp_expansion_factor(new_factor); | |
| 3750 } | |
| 3751 | |
| 3752 ~RegExpExpansionLimiter() { | |
| 3753 isolate_->set_regexp_expansion_factor(saved_regexp_expansion_factor_); | |
| 3754 } | |
| 3755 | |
| 3756 bool ok_to_expand() { return ok_to_expand_; } | |
| 3757 | |
| 3758 private: | |
| 3759 Isolate* isolate_; | |
| 3760 int saved_regexp_expansion_factor_; | |
| 3761 bool ok_to_expand_; | |
| 3762 | |
| 3763 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); | |
| 3764 }; | |
| 3765 | |
| 3766 | |
| 3733 RegExpNode* RegExpQuantifier::ToNode(int min, | 3767 RegExpNode* RegExpQuantifier::ToNode(int min, | 
| 3734 int max, | 3768 int max, | 
| 3735 bool is_greedy, | 3769 bool is_greedy, | 
| 3736 RegExpTree* body, | 3770 RegExpTree* body, | 
| 3737 RegExpCompiler* compiler, | 3771 RegExpCompiler* compiler, | 
| 3738 RegExpNode* on_success, | 3772 RegExpNode* on_success, | 
| 3739 bool not_at_start) { | 3773 bool not_at_start) { | 
| 3740 // x{f, t} becomes this: | 3774 // x{f, t} becomes this: | 
| 3741 // | 3775 // | 
| 3742 // (r++)<-. | 3776 // (r++)<-. | 
| (...skipping 16 matching lines...) Expand all Loading... | |
| 3759 // this case. | 3793 // this case. | 
| 3760 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,} | 3794 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,} | 
| 3761 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3} | 3795 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3} | 
| 3762 if (max == 0) return on_success; // This can happen due to recursion. | 3796 if (max == 0) return on_success; // This can happen due to recursion. | 
| 3763 bool body_can_be_empty = (body->min_match() == 0); | 3797 bool body_can_be_empty = (body->min_match() == 0); | 
| 3764 int body_start_reg = RegExpCompiler::kNoRegister; | 3798 int body_start_reg = RegExpCompiler::kNoRegister; | 
| 3765 Interval capture_registers = body->CaptureRegisters(); | 3799 Interval capture_registers = body->CaptureRegisters(); | 
| 3766 bool needs_capture_clearing = !capture_registers.is_empty(); | 3800 bool needs_capture_clearing = !capture_registers.is_empty(); | 
| 3767 if (body_can_be_empty) { | 3801 if (body_can_be_empty) { | 
| 3768 body_start_reg = compiler->AllocateRegister(); | 3802 body_start_reg = compiler->AllocateRegister(); | 
| 3769 } else if (FLAG_regexp_optimization && | 3803 } else if (FLAG_regexp_optimization && !needs_capture_clearing) { | 
| 3770 !body->ContainsExpandedQuantifier() && | |
| 3771 !needs_capture_clearing) { | |
| 3772 // Only unroll if there are no captures and the body can't be | 3804 // Only unroll if there are no captures and the body can't be | 
| 3773 // empty. | 3805 // empty. | 
| 3774 if (min > 0 && min <= kMaxUnrolledMinMatches) { | 3806 { | 
| 3775 int new_max = (max == kInfinity) ? max : max - min; | 3807 RegExpExpansionLimiter limiter( | 
| 3776 // Recurse once to get the loop or optional matches after the fixed ones. | 3808 Isolate::Current(), min + ((max - min) ? 1 : 0)); | 
| 
 
Lasse Reichstein
2011/06/16 12:12:47
Do we really not have a faster way to get the isol
 
 | |
| 3777 RegExpNode* answer = ToNode( | 3809 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { | 
| 3778 0, new_max, is_greedy, body, compiler, on_success, true); | 3810 int new_max = (max == kInfinity) ? max : max - min; | 
| 3779 // Unroll the forced matches from 0 to min. This can cause chains of | 3811 // Recurse once to get the loop or optional matches after the fixed | 
| 3780 // TextNodes (which the parser does not generate). These should be | 3812 // ones. | 
| 3781 // combined if it turns out they hinder good code generation. | 3813 RegExpNode* answer = ToNode( | 
| 3782 for (int i = 0; i < min; i++) { | 3814 0, new_max, is_greedy, body, compiler, on_success, true); | 
| 3783 answer = body->ToNode(compiler, answer); | 3815 // Unroll the forced matches from 0 to min. This can cause chains of | 
| 3816 // TextNodes (which the parser does not generate). These should be | |
| 3817 // combined if it turns out they hinder good code generation. | |
| 3818 for (int i = 0; i < min; i++) { | |
| 3819 answer = body->ToNode(compiler, answer); | |
| 3820 } | |
| 3821 return answer; | |
| 3784 } | 3822 } | 
| 3785 if (min > 1) body->set_contains_expanded_quantifier(true); | |
| 3786 return answer; | |
| 3787 } | 3823 } | 
| 3788 if (max <= kMaxUnrolledMaxMatches) { | 3824 if (max <= kMaxUnrolledMaxMatches && min == 0) { | 
| 3789 ASSERT(min == 0); | 3825 RegExpExpansionLimiter limiter(Isolate::Current(), max); | 
| 3790 // Unroll the optional matches up to max. | 3826 if (limiter.ok_to_expand()) { | 
| 3791 RegExpNode* answer = on_success; | 3827 // Unroll the optional matches up to max. | 
| 3792 for (int i = 0; i < max; i++) { | 3828 RegExpNode* answer = on_success; | 
| 3793 ChoiceNode* alternation = new ChoiceNode(2); | 3829 for (int i = 0; i < max; i++) { | 
| 3794 if (is_greedy) { | 3830 ChoiceNode* alternation = new ChoiceNode(2); | 
| 3795 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, | 3831 if (is_greedy) { | 
| 3796 answer))); | 3832 alternation->AddAlternative( | 
| 3797 alternation->AddAlternative(GuardedAlternative(on_success)); | 3833 GuardedAlternative(body->ToNode(compiler, answer))); | 
| 3798 } else { | 3834 alternation->AddAlternative(GuardedAlternative(on_success)); | 
| 3799 alternation->AddAlternative(GuardedAlternative(on_success)); | 3835 } else { | 
| 3800 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, | 3836 alternation->AddAlternative(GuardedAlternative(on_success)); | 
| 3801 answer))); | 3837 alternation->AddAlternative( | 
| 3838 GuardedAlternative(body->ToNode(compiler, answer))); | |
| 3839 } | |
| 3840 answer = alternation; | |
| 3841 if (not_at_start) alternation->set_not_at_start(); | |
| 3802 } | 3842 } | 
| 3803 answer = alternation; | 3843 return answer; | 
| 3804 if (not_at_start) alternation->set_not_at_start(); | |
| 3805 } | 3844 } | 
| 3806 if (max > 1) body->set_contains_expanded_quantifier(true); | |
| 3807 return answer; | |
| 3808 } | 3845 } | 
| 3809 } | 3846 } | 
| 3810 bool has_min = min > 0; | 3847 bool has_min = min > 0; | 
| 3811 bool has_max = max < RegExpTree::kInfinity; | 3848 bool has_max = max < RegExpTree::kInfinity; | 
| 3812 bool needs_counter = has_min || has_max; | 3849 bool needs_counter = has_min || has_max; | 
| 3813 int reg_ctr = needs_counter | 3850 int reg_ctr = needs_counter | 
| 3814 ? compiler->AllocateRegister() | 3851 ? compiler->AllocateRegister() | 
| 3815 : RegExpCompiler::kNoRegister; | 3852 : RegExpCompiler::kNoRegister; | 
| 3816 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0); | 3853 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0); | 
| 3817 if (not_at_start) center->set_not_at_start(); | 3854 if (not_at_start) center->set_not_at_start(); | 
| (...skipping 305 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4123 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); | 4160 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); | 
| 4124 for (int i = 0; i < overlay.length(); i += 2) { | 4161 for (int i = 0; i < overlay.length(); i += 2) { | 
| 4125 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), | 4162 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), | 
| 4126 CharacterRangeSplitter::kInOverlay); | 4163 CharacterRangeSplitter::kInOverlay); | 
| 4127 } | 4164 } | 
| 4128 CharacterRangeSplitter callback(included, excluded); | 4165 CharacterRangeSplitter callback(included, excluded); | 
| 4129 table.ForEach(&callback); | 4166 table.ForEach(&callback); | 
| 4130 } | 4167 } | 
| 4131 | 4168 | 
| 4132 | 4169 | 
| 4133 static void AddUncanonicals(Isolate* isolate, | |
| 4134 ZoneList<CharacterRange>* ranges, | |
| 4135 int bottom, | |
| 4136 int top); | |
| 4137 | |
| 4138 | |
| 4139 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, | 4170 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, | 
| 4140 bool is_ascii) { | 4171 bool is_ascii) { | 
| 4141 Isolate* isolate = Isolate::Current(); | 4172 Isolate* isolate = Isolate::Current(); | 
| 4142 uc16 bottom = from(); | 4173 uc16 bottom = from(); | 
| 4143 uc16 top = to(); | 4174 uc16 top = to(); | 
| 4144 if (is_ascii) { | 4175 if (is_ascii) { | 
| 4145 if (bottom > String::kMaxAsciiCharCode) return; | 4176 if (bottom > String::kMaxAsciiCharCode) return; | 
| 4146 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; | 4177 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; | 
| 4147 } | 4178 } | 
| 4148 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4179 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 
| (...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4289 result.SetElementsInSecondSet(); | 4320 result.SetElementsInSecondSet(); | 
| 4290 } else if (j < range->length()) { | 4321 } else if (j < range->length()) { | 
| 4291 // Argument range contains something not in word range. | 4322 // Argument range contains something not in word range. | 
| 4292 result.SetElementsInFirstSet(); | 4323 result.SetElementsInFirstSet(); | 
| 4293 } | 4324 } | 
| 4294 | 4325 | 
| 4295 return result; | 4326 return result; | 
| 4296 } | 4327 } | 
| 4297 | 4328 | 
| 4298 | 4329 | 
| 4299 static void AddUncanonicals(Isolate* isolate, | |
| 4300 ZoneList<CharacterRange>* ranges, | |
| 4301 int bottom, | |
| 4302 int top) { | |
| 4303 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | |
| 4304 // Zones with no case mappings. There is a DEBUG-mode loop to assert that | |
| 4305 // this table is correct. | |
| 4306 // 0x0600 - 0x0fff | |
| 4307 // 0x1100 - 0x1cff | |
| 4308 // 0x2000 - 0x20ff | |
| 4309 // 0x2200 - 0x23ff | |
| 4310 // 0x2500 - 0x2bff | |
| 4311 // 0x2e00 - 0xa5ff | |
| 4312 // 0xa800 - 0xfaff | |
| 4313 // 0xfc00 - 0xfeff | |
| 4314 const int boundary_count = 18; | |
| 4315 int boundaries[] = { | |
| 4316 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500, | |
| 4317 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00}; | |
| 4318 | |
| 4319 // Special ASCII rule from spec can save us some work here. | |
| 4320 if (bottom == 0x80 && top == 0xffff) return; | |
| 4321 | |
| 4322 if (top <= boundaries[0]) { | |
| 4323 CharacterRange range(bottom, top); | |
| 4324 range.AddCaseEquivalents(ranges, false); | |
| 4325 return; | |
| 4326 } | |
| 4327 | |
| 4328 // Split up very large ranges. This helps remove ranges where there are no | |
| 4329 // case mappings. | |
| 4330 for (int i = 0; i < boundary_count; i++) { | |
| 4331 if (bottom < boundaries[i] && top >= boundaries[i]) { | |
| 4332 AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1); | |
| 4333 AddUncanonicals(isolate, ranges, boundaries[i], top); | |
| 4334 return; | |
| 4335 } | |
| 4336 } | |
| 4337 | |
| 4338 // If we are completely in a zone with no case mappings then we are done. | |
| 4339 for (int i = 0; i < boundary_count; i += 2) { | |
| 4340 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { | |
| 4341 #ifdef DEBUG | |
| 4342 for (int j = bottom; j <= top; j++) { | |
| 4343 unsigned current_char = j; | |
| 4344 int length = isolate->jsregexp_uncanonicalize()->get(current_char, | |
| 4345 '\0', chars); | |
| 4346 for (int k = 0; k < length; k++) { | |
| 4347 ASSERT(chars[k] == current_char); | |
| 4348 } | |
| 4349 } | |
| 4350 #endif | |
| 4351 return; | |
| 4352 } | |
| 4353 } | |
| 4354 | |
| 4355 // Step through the range finding equivalent characters. | |
| 4356 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); | |
| 4357 for (int i = bottom; i <= top; i++) { | |
| 4358 int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars); | |
| 4359 for (int j = 0; j < length; j++) { | |
| 4360 uc32 chr = chars[j]; | |
| 4361 if (chr != i && (chr < bottom || chr > top)) { | |
| 4362 characters->Add(chr); | |
| 4363 } | |
| 4364 } | |
| 4365 } | |
| 4366 | |
| 4367 // Step through the equivalent characters finding simple ranges and | |
| 4368 // adding ranges to the character class. | |
| 4369 if (characters->length() > 0) { | |
| 4370 int new_from = characters->at(0); | |
| 4371 int new_to = new_from; | |
| 4372 for (int i = 1; i < characters->length(); i++) { | |
| 4373 int chr = characters->at(i); | |
| 4374 if (chr == new_to + 1) { | |
| 4375 new_to++; | |
| 4376 } else { | |
| 4377 if (new_to == new_from) { | |
| 4378 ranges->Add(CharacterRange::Singleton(new_from)); | |
| 4379 } else { | |
| 4380 ranges->Add(CharacterRange(new_from, new_to)); | |
| 4381 } | |
| 4382 new_from = new_to = chr; | |
| 4383 } | |
| 4384 } | |
| 4385 if (new_to == new_from) { | |
| 4386 ranges->Add(CharacterRange::Singleton(new_from)); | |
| 4387 } else { | |
| 4388 ranges->Add(CharacterRange(new_from, new_to)); | |
| 4389 } | |
| 4390 } | |
| 4391 } | |
| 4392 | |
| 4393 | |
| 4394 ZoneList<CharacterRange>* CharacterSet::ranges() { | 4330 ZoneList<CharacterRange>* CharacterSet::ranges() { | 
| 4395 if (ranges_ == NULL) { | 4331 if (ranges_ == NULL) { | 
| 4396 ranges_ = new ZoneList<CharacterRange>(2); | 4332 ranges_ = new ZoneList<CharacterRange>(2); | 
| 4397 CharacterRange::AddClassEscape(standard_set_type_, ranges_); | 4333 CharacterRange::AddClassEscape(standard_set_type_, ranges_); | 
| 4398 } | 4334 } | 
| 4399 return ranges_; | 4335 return ranges_; | 
| 4400 } | 4336 } | 
| 4401 | 4337 | 
| 4402 | 4338 | 
| 4403 // Move a number of elements in a zonelist to another position | 4339 // Move a number of elements in a zonelist to another position | 
| (...skipping 979 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5383 } | 5319 } | 
| 5384 | 5320 | 
| 5385 return compiler.Assemble(¯o_assembler, | 5321 return compiler.Assemble(¯o_assembler, | 
| 5386 node, | 5322 node, | 
| 5387 data->capture_count, | 5323 data->capture_count, | 
| 5388 pattern); | 5324 pattern); | 
| 5389 } | 5325 } | 
| 5390 | 5326 | 
| 5391 | 5327 | 
| 5392 }} // namespace v8::internal | 5328 }} // namespace v8::internal | 
| OLD | NEW |