Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(164)

Side by Side Diff: src/jsregexp.cc

Issue 7193007: Refix issue 1472. The previous fix worked for the example in the bug (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 792 matching lines...) Expand 10 before | Expand all | Expand 10 after
803 static const int kMaxRecursion = 100; 803 static const int kMaxRecursion = 100;
804 inline int recursion_depth() { return recursion_depth_; } 804 inline int recursion_depth() { return recursion_depth_; }
805 inline void IncrementRecursionDepth() { recursion_depth_++; } 805 inline void IncrementRecursionDepth() { recursion_depth_++; }
806 inline void DecrementRecursionDepth() { recursion_depth_--; } 806 inline void DecrementRecursionDepth() { recursion_depth_--; }
807 807
808 void SetRegExpTooBig() { reg_exp_too_big_ = true; } 808 void SetRegExpTooBig() { reg_exp_too_big_ = true; }
809 809
810 inline bool ignore_case() { return ignore_case_; } 810 inline bool ignore_case() { return ignore_case_; }
811 inline bool ascii() { return ascii_; } 811 inline bool ascii() { return ascii_; }
812 812
813 int current_expansion_factor() { return current_expansion_factor_; }
814 void set_current_expansion_factor(int value) {
815 current_expansion_factor_ = value;
816 }
817
813 static const int kNoRegister = -1; 818 static const int kNoRegister = -1;
814 819
815 private: 820 private:
816 EndNode* accept_; 821 EndNode* accept_;
817 int next_register_; 822 int next_register_;
818 List<RegExpNode*>* work_list_; 823 List<RegExpNode*>* work_list_;
819 int recursion_depth_; 824 int recursion_depth_;
820 RegExpMacroAssembler* macro_assembler_; 825 RegExpMacroAssembler* macro_assembler_;
821 bool ignore_case_; 826 bool ignore_case_;
822 bool ascii_; 827 bool ascii_;
823 bool reg_exp_too_big_; 828 bool reg_exp_too_big_;
829 int current_expansion_factor_;
824 }; 830 };
825 831
826 832
827 class RecursionCheck { 833 class RecursionCheck {
828 public: 834 public:
829 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 835 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
830 compiler->IncrementRecursionDepth(); 836 compiler->IncrementRecursionDepth();
831 } 837 }
832 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 838 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
833 private: 839 private:
834 RegExpCompiler* compiler_; 840 RegExpCompiler* compiler_;
835 }; 841 };
836 842
837 843
838 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { 844 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
839 return RegExpEngine::CompilationResult("RegExp too big"); 845 return RegExpEngine::CompilationResult("RegExp too big");
840 } 846 }
841 847
842 848
843 // Attempts to compile the regexp using an Irregexp code generator. Returns 849 // Attempts to compile the regexp using an Irregexp code generator. Returns
844 // a fixed array or a null handle depending on whether it succeeded. 850 // a fixed array or a null handle depending on whether it succeeded.
845 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) 851 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii)
846 : next_register_(2 * (capture_count + 1)), 852 : next_register_(2 * (capture_count + 1)),
847 work_list_(NULL), 853 work_list_(NULL),
848 recursion_depth_(0), 854 recursion_depth_(0),
849 ignore_case_(ignore_case), 855 ignore_case_(ignore_case),
850 ascii_(ascii), 856 ascii_(ascii),
851 reg_exp_too_big_(false) { 857 reg_exp_too_big_(false),
858 current_expansion_factor_(1) {
852 accept_ = new EndNode(EndNode::ACCEPT); 859 accept_ = new EndNode(EndNode::ACCEPT);
853 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); 860 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
854 } 861 }
855 862
856 863
857 RegExpEngine::CompilationResult RegExpCompiler::Assemble( 864 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
858 RegExpMacroAssembler* macro_assembler, 865 RegExpMacroAssembler* macro_assembler,
859 RegExpNode* start, 866 RegExpNode* start,
860 int capture_count, 867 int capture_count,
861 Handle<String> pattern) { 868 Handle<String> pattern) {
(...skipping 2861 matching lines...) Expand 10 before | Expand all | Expand 10 after
3723 RegExpNode* on_success) { 3730 RegExpNode* on_success) {
3724 return ToNode(min(), 3731 return ToNode(min(),
3725 max(), 3732 max(),
3726 is_greedy(), 3733 is_greedy(),
3727 body(), 3734 body(),
3728 compiler, 3735 compiler,
3729 on_success); 3736 on_success);
3730 } 3737 }
3731 3738
3732 3739
3740 // Scoped object to keep track of how much we unroll quantifier loops in the
3741 // regexp graph generator.
3742 class RegExpExpansionLimiter {
3743 public:
3744 static const int kMaxExpansionFactor = 6;
3745 RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
3746 : compiler_(compiler),
3747 saved_expansion_factor_(compiler->current_expansion_factor()),
3748 ok_to_expand_(true) {
Lasse Reichstein 2011/06/17 07:18:49 How about initializing this as: ok_to_expand_(sa
Erik Corry 2011/06/17 07:31:14 Done.
3749 ASSERT(factor > 0);
3750 if (factor > kMaxExpansionFactor) {
Lasse Reichstein 2011/06/17 07:18:49 Add comment saying that this prevents overflow in
Erik Corry 2011/06/17 07:31:14 Done.
3751 ok_to_expand_ = false;
3752 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
3753 } else {
3754 int new_factor = saved_expansion_factor_ * factor;
3755 if (new_factor > kMaxExpansionFactor) {
3756 ok_to_expand_ = false;
3757 }
Lasse Reichstein 2011/06/17 07:18:49 Remove the if and just always assing: ok_to_expan
Erik Corry 2011/06/17 07:31:14 Done.
3758 compiler->set_current_expansion_factor(new_factor);
3759 }
3760 }
3761
3762 ~RegExpExpansionLimiter() {
3763 compiler_->set_current_expansion_factor(saved_expansion_factor_);
3764 }
3765
3766 bool ok_to_expand() { return ok_to_expand_; }
3767
3768 private:
3769 RegExpCompiler* compiler_;
3770 int saved_expansion_factor_;
3771 bool ok_to_expand_;
3772
3773 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
3774 };
3775
3776
3733 RegExpNode* RegExpQuantifier::ToNode(int min, 3777 RegExpNode* RegExpQuantifier::ToNode(int min,
3734 int max, 3778 int max,
3735 bool is_greedy, 3779 bool is_greedy,
3736 RegExpTree* body, 3780 RegExpTree* body,
3737 RegExpCompiler* compiler, 3781 RegExpCompiler* compiler,
3738 RegExpNode* on_success, 3782 RegExpNode* on_success,
3739 bool not_at_start) { 3783 bool not_at_start) {
3740 // x{f, t} becomes this: 3784 // x{f, t} becomes this:
3741 // 3785 //
3742 // (r++)<-. 3786 // (r++)<-.
(...skipping 16 matching lines...) Expand all
3759 // this case. 3803 // this case.
3760 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,} 3804 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,}
3761 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3} 3805 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}
3762 if (max == 0) return on_success; // This can happen due to recursion. 3806 if (max == 0) return on_success; // This can happen due to recursion.
3763 bool body_can_be_empty = (body->min_match() == 0); 3807 bool body_can_be_empty = (body->min_match() == 0);
3764 int body_start_reg = RegExpCompiler::kNoRegister; 3808 int body_start_reg = RegExpCompiler::kNoRegister;
3765 Interval capture_registers = body->CaptureRegisters(); 3809 Interval capture_registers = body->CaptureRegisters();
3766 bool needs_capture_clearing = !capture_registers.is_empty(); 3810 bool needs_capture_clearing = !capture_registers.is_empty();
3767 if (body_can_be_empty) { 3811 if (body_can_be_empty) {
3768 body_start_reg = compiler->AllocateRegister(); 3812 body_start_reg = compiler->AllocateRegister();
3769 } else if (FLAG_regexp_optimization && 3813 } else if (FLAG_regexp_optimization && !needs_capture_clearing) {
3770 !body->ContainsExpandedQuantifier() &&
3771 !needs_capture_clearing) {
3772 // Only unroll if there are no captures and the body can't be 3814 // Only unroll if there are no captures and the body can't be
3773 // empty. 3815 // empty.
3774 if (min > 0 && min <= kMaxUnrolledMinMatches) { 3816 {
3775 int new_max = (max == kInfinity) ? max : max - min; 3817 RegExpExpansionLimiter limiter(
3776 // Recurse once to get the loop or optional matches after the fixed ones. 3818 compiler, min + ((max - min) ? 1 : 0));
Lasse Reichstein 2011/06/17 07:18:49 I just noticed the using of (max - min) as a boole
Erik Corry 2011/06/17 07:31:14 max - min replaced with max != min
3777 RegExpNode* answer = ToNode( 3819 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
3778 0, new_max, is_greedy, body, compiler, on_success, true); 3820 int new_max = (max == kInfinity) ? max : max - min;
3779 // Unroll the forced matches from 0 to min. This can cause chains of 3821 // Recurse once to get the loop or optional matches after the fixed
3780 // TextNodes (which the parser does not generate). These should be 3822 // ones.
3781 // combined if it turns out they hinder good code generation. 3823 RegExpNode* answer = ToNode(
3782 for (int i = 0; i < min; i++) { 3824 0, new_max, is_greedy, body, compiler, on_success, true);
3783 answer = body->ToNode(compiler, answer); 3825 // Unroll the forced matches from 0 to min. This can cause chains of
3826 // TextNodes (which the parser does not generate). These should be
3827 // combined if it turns out they hinder good code generation.
3828 for (int i = 0; i < min; i++) {
3829 answer = body->ToNode(compiler, answer);
3830 }
3831 return answer;
3784 } 3832 }
3785 if (min > 1) body->set_contains_expanded_quantifier(true);
3786 return answer;
3787 } 3833 }
3788 if (max <= kMaxUnrolledMaxMatches) { 3834 if (max <= kMaxUnrolledMaxMatches && min == 0) {
3789 ASSERT(min == 0); 3835 ASSERT(max > 0); // Due to the 'if' above.
3790 // Unroll the optional matches up to max. 3836 RegExpExpansionLimiter limiter(compiler, max);
3791 RegExpNode* answer = on_success; 3837 if (limiter.ok_to_expand()) {
3792 for (int i = 0; i < max; i++) { 3838 // Unroll the optional matches up to max.
3793 ChoiceNode* alternation = new ChoiceNode(2); 3839 RegExpNode* answer = on_success;
3794 if (is_greedy) { 3840 for (int i = 0; i < max; i++) {
3795 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, 3841 ChoiceNode* alternation = new ChoiceNode(2);
3796 answer))); 3842 if (is_greedy) {
3797 alternation->AddAlternative(GuardedAlternative(on_success)); 3843 alternation->AddAlternative(
3798 } else { 3844 GuardedAlternative(body->ToNode(compiler, answer)));
3799 alternation->AddAlternative(GuardedAlternative(on_success)); 3845 alternation->AddAlternative(GuardedAlternative(on_success));
3800 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, 3846 } else {
3801 answer))); 3847 alternation->AddAlternative(GuardedAlternative(on_success));
3848 alternation->AddAlternative(
3849 GuardedAlternative(body->ToNode(compiler, answer)));
3850 }
3851 answer = alternation;
3852 if (not_at_start) alternation->set_not_at_start();
3802 } 3853 }
3803 answer = alternation; 3854 return answer;
3804 if (not_at_start) alternation->set_not_at_start();
3805 } 3855 }
3806 if (max > 1) body->set_contains_expanded_quantifier(true);
3807 return answer;
3808 } 3856 }
3809 } 3857 }
3810 bool has_min = min > 0; 3858 bool has_min = min > 0;
3811 bool has_max = max < RegExpTree::kInfinity; 3859 bool has_max = max < RegExpTree::kInfinity;
3812 bool needs_counter = has_min || has_max; 3860 bool needs_counter = has_min || has_max;
3813 int reg_ctr = needs_counter 3861 int reg_ctr = needs_counter
3814 ? compiler->AllocateRegister() 3862 ? compiler->AllocateRegister()
3815 : RegExpCompiler::kNoRegister; 3863 : RegExpCompiler::kNoRegister;
3816 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0); 3864 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);
3817 if (not_at_start) center->set_not_at_start(); 3865 if (not_at_start) center->set_not_at_start();
(...skipping 305 matching lines...) Expand 10 before | Expand all | Expand 10 after
4123 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); 4171 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);
4124 for (int i = 0; i < overlay.length(); i += 2) { 4172 for (int i = 0; i < overlay.length(); i += 2) {
4125 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), 4173 table.AddRange(CharacterRange(overlay[i], overlay[i+1]),
4126 CharacterRangeSplitter::kInOverlay); 4174 CharacterRangeSplitter::kInOverlay);
4127 } 4175 }
4128 CharacterRangeSplitter callback(included, excluded); 4176 CharacterRangeSplitter callback(included, excluded);
4129 table.ForEach(&callback); 4177 table.ForEach(&callback);
4130 } 4178 }
4131 4179
4132 4180
4133 static void AddUncanonicals(Isolate* isolate,
4134 ZoneList<CharacterRange>* ranges,
4135 int bottom,
4136 int top);
4137
4138
4139 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, 4181 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
4140 bool is_ascii) { 4182 bool is_ascii) {
4141 Isolate* isolate = Isolate::Current(); 4183 Isolate* isolate = Isolate::Current();
4142 uc16 bottom = from(); 4184 uc16 bottom = from();
4143 uc16 top = to(); 4185 uc16 top = to();
4144 if (is_ascii) { 4186 if (is_ascii) {
4145 if (bottom > String::kMaxAsciiCharCode) return; 4187 if (bottom > String::kMaxAsciiCharCode) return;
4146 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; 4188 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;
4147 } 4189 }
4148 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 4190 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after
4289 result.SetElementsInSecondSet(); 4331 result.SetElementsInSecondSet();
4290 } else if (j < range->length()) { 4332 } else if (j < range->length()) {
4291 // Argument range contains something not in word range. 4333 // Argument range contains something not in word range.
4292 result.SetElementsInFirstSet(); 4334 result.SetElementsInFirstSet();
4293 } 4335 }
4294 4336
4295 return result; 4337 return result;
4296 } 4338 }
4297 4339
4298 4340
4299 static void AddUncanonicals(Isolate* isolate,
4300 ZoneList<CharacterRange>* ranges,
4301 int bottom,
4302 int top) {
4303 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4304 // Zones with no case mappings. There is a DEBUG-mode loop to assert that
4305 // this table is correct.
4306 // 0x0600 - 0x0fff
4307 // 0x1100 - 0x1cff
4308 // 0x2000 - 0x20ff
4309 // 0x2200 - 0x23ff
4310 // 0x2500 - 0x2bff
4311 // 0x2e00 - 0xa5ff
4312 // 0xa800 - 0xfaff
4313 // 0xfc00 - 0xfeff
4314 const int boundary_count = 18;
4315 int boundaries[] = {
4316 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500,
4317 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00};
4318
4319 // Special ASCII rule from spec can save us some work here.
4320 if (bottom == 0x80 && top == 0xffff) return;
4321
4322 if (top <= boundaries[0]) {
4323 CharacterRange range(bottom, top);
4324 range.AddCaseEquivalents(ranges, false);
4325 return;
4326 }
4327
4328 // Split up very large ranges. This helps remove ranges where there are no
4329 // case mappings.
4330 for (int i = 0; i < boundary_count; i++) {
4331 if (bottom < boundaries[i] && top >= boundaries[i]) {
4332 AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1);
4333 AddUncanonicals(isolate, ranges, boundaries[i], top);
4334 return;
4335 }
4336 }
4337
4338 // If we are completely in a zone with no case mappings then we are done.
4339 for (int i = 0; i < boundary_count; i += 2) {
4340 if (bottom >= boundaries[i] && top < boundaries[i + 1]) {
4341 #ifdef DEBUG
4342 for (int j = bottom; j <= top; j++) {
4343 unsigned current_char = j;
4344 int length = isolate->jsregexp_uncanonicalize()->get(current_char,
4345 '\0', chars);
4346 for (int k = 0; k < length; k++) {
4347 ASSERT(chars[k] == current_char);
4348 }
4349 }
4350 #endif
4351 return;
4352 }
4353 }
4354
4355 // Step through the range finding equivalent characters.
4356 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
4357 for (int i = bottom; i <= top; i++) {
4358 int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars);
4359 for (int j = 0; j < length; j++) {
4360 uc32 chr = chars[j];
4361 if (chr != i && (chr < bottom || chr > top)) {
4362 characters->Add(chr);
4363 }
4364 }
4365 }
4366
4367 // Step through the equivalent characters finding simple ranges and
4368 // adding ranges to the character class.
4369 if (characters->length() > 0) {
4370 int new_from = characters->at(0);
4371 int new_to = new_from;
4372 for (int i = 1; i < characters->length(); i++) {
4373 int chr = characters->at(i);
4374 if (chr == new_to + 1) {
4375 new_to++;
4376 } else {
4377 if (new_to == new_from) {
4378 ranges->Add(CharacterRange::Singleton(new_from));
4379 } else {
4380 ranges->Add(CharacterRange(new_from, new_to));
4381 }
4382 new_from = new_to = chr;
4383 }
4384 }
4385 if (new_to == new_from) {
4386 ranges->Add(CharacterRange::Singleton(new_from));
4387 } else {
4388 ranges->Add(CharacterRange(new_from, new_to));
4389 }
4390 }
4391 }
4392
4393
4394 ZoneList<CharacterRange>* CharacterSet::ranges() { 4341 ZoneList<CharacterRange>* CharacterSet::ranges() {
4395 if (ranges_ == NULL) { 4342 if (ranges_ == NULL) {
4396 ranges_ = new ZoneList<CharacterRange>(2); 4343 ranges_ = new ZoneList<CharacterRange>(2);
4397 CharacterRange::AddClassEscape(standard_set_type_, ranges_); 4344 CharacterRange::AddClassEscape(standard_set_type_, ranges_);
4398 } 4345 }
4399 return ranges_; 4346 return ranges_;
4400 } 4347 }
4401 4348
4402 4349
4403 // Move a number of elements in a zonelist to another position 4350 // Move a number of elements in a zonelist to another position
(...skipping 979 matching lines...) Expand 10 before | Expand all | Expand 10 after
5383 } 5330 }
5384 5331
5385 return compiler.Assemble(&macro_assembler, 5332 return compiler.Assemble(&macro_assembler,
5386 node, 5333 node,
5387 data->capture_count, 5334 data->capture_count,
5388 pattern); 5335 pattern);
5389 } 5336 }
5390 5337
5391 5338
5392 }} // namespace v8::internal 5339 }} // namespace v8::internal
OLDNEW
« src/ast.h ('K') | « src/ast.cc ('k') | test/mjsunit/regress/regress-1472.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698