src/jsregexp.cc - Issue 7193007: Refix issue 1472. The previous fix worked for the example in the bug

Side by Side Diff: src/jsregexp.cc

Issue 7193007: Refix issue 1472. The previous fix worked for the example in the bug (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: Created 9 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved.	1 // Copyright 2006-2009 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 3712 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3723 RegExpNode* on_success) {	3723 RegExpNode* on_success) {

3724 return ToNode(min(),	3724 return ToNode(min(),

3725 max(),	3725 max(),

3726 is_greedy(),	3726 is_greedy(),

3727 body(),	3727 body(),

3728 compiler,	3728 compiler,

3729 on_success);	3729 on_success);

3730 }	3730 }

3731	3731

3732	3732

	3733 class RegExpExpansionLimiter {
	Lasse Reichstein 2011/06/16 12:12:47 Some documentation would be nice. What problem is Some documentation would be nice. What problem is this class supposed to solve.
	3734 public:

	3735 static const int kMaxExpansionFactor = 6;

	3736 RegExpExpansionLimiter(Isolate* isolate, int factor)

	3737 : isolate_(isolate),

	3738 saved_regexp_expansion_factor_(isolate->regexp_expansion_factor()),

	3739 ok_to_expand_(true) {

	3740 ASSERT(factor > 0);

	3741 int new_factor = saved_regexp_expansion_factor_ * factor;

	3742 if (new_factor < saved_regexp_expansion_factor_) {
	Lasse Reichstein 2011/06/16 12:12:47 Dangerous assumption. Multiplication, if it's supp Dangerous assumption. Multiplication, if it's supposed to be that, can wrap more than once. Test instead: if (kMaxInt / factor < saved_regexp_expansion_factor_) { // overflow ...
	3743 // Integer overflow.

	3744 new_factor = kMaxInt;

	3745 ok_to_expand_ = false;

	3746 } else if (new_factor > kMaxExpansionFactor) {

	3747 ok_to_expand_ = false;

	3748 }

	3749 isolate->set_regexp_expansion_factor(new_factor);

	3750 }

	3751

	3752 ~RegExpExpansionLimiter() {

	3753 isolate_->set_regexp_expansion_factor(saved_regexp_expansion_factor_);

	3754 }

	3755

	3756 bool ok_to_expand() { return ok_to_expand_; }

	3757

	3758 private:

	3759 Isolate* isolate_;

	3760 int saved_regexp_expansion_factor_;

	3761 bool ok_to_expand_;

	3762

	3763 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);

	3764 };

	3765

	3766

3733 RegExpNode* RegExpQuantifier::ToNode(int min,	3767 RegExpNode* RegExpQuantifier::ToNode(int min,

3734 int max,	3768 int max,

3735 bool is_greedy,	3769 bool is_greedy,

3736 RegExpTree* body,	3770 RegExpTree* body,

3737 RegExpCompiler* compiler,	3771 RegExpCompiler* compiler,

3738 RegExpNode* on_success,	3772 RegExpNode* on_success,

3739 bool not_at_start) {	3773 bool not_at_start) {

3740 // x{f, t} becomes this:	3774 // x{f, t} becomes this:

3741 //	3775 //

3742 // (r++)<-.	3776 // (r++)<-.

(...skipping 16 matching lines...) Expand all Loading...
3759 // this case.	3793 // this case.

3760 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,}	3794 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,}

3761 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}	3795 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}

3762 if (max == 0) return on_success; // This can happen due to recursion.	3796 if (max == 0) return on_success; // This can happen due to recursion.

3763 bool body_can_be_empty = (body->min_match() == 0);	3797 bool body_can_be_empty = (body->min_match() == 0);

3764 int body_start_reg = RegExpCompiler::kNoRegister;	3798 int body_start_reg = RegExpCompiler::kNoRegister;

3765 Interval capture_registers = body->CaptureRegisters();	3799 Interval capture_registers = body->CaptureRegisters();

3766 bool needs_capture_clearing = !capture_registers.is_empty();	3800 bool needs_capture_clearing = !capture_registers.is_empty();

3767 if (body_can_be_empty) {	3801 if (body_can_be_empty) {

3768 body_start_reg = compiler->AllocateRegister();	3802 body_start_reg = compiler->AllocateRegister();

3769 } else if (FLAG_regexp_optimization &&	3803 } else if (FLAG_regexp_optimization && !needs_capture_clearing) {

3770 !body->ContainsExpandedQuantifier() &&

3771 !needs_capture_clearing) {

3772 // Only unroll if there are no captures and the body can't be	3804 // Only unroll if there are no captures and the body can't be

3773 // empty.	3805 // empty.

3774 if (min > 0 && min <= kMaxUnrolledMinMatches) {	3806 {

3775 int new_max = (max == kInfinity) ? max : max - min;	3807 RegExpExpansionLimiter limiter(

3776 // Recurse once to get the loop or optional matches after the fixed ones.	3808 Isolate::Current(), min + ((max - min) ? 1 : 0));
	Lasse Reichstein 2011/06/16 12:12:47 Do we really not have a faster way to get the isol Do we really not have a faster way to get the isolate? Is it important to store the expansion factor between regexp compilations (in the isolate), or could it be stored on, e.g., the RegExpCompiler class, making this independent of isolates? It seems it should be one value per RegExp compiled.
3777 RegExpNode* answer = ToNode(	3809 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {

3778 0, new_max, is_greedy, body, compiler, on_success, true);	3810 int new_max = (max == kInfinity) ? max : max - min;

3779 // Unroll the forced matches from 0 to min. This can cause chains of	3811 // Recurse once to get the loop or optional matches after the fixed

3780 // TextNodes (which the parser does not generate). These should be	3812 // ones.

3781 // combined if it turns out they hinder good code generation.	3813 RegExpNode* answer = ToNode(

3782 for (int i = 0; i < min; i++) {	3814 0, new_max, is_greedy, body, compiler, on_success, true);

3783 answer = body->ToNode(compiler, answer);	3815 // Unroll the forced matches from 0 to min. This can cause chains of

	3816 // TextNodes (which the parser does not generate). These should be

	3817 // combined if it turns out they hinder good code generation.

	3818 for (int i = 0; i < min; i++) {

	3819 answer = body->ToNode(compiler, answer);

	3820 }

	3821 return answer;

3784 }	3822 }

3785 if (min > 1) body->set_contains_expanded_quantifier(true);

3786 return answer;

3787 }	3823 }

3788 if (max <= kMaxUnrolledMaxMatches) {	3824 if (max <= kMaxUnrolledMaxMatches && min == 0) {

3789 ASSERT(min == 0);	3825 RegExpExpansionLimiter limiter(Isolate::Current(), max);

3790 // Unroll the optional matches up to max.	3826 if (limiter.ok_to_expand()) {

3791 RegExpNode* answer = on_success;	3827 // Unroll the optional matches up to max.

3792 for (int i = 0; i < max; i++) {	3828 RegExpNode* answer = on_success;

3793 ChoiceNode* alternation = new ChoiceNode(2);	3829 for (int i = 0; i < max; i++) {

3794 if (is_greedy) {	3830 ChoiceNode* alternation = new ChoiceNode(2);

3795 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,	3831 if (is_greedy) {

3796 answer)));	3832 alternation->AddAlternative(

3797 alternation->AddAlternative(GuardedAlternative(on_success));	3833 GuardedAlternative(body->ToNode(compiler, answer)));

3798 } else {	3834 alternation->AddAlternative(GuardedAlternative(on_success));

3799 alternation->AddAlternative(GuardedAlternative(on_success));	3835 } else {

3800 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,	3836 alternation->AddAlternative(GuardedAlternative(on_success));

3801 answer)));	3837 alternation->AddAlternative(

	3838 GuardedAlternative(body->ToNode(compiler, answer)));

	3839 }

	3840 answer = alternation;

	3841 if (not_at_start) alternation->set_not_at_start();

3802 }	3842 }

3803 answer = alternation;	3843 return answer;

3804 if (not_at_start) alternation->set_not_at_start();

3805 }	3844 }

3806 if (max > 1) body->set_contains_expanded_quantifier(true);

3807 return answer;

3808 }	3845 }

3809 }	3846 }

3810 bool has_min = min > 0;	3847 bool has_min = min > 0;

3811 bool has_max = max < RegExpTree::kInfinity;	3848 bool has_max = max < RegExpTree::kInfinity;

3812 bool needs_counter = has_min \|\| has_max;	3849 bool needs_counter = has_min \|\| has_max;

3813 int reg_ctr = needs_counter	3850 int reg_ctr = needs_counter

3814 ? compiler->AllocateRegister()	3851 ? compiler->AllocateRegister()

3815 : RegExpCompiler::kNoRegister;	3852 : RegExpCompiler::kNoRegister;

3816 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);	3853 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);

3817 if (not_at_start) center->set_not_at_start();	3854 if (not_at_start) center->set_not_at_start();

(...skipping 305 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4123 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);	4160 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);

4124 for (int i = 0; i < overlay.length(); i += 2) {	4161 for (int i = 0; i < overlay.length(); i += 2) {

4125 table.AddRange(CharacterRange(overlay[i], overlay[i+1]),	4162 table.AddRange(CharacterRange(overlay[i], overlay[i+1]),

4126 CharacterRangeSplitter::kInOverlay);	4163 CharacterRangeSplitter::kInOverlay);

4127 }	4164 }

4128 CharacterRangeSplitter callback(included, excluded);	4165 CharacterRangeSplitter callback(included, excluded);

4129 table.ForEach(&callback);	4166 table.ForEach(&callback);

4130 }	4167 }

4131	4168

4132	4169

4133 static void AddUncanonicals(Isolate* isolate,

4134 ZoneList<CharacterRange>* ranges,

4135 int bottom,

4136 int top);

4137

4138

4139 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,	4170 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,

4140 bool is_ascii) {	4171 bool is_ascii) {

4141 Isolate* isolate = Isolate::Current();	4172 Isolate* isolate = Isolate::Current();

4142 uc16 bottom = from();	4173 uc16 bottom = from();

4143 uc16 top = to();	4174 uc16 top = to();

4144 if (is_ascii) {	4175 if (is_ascii) {

4145 if (bottom > String::kMaxAsciiCharCode) return;	4176 if (bottom > String::kMaxAsciiCharCode) return;

4146 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;	4177 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;

4147 }	4178 }

4148 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	4179 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

(...skipping 140 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4289 result.SetElementsInSecondSet();	4320 result.SetElementsInSecondSet();

4290 } else if (j < range->length()) {	4321 } else if (j < range->length()) {

4291 // Argument range contains something not in word range.	4322 // Argument range contains something not in word range.

4292 result.SetElementsInFirstSet();	4323 result.SetElementsInFirstSet();

4293 }	4324 }

4294	4325

4295 return result;	4326 return result;

4296 }	4327 }

4297	4328

4298	4329

4299 static void AddUncanonicals(Isolate* isolate,

4300 ZoneList<CharacterRange>* ranges,

4301 int bottom,

4302 int top) {

4303 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

4304 // Zones with no case mappings. There is a DEBUG-mode loop to assert that

4305 // this table is correct.

4306 // 0x0600 - 0x0fff

4307 // 0x1100 - 0x1cff

4308 // 0x2000 - 0x20ff

4309 // 0x2200 - 0x23ff

4310 // 0x2500 - 0x2bff

4311 // 0x2e00 - 0xa5ff

4312 // 0xa800 - 0xfaff

4313 // 0xfc00 - 0xfeff

4314 const int boundary_count = 18;

4315 int boundaries[] = {

4316 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500,

4317 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00};

4318

4319 // Special ASCII rule from spec can save us some work here.

4320 if (bottom == 0x80 && top == 0xffff) return;

4321

4322 if (top <= boundaries[0]) {

4323 CharacterRange range(bottom, top);

4324 range.AddCaseEquivalents(ranges, false);

4325 return;

4326 }

4327

4328 // Split up very large ranges. This helps remove ranges where there are no

4329 // case mappings.

4330 for (int i = 0; i < boundary_count; i++) {

4331 if (bottom < boundaries[i] && top >= boundaries[i]) {

4332 AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1);

4333 AddUncanonicals(isolate, ranges, boundaries[i], top);

4334 return;

4335 }

4336 }

4337

4338 // If we are completely in a zone with no case mappings then we are done.

4339 for (int i = 0; i < boundary_count; i += 2) {

4340 if (bottom >= boundaries[i] && top < boundaries[i + 1]) {

4341 #ifdef DEBUG

4342 for (int j = bottom; j <= top; j++) {

4343 unsigned current_char = j;

4344 int length = isolate->jsregexp_uncanonicalize()->get(current_char,

4345 '\0', chars);

4346 for (int k = 0; k < length; k++) {

4347 ASSERT(chars[k] == current_char);

4348 }

4349 }

4350 #endif

4351 return;

4352 }

4353 }

4354

4355 // Step through the range finding equivalent characters.

4356 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);

4357 for (int i = bottom; i <= top; i++) {

4358 int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars);

4359 for (int j = 0; j < length; j++) {

4360 uc32 chr = chars[j];

4361 if (chr != i && (chr < bottom \|\| chr > top)) {

4362 characters->Add(chr);

4363 }

4364 }

4365 }

4366

4367 // Step through the equivalent characters finding simple ranges and

4368 // adding ranges to the character class.

4369 if (characters->length() > 0) {

4370 int new_from = characters->at(0);

4371 int new_to = new_from;

4372 for (int i = 1; i < characters->length(); i++) {

4373 int chr = characters->at(i);

4374 if (chr == new_to + 1) {

4375 new_to++;

4376 } else {

4377 if (new_to == new_from) {

4378 ranges->Add(CharacterRange::Singleton(new_from));

4379 } else {

4380 ranges->Add(CharacterRange(new_from, new_to));

4381 }

4382 new_from = new_to = chr;

4383 }

4384 }

4385 if (new_to == new_from) {

4386 ranges->Add(CharacterRange::Singleton(new_from));

4387 } else {

4388 ranges->Add(CharacterRange(new_from, new_to));

4389 }

4390 }

4391 }

4392

4393

4394 ZoneList<CharacterRange>* CharacterSet::ranges() {	4330 ZoneList<CharacterRange>* CharacterSet::ranges() {

4395 if (ranges_ == NULL) {	4331 if (ranges_ == NULL) {

4396 ranges_ = new ZoneList<CharacterRange>(2);	4332 ranges_ = new ZoneList<CharacterRange>(2);

4397 CharacterRange::AddClassEscape(standard_set_type_, ranges_);	4333 CharacterRange::AddClassEscape(standard_set_type_, ranges_);

4398 }	4334 }

4399 return ranges_;	4335 return ranges_;

4400 }	4336 }

4401	4337

4402	4338

4403 // Move a number of elements in a zonelist to another position	4339 // Move a number of elements in a zonelist to another position

(...skipping 979 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5383 }	5319 }

5384	5320

5385 return compiler.Assemble(&macro_assembler,	5321 return compiler.Assemble(&macro_assembler,

5386 node,	5322 node,

5387 data->capture_count,	5323 data->capture_count,

5388 pattern);	5324 pattern);

5389 }	5325 }

5390	5326

5391	5327

5392 }} // namespace v8::internal	5328 }} // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/isolate.cc ('k') | test/mjsunit/regress/regress-1472.js » ('j') | no next file with comments »