Chromium Code Reviews| Index: src/jsregexp.cc |
| =================================================================== |
| --- src/jsregexp.cc (revision 8302) |
| +++ src/jsregexp.cc (working copy) |
| @@ -3730,6 +3730,40 @@ |
| } |
| +class RegExpExpansionLimiter { |
|
Lasse Reichstein
2011/06/16 12:12:47
Some documentation would be nice.
What problem is
|
| + public: |
| + static const int kMaxExpansionFactor = 6; |
| + RegExpExpansionLimiter(Isolate* isolate, int factor) |
| + : isolate_(isolate), |
| + saved_regexp_expansion_factor_(isolate->regexp_expansion_factor()), |
| + ok_to_expand_(true) { |
| + ASSERT(factor > 0); |
| + int new_factor = saved_regexp_expansion_factor_ * factor; |
| + if (new_factor < saved_regexp_expansion_factor_) { |
|
Lasse Reichstein
2011/06/16 12:12:47
Dangerous assumption. Multiplication, if it's supp
|
| + // Integer overflow. |
| + new_factor = kMaxInt; |
| + ok_to_expand_ = false; |
| + } else if (new_factor > kMaxExpansionFactor) { |
| + ok_to_expand_ = false; |
| + } |
| + isolate->set_regexp_expansion_factor(new_factor); |
| + } |
| + |
| + ~RegExpExpansionLimiter() { |
| + isolate_->set_regexp_expansion_factor(saved_regexp_expansion_factor_); |
| + } |
| + |
| + bool ok_to_expand() { return ok_to_expand_; } |
| + |
| + private: |
| + Isolate* isolate_; |
| + int saved_regexp_expansion_factor_; |
| + bool ok_to_expand_; |
| + |
| + DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); |
| +}; |
| + |
| + |
| RegExpNode* RegExpQuantifier::ToNode(int min, |
| int max, |
| bool is_greedy, |
| @@ -3766,45 +3800,48 @@ |
| bool needs_capture_clearing = !capture_registers.is_empty(); |
| if (body_can_be_empty) { |
| body_start_reg = compiler->AllocateRegister(); |
| - } else if (FLAG_regexp_optimization && |
| - !body->ContainsExpandedQuantifier() && |
| - !needs_capture_clearing) { |
| + } else if (FLAG_regexp_optimization && !needs_capture_clearing) { |
| // Only unroll if there are no captures and the body can't be |
| // empty. |
| - if (min > 0 && min <= kMaxUnrolledMinMatches) { |
| - int new_max = (max == kInfinity) ? max : max - min; |
| - // Recurse once to get the loop or optional matches after the fixed ones. |
| - RegExpNode* answer = ToNode( |
| - 0, new_max, is_greedy, body, compiler, on_success, true); |
| - // Unroll the forced matches from 0 to min. This can cause chains of |
| - // TextNodes (which the parser does not generate). These should be |
| - // combined if it turns out they hinder good code generation. |
| - for (int i = 0; i < min; i++) { |
| - answer = body->ToNode(compiler, answer); |
| + { |
| + RegExpExpansionLimiter limiter( |
| + Isolate::Current(), min + ((max - min) ? 1 : 0)); |
|
Lasse Reichstein
2011/06/16 12:12:47
Do we really not have a faster way to get the isol
|
| + if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { |
| + int new_max = (max == kInfinity) ? max : max - min; |
| + // Recurse once to get the loop or optional matches after the fixed |
| + // ones. |
| + RegExpNode* answer = ToNode( |
| + 0, new_max, is_greedy, body, compiler, on_success, true); |
| + // Unroll the forced matches from 0 to min. This can cause chains of |
| + // TextNodes (which the parser does not generate). These should be |
| + // combined if it turns out they hinder good code generation. |
| + for (int i = 0; i < min; i++) { |
| + answer = body->ToNode(compiler, answer); |
| + } |
| + return answer; |
| } |
| - if (min > 1) body->set_contains_expanded_quantifier(true); |
| - return answer; |
| } |
| - if (max <= kMaxUnrolledMaxMatches) { |
| - ASSERT(min == 0); |
| - // Unroll the optional matches up to max. |
| - RegExpNode* answer = on_success; |
| - for (int i = 0; i < max; i++) { |
| - ChoiceNode* alternation = new ChoiceNode(2); |
| - if (is_greedy) { |
| - alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, |
| - answer))); |
| - alternation->AddAlternative(GuardedAlternative(on_success)); |
| - } else { |
| - alternation->AddAlternative(GuardedAlternative(on_success)); |
| - alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, |
| - answer))); |
| + if (max <= kMaxUnrolledMaxMatches && min == 0) { |
| + RegExpExpansionLimiter limiter(Isolate::Current(), max); |
| + if (limiter.ok_to_expand()) { |
| + // Unroll the optional matches up to max. |
| + RegExpNode* answer = on_success; |
| + for (int i = 0; i < max; i++) { |
| + ChoiceNode* alternation = new ChoiceNode(2); |
| + if (is_greedy) { |
| + alternation->AddAlternative( |
| + GuardedAlternative(body->ToNode(compiler, answer))); |
| + alternation->AddAlternative(GuardedAlternative(on_success)); |
| + } else { |
| + alternation->AddAlternative(GuardedAlternative(on_success)); |
| + alternation->AddAlternative( |
| + GuardedAlternative(body->ToNode(compiler, answer))); |
| + } |
| + answer = alternation; |
| + if (not_at_start) alternation->set_not_at_start(); |
| } |
| - answer = alternation; |
| - if (not_at_start) alternation->set_not_at_start(); |
| + return answer; |
| } |
| - if (max > 1) body->set_contains_expanded_quantifier(true); |
| - return answer; |
| } |
| } |
| bool has_min = min > 0; |
| @@ -4130,12 +4167,6 @@ |
| } |
| -static void AddUncanonicals(Isolate* isolate, |
| - ZoneList<CharacterRange>* ranges, |
| - int bottom, |
| - int top); |
| - |
| - |
| void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, |
| bool is_ascii) { |
| Isolate* isolate = Isolate::Current(); |
| @@ -4296,101 +4327,6 @@ |
| } |
| -static void AddUncanonicals(Isolate* isolate, |
| - ZoneList<CharacterRange>* ranges, |
| - int bottom, |
| - int top) { |
| - unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| - // Zones with no case mappings. There is a DEBUG-mode loop to assert that |
| - // this table is correct. |
| - // 0x0600 - 0x0fff |
| - // 0x1100 - 0x1cff |
| - // 0x2000 - 0x20ff |
| - // 0x2200 - 0x23ff |
| - // 0x2500 - 0x2bff |
| - // 0x2e00 - 0xa5ff |
| - // 0xa800 - 0xfaff |
| - // 0xfc00 - 0xfeff |
| - const int boundary_count = 18; |
| - int boundaries[] = { |
| - 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500, |
| - 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00}; |
| - |
| - // Special ASCII rule from spec can save us some work here. |
| - if (bottom == 0x80 && top == 0xffff) return; |
| - |
| - if (top <= boundaries[0]) { |
| - CharacterRange range(bottom, top); |
| - range.AddCaseEquivalents(ranges, false); |
| - return; |
| - } |
| - |
| - // Split up very large ranges. This helps remove ranges where there are no |
| - // case mappings. |
| - for (int i = 0; i < boundary_count; i++) { |
| - if (bottom < boundaries[i] && top >= boundaries[i]) { |
| - AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1); |
| - AddUncanonicals(isolate, ranges, boundaries[i], top); |
| - return; |
| - } |
| - } |
| - |
| - // If we are completely in a zone with no case mappings then we are done. |
| - for (int i = 0; i < boundary_count; i += 2) { |
| - if (bottom >= boundaries[i] && top < boundaries[i + 1]) { |
| -#ifdef DEBUG |
| - for (int j = bottom; j <= top; j++) { |
| - unsigned current_char = j; |
| - int length = isolate->jsregexp_uncanonicalize()->get(current_char, |
| - '\0', chars); |
| - for (int k = 0; k < length; k++) { |
| - ASSERT(chars[k] == current_char); |
| - } |
| - } |
| -#endif |
| - return; |
| - } |
| - } |
| - |
| - // Step through the range finding equivalent characters. |
| - ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); |
| - for (int i = bottom; i <= top; i++) { |
| - int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars); |
| - for (int j = 0; j < length; j++) { |
| - uc32 chr = chars[j]; |
| - if (chr != i && (chr < bottom || chr > top)) { |
| - characters->Add(chr); |
| - } |
| - } |
| - } |
| - |
| - // Step through the equivalent characters finding simple ranges and |
| - // adding ranges to the character class. |
| - if (characters->length() > 0) { |
| - int new_from = characters->at(0); |
| - int new_to = new_from; |
| - for (int i = 1; i < characters->length(); i++) { |
| - int chr = characters->at(i); |
| - if (chr == new_to + 1) { |
| - new_to++; |
| - } else { |
| - if (new_to == new_from) { |
| - ranges->Add(CharacterRange::Singleton(new_from)); |
| - } else { |
| - ranges->Add(CharacterRange(new_from, new_to)); |
| - } |
| - new_from = new_to = chr; |
| - } |
| - } |
| - if (new_to == new_from) { |
| - ranges->Add(CharacterRange::Singleton(new_from)); |
| - } else { |
| - ranges->Add(CharacterRange(new_from, new_to)); |
| - } |
| - } |
| -} |
| - |
| - |
| ZoneList<CharacterRange>* CharacterSet::ranges() { |
| if (ranges_ == NULL) { |
| ranges_ = new ZoneList<CharacterRange>(2); |