Index: src/jsregexp.cc |
=================================================================== |
--- src/jsregexp.cc (revision 8315) |
+++ src/jsregexp.cc (working copy) |
@@ -810,6 +810,11 @@ |
inline bool ignore_case() { return ignore_case_; } |
inline bool ascii() { return ascii_; } |
+ int current_expansion_factor() { return current_expansion_factor_; } |
+ void set_current_expansion_factor(int value) { |
+ current_expansion_factor_ = value; |
+ } |
+ |
static const int kNoRegister = -1; |
private: |
@@ -821,6 +826,7 @@ |
bool ignore_case_; |
bool ascii_; |
bool reg_exp_too_big_; |
+ int current_expansion_factor_; |
}; |
@@ -848,7 +854,8 @@ |
recursion_depth_(0), |
ignore_case_(ignore_case), |
ascii_(ascii), |
- reg_exp_too_big_(false) { |
+ reg_exp_too_big_(false), |
+ current_expansion_factor_(1) { |
accept_ = new EndNode(EndNode::ACCEPT); |
ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); |
} |
@@ -3730,6 +3737,43 @@ |
} |
+// Scoped object to keep track of how much we unroll quantifier loops in the |
+// regexp graph generator. |
+class RegExpExpansionLimiter { |
+ public: |
+ static const int kMaxExpansionFactor = 6; |
+ RegExpExpansionLimiter(RegExpCompiler* compiler, int factor) |
+ : compiler_(compiler), |
+ saved_expansion_factor_(compiler->current_expansion_factor()), |
+ ok_to_expand_(true) { |
Lasse Reichstein
2011/06/17 07:18:49
How about initializing this as:
ok_to_expand_(sa
Erik Corry
2011/06/17 07:31:14
Done.
|
+ ASSERT(factor > 0); |
+ if (factor > kMaxExpansionFactor) { |
Lasse Reichstein
2011/06/17 07:18:49
Add comment saying that this prevents overflow in
Erik Corry
2011/06/17 07:31:14
Done.
|
+ ok_to_expand_ = false; |
+ compiler->set_current_expansion_factor(kMaxExpansionFactor + 1); |
+ } else { |
+ int new_factor = saved_expansion_factor_ * factor; |
+ if (new_factor > kMaxExpansionFactor) { |
+ ok_to_expand_ = false; |
+ } |
Lasse Reichstein
2011/06/17 07:18:49
Remove the if and just always assing:
ok_to_expan
Erik Corry
2011/06/17 07:31:14
Done.
|
+ compiler->set_current_expansion_factor(new_factor); |
+ } |
+ } |
+ |
+ ~RegExpExpansionLimiter() { |
+ compiler_->set_current_expansion_factor(saved_expansion_factor_); |
+ } |
+ |
+ bool ok_to_expand() { return ok_to_expand_; } |
+ |
+ private: |
+ RegExpCompiler* compiler_; |
+ int saved_expansion_factor_; |
+ bool ok_to_expand_; |
+ |
+ DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); |
+}; |
+ |
+ |
RegExpNode* RegExpQuantifier::ToNode(int min, |
int max, |
bool is_greedy, |
@@ -3766,45 +3810,49 @@ |
bool needs_capture_clearing = !capture_registers.is_empty(); |
if (body_can_be_empty) { |
body_start_reg = compiler->AllocateRegister(); |
- } else if (FLAG_regexp_optimization && |
- !body->ContainsExpandedQuantifier() && |
- !needs_capture_clearing) { |
+ } else if (FLAG_regexp_optimization && !needs_capture_clearing) { |
// Only unroll if there are no captures and the body can't be |
// empty. |
- if (min > 0 && min <= kMaxUnrolledMinMatches) { |
- int new_max = (max == kInfinity) ? max : max - min; |
- // Recurse once to get the loop or optional matches after the fixed ones. |
- RegExpNode* answer = ToNode( |
- 0, new_max, is_greedy, body, compiler, on_success, true); |
- // Unroll the forced matches from 0 to min. This can cause chains of |
- // TextNodes (which the parser does not generate). These should be |
- // combined if it turns out they hinder good code generation. |
- for (int i = 0; i < min; i++) { |
- answer = body->ToNode(compiler, answer); |
+ { |
+ RegExpExpansionLimiter limiter( |
+ compiler, min + ((max - min) ? 1 : 0)); |
Lasse Reichstein
2011/06/17 07:18:49
I just noticed the using of (max - min) as a boole
Erik Corry
2011/06/17 07:31:14
max - min replaced with max != min
|
+ if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { |
+ int new_max = (max == kInfinity) ? max : max - min; |
+ // Recurse once to get the loop or optional matches after the fixed |
+ // ones. |
+ RegExpNode* answer = ToNode( |
+ 0, new_max, is_greedy, body, compiler, on_success, true); |
+ // Unroll the forced matches from 0 to min. This can cause chains of |
+ // TextNodes (which the parser does not generate). These should be |
+ // combined if it turns out they hinder good code generation. |
+ for (int i = 0; i < min; i++) { |
+ answer = body->ToNode(compiler, answer); |
+ } |
+ return answer; |
} |
- if (min > 1) body->set_contains_expanded_quantifier(true); |
- return answer; |
} |
- if (max <= kMaxUnrolledMaxMatches) { |
- ASSERT(min == 0); |
- // Unroll the optional matches up to max. |
- RegExpNode* answer = on_success; |
- for (int i = 0; i < max; i++) { |
- ChoiceNode* alternation = new ChoiceNode(2); |
- if (is_greedy) { |
- alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, |
- answer))); |
- alternation->AddAlternative(GuardedAlternative(on_success)); |
- } else { |
- alternation->AddAlternative(GuardedAlternative(on_success)); |
- alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, |
- answer))); |
+ if (max <= kMaxUnrolledMaxMatches && min == 0) { |
+ ASSERT(max > 0); // Due to the 'if' above. |
+ RegExpExpansionLimiter limiter(compiler, max); |
+ if (limiter.ok_to_expand()) { |
+ // Unroll the optional matches up to max. |
+ RegExpNode* answer = on_success; |
+ for (int i = 0; i < max; i++) { |
+ ChoiceNode* alternation = new ChoiceNode(2); |
+ if (is_greedy) { |
+ alternation->AddAlternative( |
+ GuardedAlternative(body->ToNode(compiler, answer))); |
+ alternation->AddAlternative(GuardedAlternative(on_success)); |
+ } else { |
+ alternation->AddAlternative(GuardedAlternative(on_success)); |
+ alternation->AddAlternative( |
+ GuardedAlternative(body->ToNode(compiler, answer))); |
+ } |
+ answer = alternation; |
+ if (not_at_start) alternation->set_not_at_start(); |
} |
- answer = alternation; |
- if (not_at_start) alternation->set_not_at_start(); |
+ return answer; |
} |
- if (max > 1) body->set_contains_expanded_quantifier(true); |
- return answer; |
} |
} |
bool has_min = min > 0; |
@@ -4130,12 +4178,6 @@ |
} |
-static void AddUncanonicals(Isolate* isolate, |
- ZoneList<CharacterRange>* ranges, |
- int bottom, |
- int top); |
- |
- |
void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, |
bool is_ascii) { |
Isolate* isolate = Isolate::Current(); |
@@ -4296,101 +4338,6 @@ |
} |
-static void AddUncanonicals(Isolate* isolate, |
- ZoneList<CharacterRange>* ranges, |
- int bottom, |
- int top) { |
- unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
- // Zones with no case mappings. There is a DEBUG-mode loop to assert that |
- // this table is correct. |
- // 0x0600 - 0x0fff |
- // 0x1100 - 0x1cff |
- // 0x2000 - 0x20ff |
- // 0x2200 - 0x23ff |
- // 0x2500 - 0x2bff |
- // 0x2e00 - 0xa5ff |
- // 0xa800 - 0xfaff |
- // 0xfc00 - 0xfeff |
- const int boundary_count = 18; |
- int boundaries[] = { |
- 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500, |
- 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00}; |
- |
- // Special ASCII rule from spec can save us some work here. |
- if (bottom == 0x80 && top == 0xffff) return; |
- |
- if (top <= boundaries[0]) { |
- CharacterRange range(bottom, top); |
- range.AddCaseEquivalents(ranges, false); |
- return; |
- } |
- |
- // Split up very large ranges. This helps remove ranges where there are no |
- // case mappings. |
- for (int i = 0; i < boundary_count; i++) { |
- if (bottom < boundaries[i] && top >= boundaries[i]) { |
- AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1); |
- AddUncanonicals(isolate, ranges, boundaries[i], top); |
- return; |
- } |
- } |
- |
- // If we are completely in a zone with no case mappings then we are done. |
- for (int i = 0; i < boundary_count; i += 2) { |
- if (bottom >= boundaries[i] && top < boundaries[i + 1]) { |
-#ifdef DEBUG |
- for (int j = bottom; j <= top; j++) { |
- unsigned current_char = j; |
- int length = isolate->jsregexp_uncanonicalize()->get(current_char, |
- '\0', chars); |
- for (int k = 0; k < length; k++) { |
- ASSERT(chars[k] == current_char); |
- } |
- } |
-#endif |
- return; |
- } |
- } |
- |
- // Step through the range finding equivalent characters. |
- ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); |
- for (int i = bottom; i <= top; i++) { |
- int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars); |
- for (int j = 0; j < length; j++) { |
- uc32 chr = chars[j]; |
- if (chr != i && (chr < bottom || chr > top)) { |
- characters->Add(chr); |
- } |
- } |
- } |
- |
- // Step through the equivalent characters finding simple ranges and |
- // adding ranges to the character class. |
- if (characters->length() > 0) { |
- int new_from = characters->at(0); |
- int new_to = new_from; |
- for (int i = 1; i < characters->length(); i++) { |
- int chr = characters->at(i); |
- if (chr == new_to + 1) { |
- new_to++; |
- } else { |
- if (new_to == new_from) { |
- ranges->Add(CharacterRange::Singleton(new_from)); |
- } else { |
- ranges->Add(CharacterRange(new_from, new_to)); |
- } |
- new_from = new_to = chr; |
- } |
- } |
- if (new_to == new_from) { |
- ranges->Add(CharacterRange::Singleton(new_from)); |
- } else { |
- ranges->Add(CharacterRange(new_from, new_to)); |
- } |
- } |
-} |
- |
- |
ZoneList<CharacterRange>* CharacterSet::ranges() { |
if (ranges_ == NULL) { |
ranges_ = new ZoneList<CharacterRange>(2); |