Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(533)

Unified Diff: src/jsregexp.cc

Issue 7348008: Merge up to 8597 to experimental/gc from the bleeding edge. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/gc/
Patch Set: '' Created 9 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/jsregexp.h ('k') | src/lithium.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.cc
===================================================================
--- src/jsregexp.cc (revision 8618)
+++ src/jsregexp.cc (working copy)
@@ -127,7 +127,7 @@
return re;
}
pattern = FlattenGetString(pattern);
- CompilationZoneScope zone_scope(isolate, DELETE_ON_EXIT);
+ ZoneScope zone_scope(isolate, DELETE_ON_EXIT);
PostponeInterruptsScope postpone(isolate);
RegExpCompileData parse_result;
FlatStringReader reader(isolate, pattern);
@@ -295,23 +295,62 @@
#else // V8_INTERPRETED_REGEXP (RegExp native code)
if (compiled_code->IsCode()) return true;
#endif
+ // We could potentially have marked this as flushable, but have kept
+ // a saved version if we did not flush it yet.
+ Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));
+ if (saved_code->IsCode()) {
+ // Reinstate the code in the original place.
+ re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);
+ ASSERT(compiled_code->IsSmi());
+ return true;
+ }
return CompileIrregexp(re, is_ascii);
}
+static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
+ bool is_ascii,
+ Handle<String> error_message,
+ Isolate* isolate) {
+ Factory* factory = isolate->factory();
+ Handle<FixedArray> elements = factory->NewFixedArray(2);
+ elements->set(0, re->Pattern());
+ elements->set(1, *error_message);
+ Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
+ Handle<Object> regexp_err =
+ factory->NewSyntaxError("malformed_regexp", array);
+ isolate->Throw(*regexp_err);
+ return false;
+}
+
+
bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {
// Compile the RegExp.
Isolate* isolate = re->GetIsolate();
- CompilationZoneScope zone_scope(isolate, DELETE_ON_EXIT);
+ ZoneScope zone_scope(isolate, DELETE_ON_EXIT);
PostponeInterruptsScope postpone(isolate);
+ // If we had a compilation error the last time this is saved at the
+ // saved code index.
Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));
- if (entry->IsJSObject()) {
- // If it's a JSObject, a previous compilation failed and threw this object.
- // Re-throw the object without trying again.
- isolate->Throw(entry);
+ // When arriving here entry can only be a smi, either representing an
+ // uncompiled regexp, a previous compilation error, or code that has
+ // been flushed.
+ ASSERT(entry->IsSmi());
+ int entry_value = Smi::cast(entry)->value();
+ ASSERT(entry_value == JSRegExp::kUninitializedValue ||
+ entry_value == JSRegExp::kCompilationErrorValue ||
+ (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));
+
+ if (entry_value == JSRegExp::kCompilationErrorValue) {
+ // A previous compilation failed and threw an error which we store in
+ // the saved code index (we store the error message, not the actual
+ // error). Recreate the error object and throw it.
+ Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
+ ASSERT(error_string->IsString());
+ Handle<String> error_message(String::cast(error_string));
+ CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
return false;
}
- ASSERT(entry->IsTheHole());
JSRegExp::Flags flags = re->GetFlags();
@@ -340,17 +379,9 @@
is_ascii);
if (result.error_message != NULL) {
// Unable to compile regexp.
- Factory* factory = isolate->factory();
- Handle<FixedArray> elements = factory->NewFixedArray(2);
- elements->set(0, *pattern);
Handle<String> error_message =
- factory->NewStringFromUtf8(CStrVector(result.error_message));
- elements->set(1, *error_message);
- Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
- Handle<Object> regexp_err =
- factory->NewSyntaxError("malformed_regexp", array);
- isolate->Throw(*regexp_err);
- re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err);
+ isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message));
+ CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
return false;
}
@@ -460,6 +491,7 @@
ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
do {
bool is_ascii = subject->IsAsciiRepresentation();
+ EnsureCompiledIrregexp(regexp, is_ascii);
Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
NativeRegExpMacroAssembler::Result res =
NativeRegExpMacroAssembler::Match(code,
@@ -810,7 +842,13 @@
inline bool ignore_case() { return ignore_case_; }
inline bool ascii() { return ascii_; }
+ int current_expansion_factor() { return current_expansion_factor_; }
+ void set_current_expansion_factor(int value) {
+ current_expansion_factor_ = value;
+ }
+
static const int kNoRegister = -1;
+
private:
EndNode* accept_;
int next_register_;
@@ -820,6 +858,7 @@
bool ignore_case_;
bool ascii_;
bool reg_exp_too_big_;
+ int current_expansion_factor_;
};
@@ -847,7 +886,8 @@
recursion_depth_(0),
ignore_case_(ignore_case),
ascii_(ascii),
- reg_exp_too_big_(false) {
+ reg_exp_too_big_(false),
+ current_expansion_factor_(1) {
accept_ = new EndNode(EndNode::ACCEPT);
ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
}
@@ -2767,6 +2807,7 @@
AlternativeGeneration* at(int i) {
return alt_gens_[i];
}
+
private:
static const int kAFew = 10;
ZoneList<AlternativeGeneration*> alt_gens_;
@@ -3326,6 +3367,7 @@
}
stream()->Add("}}");
}
+
private:
bool first_;
StringStream* stream() { return stream_; }
@@ -3727,6 +3769,44 @@
}
+// Scoped object to keep track of how much we unroll quantifier loops in the
+// regexp graph generator.
+class RegExpExpansionLimiter {
+ public:
+ static const int kMaxExpansionFactor = 6;
+ RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
+ : compiler_(compiler),
+ saved_expansion_factor_(compiler->current_expansion_factor()),
+ ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
+ ASSERT(factor > 0);
+ if (ok_to_expand_) {
+ if (factor > kMaxExpansionFactor) {
+ // Avoid integer overflow of the current expansion factor.
+ ok_to_expand_ = false;
+ compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
+ } else {
+ int new_factor = saved_expansion_factor_ * factor;
+ ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
+ compiler->set_current_expansion_factor(new_factor);
+ }
+ }
+ }
+
+ ~RegExpExpansionLimiter() {
+ compiler_->set_current_expansion_factor(saved_expansion_factor_);
+ }
+
+ bool ok_to_expand() { return ok_to_expand_; }
+
+ private:
+ RegExpCompiler* compiler_;
+ int saved_expansion_factor_;
+ bool ok_to_expand_;
+
+ DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
+};
+
+
RegExpNode* RegExpQuantifier::ToNode(int min,
int max,
bool is_greedy,
@@ -3766,38 +3846,46 @@
} else if (FLAG_regexp_optimization && !needs_capture_clearing) {
// Only unroll if there are no captures and the body can't be
// empty.
- if (min > 0 && min <= kMaxUnrolledMinMatches) {
- int new_max = (max == kInfinity) ? max : max - min;
- // Recurse once to get the loop or optional matches after the fixed ones.
- RegExpNode* answer = ToNode(
- 0, new_max, is_greedy, body, compiler, on_success, true);
- // Unroll the forced matches from 0 to min. This can cause chains of
- // TextNodes (which the parser does not generate). These should be
- // combined if it turns out they hinder good code generation.
- for (int i = 0; i < min; i++) {
- answer = body->ToNode(compiler, answer);
+ {
+ RegExpExpansionLimiter limiter(
+ compiler, min + ((max != min) ? 1 : 0));
+ if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
+ int new_max = (max == kInfinity) ? max : max - min;
+ // Recurse once to get the loop or optional matches after the fixed
+ // ones.
+ RegExpNode* answer = ToNode(
+ 0, new_max, is_greedy, body, compiler, on_success, true);
+ // Unroll the forced matches from 0 to min. This can cause chains of
+ // TextNodes (which the parser does not generate). These should be
+ // combined if it turns out they hinder good code generation.
+ for (int i = 0; i < min; i++) {
+ answer = body->ToNode(compiler, answer);
+ }
+ return answer;
}
- return answer;
}
- if (max <= kMaxUnrolledMaxMatches) {
- ASSERT(min == 0);
- // Unroll the optional matches up to max.
- RegExpNode* answer = on_success;
- for (int i = 0; i < max; i++) {
- ChoiceNode* alternation = new ChoiceNode(2);
- if (is_greedy) {
- alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,
- answer)));
- alternation->AddAlternative(GuardedAlternative(on_success));
- } else {
- alternation->AddAlternative(GuardedAlternative(on_success));
- alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,
- answer)));
+ if (max <= kMaxUnrolledMaxMatches && min == 0) {
+ ASSERT(max > 0); // Due to the 'if' above.
+ RegExpExpansionLimiter limiter(compiler, max);
+ if (limiter.ok_to_expand()) {
+ // Unroll the optional matches up to max.
+ RegExpNode* answer = on_success;
+ for (int i = 0; i < max; i++) {
+ ChoiceNode* alternation = new ChoiceNode(2);
+ if (is_greedy) {
+ alternation->AddAlternative(
+ GuardedAlternative(body->ToNode(compiler, answer)));
+ alternation->AddAlternative(GuardedAlternative(on_success));
+ } else {
+ alternation->AddAlternative(GuardedAlternative(on_success));
+ alternation->AddAlternative(
+ GuardedAlternative(body->ToNode(compiler, answer)));
+ }
+ answer = alternation;
+ if (not_at_start) alternation->set_not_at_start();
}
- answer = alternation;
- if (not_at_start) alternation->set_not_at_start();
+ return answer;
}
- return answer;
}
}
bool has_min = min > 0;
@@ -4123,12 +4211,6 @@
}
-static void AddUncanonicals(Isolate* isolate,
- ZoneList<CharacterRange>* ranges,
- int bottom,
- int top);
-
-
void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
bool is_ascii) {
Isolate* isolate = Isolate::Current();
@@ -4289,101 +4371,6 @@
}
-static void AddUncanonicals(Isolate* isolate,
- ZoneList<CharacterRange>* ranges,
- int bottom,
- int top) {
- unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- // Zones with no case mappings. There is a DEBUG-mode loop to assert that
- // this table is correct.
- // 0x0600 - 0x0fff
- // 0x1100 - 0x1cff
- // 0x2000 - 0x20ff
- // 0x2200 - 0x23ff
- // 0x2500 - 0x2bff
- // 0x2e00 - 0xa5ff
- // 0xa800 - 0xfaff
- // 0xfc00 - 0xfeff
- const int boundary_count = 18;
- int boundaries[] = {
- 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500,
- 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00};
-
- // Special ASCII rule from spec can save us some work here.
- if (bottom == 0x80 && top == 0xffff) return;
-
- if (top <= boundaries[0]) {
- CharacterRange range(bottom, top);
- range.AddCaseEquivalents(ranges, false);
- return;
- }
-
- // Split up very large ranges. This helps remove ranges where there are no
- // case mappings.
- for (int i = 0; i < boundary_count; i++) {
- if (bottom < boundaries[i] && top >= boundaries[i]) {
- AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1);
- AddUncanonicals(isolate, ranges, boundaries[i], top);
- return;
- }
- }
-
- // If we are completely in a zone with no case mappings then we are done.
- for (int i = 0; i < boundary_count; i += 2) {
- if (bottom >= boundaries[i] && top < boundaries[i + 1]) {
-#ifdef DEBUG
- for (int j = bottom; j <= top; j++) {
- unsigned current_char = j;
- int length = isolate->jsregexp_uncanonicalize()->get(current_char,
- '\0', chars);
- for (int k = 0; k < length; k++) {
- ASSERT(chars[k] == current_char);
- }
- }
-#endif
- return;
- }
- }
-
- // Step through the range finding equivalent characters.
- ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
- for (int i = bottom; i <= top; i++) {
- int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars);
- for (int j = 0; j < length; j++) {
- uc32 chr = chars[j];
- if (chr != i && (chr < bottom || chr > top)) {
- characters->Add(chr);
- }
- }
- }
-
- // Step through the equivalent characters finding simple ranges and
- // adding ranges to the character class.
- if (characters->length() > 0) {
- int new_from = characters->at(0);
- int new_to = new_from;
- for (int i = 1; i < characters->length(); i++) {
- int chr = characters->at(i);
- if (chr == new_to + 1) {
- new_to++;
- } else {
- if (new_to == new_from) {
- ranges->Add(CharacterRange::Singleton(new_from));
- } else {
- ranges->Add(CharacterRange(new_from, new_to));
- }
- new_from = new_to = chr;
- }
- }
- if (new_to == new_from) {
- ranges->Add(CharacterRange::Singleton(new_from));
- } else {
- ranges->Add(CharacterRange(new_from, new_to));
- }
- }
-}
-
-
ZoneList<CharacterRange>* CharacterSet::ranges() {
if (ranges_ == NULL) {
ranges_ = new ZoneList<CharacterRange>(2);
« no previous file with comments | « src/jsregexp.h ('k') | src/lithium.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698