| Index: src/jsregexp.cc
|
| ===================================================================
|
| --- src/jsregexp.cc (revision 8618)
|
| +++ src/jsregexp.cc (working copy)
|
| @@ -127,7 +127,7 @@
|
| return re;
|
| }
|
| pattern = FlattenGetString(pattern);
|
| - CompilationZoneScope zone_scope(isolate, DELETE_ON_EXIT);
|
| + ZoneScope zone_scope(isolate, DELETE_ON_EXIT);
|
| PostponeInterruptsScope postpone(isolate);
|
| RegExpCompileData parse_result;
|
| FlatStringReader reader(isolate, pattern);
|
| @@ -295,23 +295,62 @@
|
| #else // V8_INTERPRETED_REGEXP (RegExp native code)
|
| if (compiled_code->IsCode()) return true;
|
| #endif
|
| + // We could potentially have marked this as flushable, but have kept
|
| + // a saved version if we did not flush it yet.
|
| + Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));
|
| + if (saved_code->IsCode()) {
|
| + // Reinstate the code in the original place.
|
| + re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);
|
| + ASSERT(compiled_code->IsSmi());
|
| + return true;
|
| + }
|
| return CompileIrregexp(re, is_ascii);
|
| }
|
|
|
|
|
| +static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
|
| + bool is_ascii,
|
| + Handle<String> error_message,
|
| + Isolate* isolate) {
|
| + Factory* factory = isolate->factory();
|
| + Handle<FixedArray> elements = factory->NewFixedArray(2);
|
| + elements->set(0, re->Pattern());
|
| + elements->set(1, *error_message);
|
| + Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
|
| + Handle<Object> regexp_err =
|
| + factory->NewSyntaxError("malformed_regexp", array);
|
| + isolate->Throw(*regexp_err);
|
| + return false;
|
| +}
|
| +
|
| +
|
| bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {
|
| // Compile the RegExp.
|
| Isolate* isolate = re->GetIsolate();
|
| - CompilationZoneScope zone_scope(isolate, DELETE_ON_EXIT);
|
| + ZoneScope zone_scope(isolate, DELETE_ON_EXIT);
|
| PostponeInterruptsScope postpone(isolate);
|
| + // If we had a compilation error the last time this is saved at the
|
| + // saved code index.
|
| Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));
|
| - if (entry->IsJSObject()) {
|
| - // If it's a JSObject, a previous compilation failed and threw this object.
|
| - // Re-throw the object without trying again.
|
| - isolate->Throw(entry);
|
| + // When arriving here entry can only be a smi, either representing an
|
| + // uncompiled regexp, a previous compilation error, or code that has
|
| + // been flushed.
|
| + ASSERT(entry->IsSmi());
|
| + int entry_value = Smi::cast(entry)->value();
|
| + ASSERT(entry_value == JSRegExp::kUninitializedValue ||
|
| + entry_value == JSRegExp::kCompilationErrorValue ||
|
| + (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));
|
| +
|
| + if (entry_value == JSRegExp::kCompilationErrorValue) {
|
| + // A previous compilation failed and threw an error which we store in
|
| + // the saved code index (we store the error message, not the actual
|
| + // error). Recreate the error object and throw it.
|
| + Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
|
| + ASSERT(error_string->IsString());
|
| + Handle<String> error_message(String::cast(error_string));
|
| + CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
|
| return false;
|
| }
|
| - ASSERT(entry->IsTheHole());
|
|
|
| JSRegExp::Flags flags = re->GetFlags();
|
|
|
| @@ -340,17 +379,9 @@
|
| is_ascii);
|
| if (result.error_message != NULL) {
|
| // Unable to compile regexp.
|
| - Factory* factory = isolate->factory();
|
| - Handle<FixedArray> elements = factory->NewFixedArray(2);
|
| - elements->set(0, *pattern);
|
| Handle<String> error_message =
|
| - factory->NewStringFromUtf8(CStrVector(result.error_message));
|
| - elements->set(1, *error_message);
|
| - Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
|
| - Handle<Object> regexp_err =
|
| - factory->NewSyntaxError("malformed_regexp", array);
|
| - isolate->Throw(*regexp_err);
|
| - re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err);
|
| + isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message));
|
| + CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
|
| return false;
|
| }
|
|
|
| @@ -460,6 +491,7 @@
|
| ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
|
| do {
|
| bool is_ascii = subject->IsAsciiRepresentation();
|
| + EnsureCompiledIrregexp(regexp, is_ascii);
|
| Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
|
| NativeRegExpMacroAssembler::Result res =
|
| NativeRegExpMacroAssembler::Match(code,
|
| @@ -810,7 +842,13 @@
|
| inline bool ignore_case() { return ignore_case_; }
|
| inline bool ascii() { return ascii_; }
|
|
|
| + int current_expansion_factor() { return current_expansion_factor_; }
|
| + void set_current_expansion_factor(int value) {
|
| + current_expansion_factor_ = value;
|
| + }
|
| +
|
| static const int kNoRegister = -1;
|
| +
|
| private:
|
| EndNode* accept_;
|
| int next_register_;
|
| @@ -820,6 +858,7 @@
|
| bool ignore_case_;
|
| bool ascii_;
|
| bool reg_exp_too_big_;
|
| + int current_expansion_factor_;
|
| };
|
|
|
|
|
| @@ -847,7 +886,8 @@
|
| recursion_depth_(0),
|
| ignore_case_(ignore_case),
|
| ascii_(ascii),
|
| - reg_exp_too_big_(false) {
|
| + reg_exp_too_big_(false),
|
| + current_expansion_factor_(1) {
|
| accept_ = new EndNode(EndNode::ACCEPT);
|
| ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
|
| }
|
| @@ -2767,6 +2807,7 @@
|
| AlternativeGeneration* at(int i) {
|
| return alt_gens_[i];
|
| }
|
| +
|
| private:
|
| static const int kAFew = 10;
|
| ZoneList<AlternativeGeneration*> alt_gens_;
|
| @@ -3326,6 +3367,7 @@
|
| }
|
| stream()->Add("}}");
|
| }
|
| +
|
| private:
|
| bool first_;
|
| StringStream* stream() { return stream_; }
|
| @@ -3727,6 +3769,44 @@
|
| }
|
|
|
|
|
| +// Scoped object to keep track of how much we unroll quantifier loops in the
|
| +// regexp graph generator.
|
| +class RegExpExpansionLimiter {
|
| + public:
|
| + static const int kMaxExpansionFactor = 6;
|
| + RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
|
| + : compiler_(compiler),
|
| + saved_expansion_factor_(compiler->current_expansion_factor()),
|
| + ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
|
| + ASSERT(factor > 0);
|
| + if (ok_to_expand_) {
|
| + if (factor > kMaxExpansionFactor) {
|
| + // Avoid integer overflow of the current expansion factor.
|
| + ok_to_expand_ = false;
|
| + compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
|
| + } else {
|
| + int new_factor = saved_expansion_factor_ * factor;
|
| + ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
|
| + compiler->set_current_expansion_factor(new_factor);
|
| + }
|
| + }
|
| + }
|
| +
|
| + ~RegExpExpansionLimiter() {
|
| + compiler_->set_current_expansion_factor(saved_expansion_factor_);
|
| + }
|
| +
|
| + bool ok_to_expand() { return ok_to_expand_; }
|
| +
|
| + private:
|
| + RegExpCompiler* compiler_;
|
| + int saved_expansion_factor_;
|
| + bool ok_to_expand_;
|
| +
|
| + DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
|
| +};
|
| +
|
| +
|
| RegExpNode* RegExpQuantifier::ToNode(int min,
|
| int max,
|
| bool is_greedy,
|
| @@ -3766,38 +3846,46 @@
|
| } else if (FLAG_regexp_optimization && !needs_capture_clearing) {
|
| // Only unroll if there are no captures and the body can't be
|
| // empty.
|
| - if (min > 0 && min <= kMaxUnrolledMinMatches) {
|
| - int new_max = (max == kInfinity) ? max : max - min;
|
| - // Recurse once to get the loop or optional matches after the fixed ones.
|
| - RegExpNode* answer = ToNode(
|
| - 0, new_max, is_greedy, body, compiler, on_success, true);
|
| - // Unroll the forced matches from 0 to min. This can cause chains of
|
| - // TextNodes (which the parser does not generate). These should be
|
| - // combined if it turns out they hinder good code generation.
|
| - for (int i = 0; i < min; i++) {
|
| - answer = body->ToNode(compiler, answer);
|
| + {
|
| + RegExpExpansionLimiter limiter(
|
| + compiler, min + ((max != min) ? 1 : 0));
|
| + if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
|
| + int new_max = (max == kInfinity) ? max : max - min;
|
| + // Recurse once to get the loop or optional matches after the fixed
|
| + // ones.
|
| + RegExpNode* answer = ToNode(
|
| + 0, new_max, is_greedy, body, compiler, on_success, true);
|
| + // Unroll the forced matches from 0 to min. This can cause chains of
|
| + // TextNodes (which the parser does not generate). These should be
|
| + // combined if it turns out they hinder good code generation.
|
| + for (int i = 0; i < min; i++) {
|
| + answer = body->ToNode(compiler, answer);
|
| + }
|
| + return answer;
|
| }
|
| - return answer;
|
| }
|
| - if (max <= kMaxUnrolledMaxMatches) {
|
| - ASSERT(min == 0);
|
| - // Unroll the optional matches up to max.
|
| - RegExpNode* answer = on_success;
|
| - for (int i = 0; i < max; i++) {
|
| - ChoiceNode* alternation = new ChoiceNode(2);
|
| - if (is_greedy) {
|
| - alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,
|
| - answer)));
|
| - alternation->AddAlternative(GuardedAlternative(on_success));
|
| - } else {
|
| - alternation->AddAlternative(GuardedAlternative(on_success));
|
| - alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,
|
| - answer)));
|
| + if (max <= kMaxUnrolledMaxMatches && min == 0) {
|
| + ASSERT(max > 0); // Due to the 'if' above.
|
| + RegExpExpansionLimiter limiter(compiler, max);
|
| + if (limiter.ok_to_expand()) {
|
| + // Unroll the optional matches up to max.
|
| + RegExpNode* answer = on_success;
|
| + for (int i = 0; i < max; i++) {
|
| + ChoiceNode* alternation = new ChoiceNode(2);
|
| + if (is_greedy) {
|
| + alternation->AddAlternative(
|
| + GuardedAlternative(body->ToNode(compiler, answer)));
|
| + alternation->AddAlternative(GuardedAlternative(on_success));
|
| + } else {
|
| + alternation->AddAlternative(GuardedAlternative(on_success));
|
| + alternation->AddAlternative(
|
| + GuardedAlternative(body->ToNode(compiler, answer)));
|
| + }
|
| + answer = alternation;
|
| + if (not_at_start) alternation->set_not_at_start();
|
| }
|
| - answer = alternation;
|
| - if (not_at_start) alternation->set_not_at_start();
|
| + return answer;
|
| }
|
| - return answer;
|
| }
|
| }
|
| bool has_min = min > 0;
|
| @@ -4123,12 +4211,6 @@
|
| }
|
|
|
|
|
| -static void AddUncanonicals(Isolate* isolate,
|
| - ZoneList<CharacterRange>* ranges,
|
| - int bottom,
|
| - int top);
|
| -
|
| -
|
| void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
|
| bool is_ascii) {
|
| Isolate* isolate = Isolate::Current();
|
| @@ -4289,101 +4371,6 @@
|
| }
|
|
|
|
|
| -static void AddUncanonicals(Isolate* isolate,
|
| - ZoneList<CharacterRange>* ranges,
|
| - int bottom,
|
| - int top) {
|
| - unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| - // Zones with no case mappings. There is a DEBUG-mode loop to assert that
|
| - // this table is correct.
|
| - // 0x0600 - 0x0fff
|
| - // 0x1100 - 0x1cff
|
| - // 0x2000 - 0x20ff
|
| - // 0x2200 - 0x23ff
|
| - // 0x2500 - 0x2bff
|
| - // 0x2e00 - 0xa5ff
|
| - // 0xa800 - 0xfaff
|
| - // 0xfc00 - 0xfeff
|
| - const int boundary_count = 18;
|
| - int boundaries[] = {
|
| - 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500,
|
| - 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00};
|
| -
|
| - // Special ASCII rule from spec can save us some work here.
|
| - if (bottom == 0x80 && top == 0xffff) return;
|
| -
|
| - if (top <= boundaries[0]) {
|
| - CharacterRange range(bottom, top);
|
| - range.AddCaseEquivalents(ranges, false);
|
| - return;
|
| - }
|
| -
|
| - // Split up very large ranges. This helps remove ranges where there are no
|
| - // case mappings.
|
| - for (int i = 0; i < boundary_count; i++) {
|
| - if (bottom < boundaries[i] && top >= boundaries[i]) {
|
| - AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1);
|
| - AddUncanonicals(isolate, ranges, boundaries[i], top);
|
| - return;
|
| - }
|
| - }
|
| -
|
| - // If we are completely in a zone with no case mappings then we are done.
|
| - for (int i = 0; i < boundary_count; i += 2) {
|
| - if (bottom >= boundaries[i] && top < boundaries[i + 1]) {
|
| -#ifdef DEBUG
|
| - for (int j = bottom; j <= top; j++) {
|
| - unsigned current_char = j;
|
| - int length = isolate->jsregexp_uncanonicalize()->get(current_char,
|
| - '\0', chars);
|
| - for (int k = 0; k < length; k++) {
|
| - ASSERT(chars[k] == current_char);
|
| - }
|
| - }
|
| -#endif
|
| - return;
|
| - }
|
| - }
|
| -
|
| - // Step through the range finding equivalent characters.
|
| - ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
|
| - for (int i = bottom; i <= top; i++) {
|
| - int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars);
|
| - for (int j = 0; j < length; j++) {
|
| - uc32 chr = chars[j];
|
| - if (chr != i && (chr < bottom || chr > top)) {
|
| - characters->Add(chr);
|
| - }
|
| - }
|
| - }
|
| -
|
| - // Step through the equivalent characters finding simple ranges and
|
| - // adding ranges to the character class.
|
| - if (characters->length() > 0) {
|
| - int new_from = characters->at(0);
|
| - int new_to = new_from;
|
| - for (int i = 1; i < characters->length(); i++) {
|
| - int chr = characters->at(i);
|
| - if (chr == new_to + 1) {
|
| - new_to++;
|
| - } else {
|
| - if (new_to == new_from) {
|
| - ranges->Add(CharacterRange::Singleton(new_from));
|
| - } else {
|
| - ranges->Add(CharacterRange(new_from, new_to));
|
| - }
|
| - new_from = new_to = chr;
|
| - }
|
| - }
|
| - if (new_to == new_from) {
|
| - ranges->Add(CharacterRange::Singleton(new_from));
|
| - } else {
|
| - ranges->Add(CharacterRange(new_from, new_to));
|
| - }
|
| - }
|
| -}
|
| -
|
| -
|
| ZoneList<CharacterRange>* CharacterSet::ranges() {
|
| if (ranges_ == NULL) {
|
| ranges_ = new ZoneList<CharacterRange>(2);
|
|
|