src/jsregexp.cc - Issue 7348008: Merge up to 8597 to experimental/gc from the bleeding edge.

Unified Diff: src/jsregexp.cc

Issue 7348008: Merge up to 8597 to experimental/gc from the bleeding edge. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/gc/

Patch Set: '' Created 9 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/jsregexp.cc

===================================================================

--- src/jsregexp.cc (revision 8618)

+++ src/jsregexp.cc (working copy)

@@ -127,7 +127,7 @@

return re;

}

pattern = FlattenGetString(pattern);

- CompilationZoneScope zone_scope(isolate, DELETE_ON_EXIT);

+ ZoneScope zone_scope(isolate, DELETE_ON_EXIT);

PostponeInterruptsScope postpone(isolate);

RegExpCompileData parse_result;

FlatStringReader reader(isolate, pattern);

@@ -295,23 +295,62 @@

#else // V8_INTERPRETED_REGEXP (RegExp native code)

if (compiled_code->IsCode()) return true;

#endif

+ // We could potentially have marked this as flushable, but have kept

+ // a saved version if we did not flush it yet.

+ Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));

+ if (saved_code->IsCode()) {

+ // Reinstate the code in the original place.

+ re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);

+ ASSERT(compiled_code->IsSmi());

+ return true;

+ }

return CompileIrregexp(re, is_ascii);

}

+static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,

+ bool is_ascii,

+ Handle<String> error_message,

+ Isolate* isolate) {

+ Factory* factory = isolate->factory();

+ Handle<FixedArray> elements = factory->NewFixedArray(2);

+ elements->set(0, re->Pattern());

+ elements->set(1, *error_message);

+ Handle<JSArray> array = factory->NewJSArrayWithElements(elements);

+ Handle<Object> regexp_err =

+ factory->NewSyntaxError("malformed_regexp", array);

+ isolate->Throw(*regexp_err);

+ return false;

bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {

// Compile the RegExp.

Isolate* isolate = re->GetIsolate();

- CompilationZoneScope zone_scope(isolate, DELETE_ON_EXIT);

+ ZoneScope zone_scope(isolate, DELETE_ON_EXIT);

PostponeInterruptsScope postpone(isolate);

+ // If we had a compilation error the last time this is saved at the

+ // saved code index.

Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));

- if (entry->IsJSObject()) {

- // If it's a JSObject, a previous compilation failed and threw this object.

- // Re-throw the object without trying again.

- isolate->Throw(entry);

+ // When arriving here entry can only be a smi, either representing an

+ // uncompiled regexp, a previous compilation error, or code that has

+ // been flushed.

+ ASSERT(entry->IsSmi());

+ int entry_value = Smi::cast(entry)->value();

+ ASSERT(entry_value == JSRegExp::kUninitializedValue ||

+ entry_value == JSRegExp::kCompilationErrorValue ||

+ (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));

+ if (entry_value == JSRegExp::kCompilationErrorValue) {

+ // A previous compilation failed and threw an error which we store in

+ // the saved code index (we store the error message, not the actual

+ // error). Recreate the error object and throw it.

+ Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));

+ ASSERT(error_string->IsString());

+ Handle<String> error_message(String::cast(error_string));

+ CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);

return false;

}

- ASSERT(entry->IsTheHole());

JSRegExp::Flags flags = re->GetFlags();

@@ -340,17 +379,9 @@

is_ascii);

if (result.error_message != NULL) {

// Unable to compile regexp.

- Factory* factory = isolate->factory();

- Handle<FixedArray> elements = factory->NewFixedArray(2);

- elements->set(0, *pattern);

Handle<String> error_message =

- factory->NewStringFromUtf8(CStrVector(result.error_message));

- elements->set(1, *error_message);

- Handle<JSArray> array = factory->NewJSArrayWithElements(elements);

- Handle<Object> regexp_err =

- factory->NewSyntaxError("malformed_regexp", array);

- isolate->Throw(*regexp_err);

- re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err);

+ isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message));

+ CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);

return false;

}

@@ -460,6 +491,7 @@

ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);

do {

bool is_ascii = subject->IsAsciiRepresentation();

+ EnsureCompiledIrregexp(regexp, is_ascii);

Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);

NativeRegExpMacroAssembler::Result res =

NativeRegExpMacroAssembler::Match(code,

@@ -810,7 +842,13 @@

inline bool ignore_case() { return ignore_case_; }

inline bool ascii() { return ascii_; }

+ int current_expansion_factor() { return current_expansion_factor_; }

+ void set_current_expansion_factor(int value) {

+ current_expansion_factor_ = value;

+ }

static const int kNoRegister = -1;

private:

EndNode* accept_;

int next_register_;

@@ -820,6 +858,7 @@

bool ignore_case_;

bool ascii_;

bool reg_exp_too_big_;

+ int current_expansion_factor_;

};

@@ -847,7 +886,8 @@

recursion_depth_(0),

ignore_case_(ignore_case),

ascii_(ascii),

- reg_exp_too_big_(false) {

+ reg_exp_too_big_(false),

+ current_expansion_factor_(1) {

accept_ = new EndNode(EndNode::ACCEPT);

ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);

}

@@ -2767,6 +2807,7 @@

AlternativeGeneration* at(int i) {

return alt_gens_[i];

}

private:

static const int kAFew = 10;

ZoneList<AlternativeGeneration*> alt_gens_;

@@ -3326,6 +3367,7 @@

}

stream()->Add("}}");

}

private:

bool first_;

StringStream* stream() { return stream_; }

@@ -3727,6 +3769,44 @@

}

+// Scoped object to keep track of how much we unroll quantifier loops in the

+// regexp graph generator.

+class RegExpExpansionLimiter {

+ public:

+ static const int kMaxExpansionFactor = 6;

+ RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)

+ : compiler_(compiler),

+ saved_expansion_factor_(compiler->current_expansion_factor()),

+ ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {

+ ASSERT(factor > 0);

+ if (ok_to_expand_) {

+ if (factor > kMaxExpansionFactor) {

+ // Avoid integer overflow of the current expansion factor.

+ ok_to_expand_ = false;

+ compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);

+ } else {

+ int new_factor = saved_expansion_factor_ * factor;

+ ok_to_expand_ = (new_factor <= kMaxExpansionFactor);

+ compiler->set_current_expansion_factor(new_factor);

+ }

+ ~RegExpExpansionLimiter() {

+ compiler_->set_current_expansion_factor(saved_expansion_factor_);

+ }

+ bool ok_to_expand() { return ok_to_expand_; }

+ private:

+ RegExpCompiler* compiler_;

+ int saved_expansion_factor_;

+ bool ok_to_expand_;

+ DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);

+};

RegExpNode* RegExpQuantifier::ToNode(int min,

int max,

bool is_greedy,

@@ -3766,38 +3846,46 @@

} else if (FLAG_regexp_optimization && !needs_capture_clearing) {

// Only unroll if there are no captures and the body can't be

// empty.

- if (min > 0 && min <= kMaxUnrolledMinMatches) {

- int new_max = (max == kInfinity) ? max : max - min;

- // Recurse once to get the loop or optional matches after the fixed ones.

- RegExpNode* answer = ToNode(

- 0, new_max, is_greedy, body, compiler, on_success, true);

- // Unroll the forced matches from 0 to min. This can cause chains of

- // TextNodes (which the parser does not generate). These should be

- // combined if it turns out they hinder good code generation.

- for (int i = 0; i < min; i++) {

- answer = body->ToNode(compiler, answer);

+ {

+ RegExpExpansionLimiter limiter(

+ compiler, min + ((max != min) ? 1 : 0));

+ if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {

+ int new_max = (max == kInfinity) ? max : max - min;

+ // Recurse once to get the loop or optional matches after the fixed

+ // ones.

+ RegExpNode* answer = ToNode(

+ 0, new_max, is_greedy, body, compiler, on_success, true);

+ // Unroll the forced matches from 0 to min. This can cause chains of

+ // TextNodes (which the parser does not generate). These should be

+ // combined if it turns out they hinder good code generation.

+ for (int i = 0; i < min; i++) {

+ answer = body->ToNode(compiler, answer);

+ }

+ return answer;

}

- return answer;

}

- if (max <= kMaxUnrolledMaxMatches) {

- ASSERT(min == 0);

- // Unroll the optional matches up to max.

- RegExpNode* answer = on_success;

- for (int i = 0; i < max; i++) {

- ChoiceNode* alternation = new ChoiceNode(2);

- if (is_greedy) {

- alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,

- answer)));

- alternation->AddAlternative(GuardedAlternative(on_success));

- } else {

- alternation->AddAlternative(GuardedAlternative(on_success));

- alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,

- answer)));

+ if (max <= kMaxUnrolledMaxMatches && min == 0) {

+ ASSERT(max > 0); // Due to the 'if' above.

+ RegExpExpansionLimiter limiter(compiler, max);

+ if (limiter.ok_to_expand()) {

+ // Unroll the optional matches up to max.

+ RegExpNode* answer = on_success;

+ for (int i = 0; i < max; i++) {

+ ChoiceNode* alternation = new ChoiceNode(2);

+ if (is_greedy) {

+ alternation->AddAlternative(

+ GuardedAlternative(body->ToNode(compiler, answer)));

+ alternation->AddAlternative(GuardedAlternative(on_success));

+ } else {

+ alternation->AddAlternative(GuardedAlternative(on_success));

+ alternation->AddAlternative(

+ GuardedAlternative(body->ToNode(compiler, answer)));

+ }

+ answer = alternation;

+ if (not_at_start) alternation->set_not_at_start();

}

- answer = alternation;

- if (not_at_start) alternation->set_not_at_start();

+ return answer;

}

- return answer;

}

bool has_min = min > 0;

@@ -4123,12 +4211,6 @@

}

-static void AddUncanonicals(Isolate* isolate,

- ZoneList<CharacterRange>* ranges,

- int bottom,

- int top);

void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,

bool is_ascii) {

Isolate* isolate = Isolate::Current();

@@ -4289,101 +4371,6 @@

}

-static void AddUncanonicals(Isolate* isolate,

- ZoneList<CharacterRange>* ranges,

- int bottom,

- int top) {

- unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

- // Zones with no case mappings. There is a DEBUG-mode loop to assert that

- // this table is correct.

- // 0x0600 - 0x0fff

- // 0x1100 - 0x1cff

- // 0x2000 - 0x20ff

- // 0x2200 - 0x23ff

- // 0x2500 - 0x2bff

- // 0x2e00 - 0xa5ff

- // 0xa800 - 0xfaff

- // 0xfc00 - 0xfeff

- const int boundary_count = 18;

- int boundaries[] = {

- 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500,

- 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00};

- // Special ASCII rule from spec can save us some work here.

- if (bottom == 0x80 && top == 0xffff) return;

- if (top <= boundaries[0]) {

- CharacterRange range(bottom, top);

- range.AddCaseEquivalents(ranges, false);

- return;

- }

- // Split up very large ranges. This helps remove ranges where there are no

- // case mappings.

- for (int i = 0; i < boundary_count; i++) {

- if (bottom < boundaries[i] && top >= boundaries[i]) {

- AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1);

- AddUncanonicals(isolate, ranges, boundaries[i], top);

- return;

- }

- // If we are completely in a zone with no case mappings then we are done.

- for (int i = 0; i < boundary_count; i += 2) {

- if (bottom >= boundaries[i] && top < boundaries[i + 1]) {

-#ifdef DEBUG

- for (int j = bottom; j <= top; j++) {

- unsigned current_char = j;

- int length = isolate->jsregexp_uncanonicalize()->get(current_char,

- '\0', chars);

- for (int k = 0; k < length; k++) {

- ASSERT(chars[k] == current_char);

- }

-#endif

- return;

- }

- // Step through the range finding equivalent characters.

- ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);

- for (int i = bottom; i <= top; i++) {

- int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars);

- for (int j = 0; j < length; j++) {

- uc32 chr = chars[j];

- if (chr != i && (chr < bottom || chr > top)) {

- characters->Add(chr);

- }

- // Step through the equivalent characters finding simple ranges and

- // adding ranges to the character class.

- if (characters->length() > 0) {

- int new_from = characters->at(0);

- int new_to = new_from;

- for (int i = 1; i < characters->length(); i++) {

- int chr = characters->at(i);

- if (chr == new_to + 1) {

- new_to++;

- } else {

- if (new_to == new_from) {

- ranges->Add(CharacterRange::Singleton(new_from));

- } else {

- ranges->Add(CharacterRange(new_from, new_to));

- }

- new_from = new_to = chr;

- }

- if (new_to == new_from) {

- ranges->Add(CharacterRange::Singleton(new_from));

- } else {

- ranges->Add(CharacterRange(new_from, new_to));

- }

ZoneList<CharacterRange>* CharacterSet::ranges() {

if (ranges_ == NULL) {

ranges_ = new ZoneList<CharacterRange>(2);

« no previous file with comments | « src/jsregexp.h ('k') | src/lithium.h » ('j') | no next file with comments »