| Index: src/runtime/runtime-regexp.cc
|
| diff --git a/src/runtime/runtime-regexp.cc b/src/runtime/runtime-regexp.cc
|
| index 824835401dd4fafe776c7539a185d5375a32b855..e5d6e4c71ed8cfac85fc4d823d1cb45c89bf1a80 100644
|
| --- a/src/runtime/runtime-regexp.cc
|
| +++ b/src/runtime/runtime-regexp.cc
|
| @@ -19,14 +19,45 @@
|
| namespace v8 {
|
| namespace internal {
|
|
|
| +namespace {
|
| +
|
| +// Returns the (1-based) index of the first capture whose name satisfies
|
| +// {name_matches}, or -1 if none does. The predicate is borrowed, not copied.
|
| +int LookupNamedCapture(const std::function<bool(String*)>& name_matches,
|
| +                       FixedArray* capture_name_map) {
|
| +  // TODO(jgruber): Sort capture_name_map and do binary search via
|
| +  // internalized strings.
|
| +
|
| +  // The map stores (name, index) pairs back to back; its format is
|
| +  // documented at JSRegExp::kIrregexpCaptureNameMapIndex.
|
| +  const int named_capture_count = capture_name_map->length() >> 1;
|
| +  for (int j = 0; j < named_capture_count; j++) {
|
| +    const int name_ix = j * 2;
|
| +    const int index_ix = j * 2 + 1;
|
| +
|
| +    String* capture_name = String::cast(capture_name_map->get(name_ix));
|
| +    if (!name_matches(capture_name)) continue;
|
| +
|
| +    // The first accepted name wins: hand back the Smi stored in the slot
|
| +    // that follows it.
|
| +    return Smi::cast(capture_name_map->get(index_ix))->value();
|
| +  }
|
| +
|
| +  return -1;
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| class CompiledReplacement {
|
| public:
|
| explicit CompiledReplacement(Zone* zone)
|
| : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
|
|
|
| - // Return whether the replacement is simple.
|
| - bool Compile(Handle<String> replacement, int capture_count,
|
| - int subject_length);
|
| + // Return whether the replacement is simple. Can also fail and return Nothing
|
| + // if the given replacement string is invalid (and requires throwing a
|
| + // SyntaxError).
|
| + Maybe<bool> Compile(Handle<JSRegExp> regexp, Handle<String> replacement,
|
| + int capture_count, int subject_length);
|
|
|
| // Use Apply only if Compile returned false.
|
| void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
|
| @@ -44,6 +75,7 @@ class CompiledReplacement {
|
| SUBJECT_CAPTURE,
|
| REPLACEMENT_SUBSTRING,
|
| REPLACEMENT_STRING,
|
| + EMPTY,
|
| NUMBER_OF_PART_TYPES
|
| };
|
|
|
| @@ -68,6 +100,7 @@ class CompiledReplacement {
|
| DCHECK(to > from);
|
| return ReplacementPart(-from, to);
|
| }
|
| + static inline ReplacementPart Empty() { return ReplacementPart(EMPTY, 0); }
|
|
|
| // If tag <= 0 then it is the negation of a start index of a substring of
|
| // the replacement pattern, otherwise it's a value from PartType.
|
| @@ -80,7 +113,8 @@ class CompiledReplacement {
|
| int tag;
|
| // The data value's interpretation depends on the value of tag:
|
| // tag == SUBJECT_PREFIX ||
|
| - // tag == SUBJECT_SUFFIX: data is unused.
|
| + // tag == SUBJECT_SUFFIX ||
|
| + // tag == EMPTY: data is unused.
|
| // tag == SUBJECT_CAPTURE: data is the number of the capture.
|
| // tag == REPLACEMENT_SUBSTRING ||
|
| // tag == REPLACEMENT_STRING: data is index into array of substrings
|
| @@ -93,9 +127,17 @@ class CompiledReplacement {
|
| };
|
|
|
| template <typename Char>
|
| - bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
|
| - Vector<Char> characters, int capture_count,
|
| - int subject_length, Zone* zone) {
|
| + Maybe<bool> ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
|
| + Vector<Char> characters,
|
| + FixedArray* capture_name_map,
|
| + int capture_count, int subject_length,
|
| + Zone* zone) {
|
| + // Equivalent to String::GetSubstitution, except that this method converts
|
| + // the replacement string into an internal representation that avoids
|
| + // repeated parsing when used repeatedly.
|
| + DCHECK_IMPLIES(capture_name_map != nullptr,
|
| + FLAG_harmony_regexp_named_captures);
|
| +
|
| int length = characters.length();
|
| int last = 0;
|
| for (int i = 0; i < length; i++) {
|
| @@ -183,6 +225,60 @@ class CompiledReplacement {
|
| i = next_index;
|
| break;
|
| }
|
| + case '<': {
|
| + if (capture_name_map == nullptr) {
|
| + i = next_index;
|
| + break;
|
| + }
|
| +
|
| + // Scan until the next '>', throwing a SyntaxError exception if one
|
| + // is not found, and let the enclosed substring be groupName.
|
| +
|
| + const int name_start_index = next_index + 1;
|
| + int closing_bracket_index = -1;
|
| + for (int j = name_start_index; j < length; j++) {
|
| + if (characters[j] == '>') {
|
| + closing_bracket_index = j;
|
| + break;
|
| + }
|
| + }
|
| +
|
| + // Throw a SyntaxError for invalid replacement strings.
|
| + if (closing_bracket_index == -1) return Nothing<bool>();
|
| +
|
| + Vector<Char> requested_name =
|
| + characters.SubVector(name_start_index, closing_bracket_index);
|
| +
|
| + // Let capture be ? Get(namedCaptures, groupName).
|
| +
|
| + int capture_index = LookupNamedCapture(
|
| + [=](String* capture_name) {
|
| + return capture_name->IsEqualTo(requested_name);
|
| + },
|
| + capture_name_map);
|
| +
|
| + // If capture is undefined, replace the text through the following
|
| + // '>' with the empty string.
|
| + // Otherwise, replace the text through the following '>' with
|
| + // ? ToString(capture).
|
| +
|
| + DCHECK_IMPLIES(
|
| + capture_index != -1,
|
| + 1 <= capture_index && capture_index <= capture_count);
|
| +
|
| + ReplacementPart replacement =
|
| + (capture_index == -1)
|
| + ? ReplacementPart::Empty()
|
| + : ReplacementPart::SubjectCapture(capture_index);
|
| +
|
| + if (i > last) {
|
| + parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
|
| + }
|
| + parts->Add(replacement, zone);
|
| + last = closing_bracket_index + 1;
|
| + i = closing_bracket_index;
|
| + break;
|
| + }
|
| default:
|
| i = next_index;
|
| break;
|
| @@ -192,12 +288,12 @@ class CompiledReplacement {
|
| if (length > last) {
|
| if (last == 0) {
|
| // Replacement is simple. Do not use Apply to do the replacement.
|
| - return true;
|
| + return Just(true);
|
| } else {
|
| parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
|
| }
|
| }
|
| - return false;
|
| + return Just(false);
|
| }
|
|
|
| ZoneList<ReplacementPart> parts_;
|
| @@ -205,23 +301,37 @@ class CompiledReplacement {
|
| Zone* zone_;
|
| };
|
|
|
| -
|
| -bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
|
| - int subject_length) {
|
| +Maybe<bool> CompiledReplacement::Compile(Handle<JSRegExp> regexp,
|
| + Handle<String> replacement,
|
| + int capture_count,
|
| + int subject_length) {
|
| {
|
| DisallowHeapAllocation no_gc;
|
| String::FlatContent content = replacement->GetFlatContent();
|
| DCHECK(content.IsFlat());
|
| - bool simple = false;
|
| +
|
| + FixedArray* capture_name_map = nullptr;
|
| + if (capture_count > 0) {
|
| + DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
|
| + Object* maybe_capture_name_map = regexp->CaptureNameMap();
|
| + if (maybe_capture_name_map->IsFixedArray()) {
|
| + DCHECK(FLAG_harmony_regexp_named_captures);
|
| + capture_name_map = FixedArray::cast(maybe_capture_name_map);
|
| + }
|
| + }
|
| +
|
| + Maybe<bool> simple = Nothing<bool>();
|
| if (content.IsOneByte()) {
|
| simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
|
| - capture_count, subject_length, zone());
|
| + capture_name_map, capture_count,
|
| + subject_length, zone());
|
| } else {
|
| DCHECK(content.IsTwoByte());
|
| simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
|
| - capture_count, subject_length, zone());
|
| + capture_name_map, capture_count,
|
| + subject_length, zone());
|
| }
|
| - if (simple) return true;
|
| + if (simple.IsNothing() || simple.FromJust()) return simple;
|
| }
|
|
|
| Isolate* isolate = replacement->GetIsolate();
|
| @@ -243,7 +353,7 @@ bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
|
| substring_index++;
|
| }
|
| }
|
| - return false;
|
| + return Just(false);
|
| }
|
|
|
|
|
| @@ -276,6 +386,8 @@ void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
|
| case REPLACEMENT_STRING:
|
| builder->AddString(replacement_substrings_[part.data]);
|
| break;
|
| + case EMPTY:
|
| + break;
|
| default:
|
| UNREACHABLE();
|
| }
|
| @@ -491,14 +603,27 @@ MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
|
| int capture_count = regexp->CaptureCount();
|
| int subject_length = subject->length();
|
|
|
| + JSRegExp::Type typeTag = regexp->TypeTag();
|
| + if (typeTag == JSRegExp::IRREGEXP) {
|
| + // Ensure the RegExp is compiled so we can access the capture-name map.
|
| + RegExpImpl::IrregexpPrepare(regexp, subject);
|
| + }
|
| +
|
| // CompiledReplacement uses zone allocation.
|
| Zone zone(isolate->allocator(), ZONE_NAME);
|
| CompiledReplacement compiled_replacement(&zone);
|
| - bool simple_replace =
|
| - compiled_replacement.Compile(replacement, capture_count, subject_length);
|
| + Maybe<bool> maybe_simple_replace = compiled_replacement.Compile(
|
| + regexp, replacement, capture_count, subject_length);
|
| + if (maybe_simple_replace.IsNothing()) {
|
| + THROW_NEW_ERROR_RETURN_FAILURE(
|
| + isolate, NewSyntaxError(MessageTemplate::kRegExpInvalidReplaceString,
|
| + replacement));
|
| + }
|
| +
|
| + const bool simple_replace = maybe_simple_replace.FromJust();
|
|
|
| // Shortcut for simple non-regexp global replacements
|
| - if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
|
| + if (typeTag == JSRegExp::ATOM && simple_replace) {
|
| if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
|
| return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
|
| isolate, subject, regexp, replacement, last_match_info);
|
| @@ -649,7 +774,7 @@ MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
|
| Heap* heap = isolate->heap();
|
|
|
| // The trimming is performed on a newly allocated object, which is on a
|
| - // fresly allocated page or on an already swept page. Hence, the sweeper
|
| + // freshly allocated page or on an already swept page. Hence, the sweeper
|
| // thread can not get confused with the filler creation. No synchronization
|
| // needed.
|
| // TODO(hpayer): We should shrink the large object page if the size
|
| @@ -843,23 +968,28 @@ namespace {
|
|
|
| class MatchInfoBackedMatch : public String::Match {
|
| public:
|
| - MatchInfoBackedMatch(Isolate* isolate, Handle<String> subject,
|
| + MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
|
| + Handle<String> subject,
|
| Handle<RegExpMatchInfo> match_info)
|
| : isolate_(isolate), match_info_(match_info) {
|
| subject_ = String::Flatten(subject);
|
| +
|
| + if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
|
| + Object* o = regexp->CaptureNameMap();
|
| + has_named_captures_ = o->IsFixedArray();
|
| + if (has_named_captures_) {
|
| + DCHECK(FLAG_harmony_regexp_named_captures);
|
| + capture_name_map_ = handle(FixedArray::cast(o));
|
| + }
|
| + } else {
|
| + has_named_captures_ = false;
|
| + }
|
| }
|
|
|
| Handle<String> GetMatch() override {
|
| return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
|
| }
|
|
|
| - MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
|
| - Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
|
| - isolate_, match_info_, i, capture_exists);
|
| - return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
|
| - : isolate_->factory()->empty_string();
|
| - }
|
| -
|
| Handle<String> GetPrefix() override {
|
| const int match_start = match_info_->Capture(0);
|
| return isolate_->factory()->NewSubString(subject_, 0, match_start);
|
| @@ -871,42 +1001,63 @@ class MatchInfoBackedMatch : public String::Match {
|
| subject_->length());
|
| }
|
|
|
| + bool HasNamedCaptures() override { return has_named_captures_; }
|
| +
|
| int CaptureCount() override {
|
| return match_info_->NumberOfCaptureRegisters() / 2;
|
| }
|
|
|
| - virtual ~MatchInfoBackedMatch() {}
|
| +  MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
|
| +    Handle<Object> captured = RegExpUtils::GenericCaptureGetter(
|
| +        isolate_, match_info_, i, capture_exists);
|
| +    if (!*capture_exists) return isolate_->factory()->empty_string();
|
| +    return Object::ToString(isolate_, captured);
|
| +  }
|
| +
|
| +  MaybeHandle<String> GetNamedCapture(Handle<String> name,
|
| +                                      bool* capture_exists) override {
|
| +    DCHECK(has_named_captures_);
|
| +    // Resolve {name} to a numbered capture through the capture-name map.
|
| +    auto is_requested = [=](String* candidate) {
|
| +      return candidate->Equals(*name);
|
| +    };
|
| +    const int index = LookupNamedCapture(is_requested, *capture_name_map_);
|
| +    if (index != -1) {
|
| +      DCHECK(1 <= index && index <= CaptureCount());
|
| +      return GetCapture(index, capture_exists);
|
| +    }
|
| +    *capture_exists = false;
|
| +    return name;  // Arbitrary string handle.
|
| +  }
|
|
|
| private:
|
| Isolate* isolate_;
|
| Handle<String> subject_;
|
| Handle<RegExpMatchInfo> match_info_;
|
| +
|
| + bool has_named_captures_;
|
| + Handle<FixedArray> capture_name_map_;
|
| };
|
|
|
| class VectorBackedMatch : public String::Match {
|
| public:
|
| VectorBackedMatch(Isolate* isolate, Handle<String> subject,
|
| Handle<String> match, int match_position,
|
| - std::vector<Handle<Object>>* captures)
|
| + std::vector<Handle<Object>>* captures,
|
| + Handle<Object> groups_obj)
|
| : isolate_(isolate),
|
| match_(match),
|
| match_position_(match_position),
|
| captures_(captures) {
|
| subject_ = String::Flatten(subject);
|
| +
|
| + DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
|
| + has_named_captures_ = !groups_obj->IsUndefined(isolate);
|
| + if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
|
| }
|
|
|
| Handle<String> GetMatch() override { return match_; }
|
|
|
| - MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
|
| - Handle<Object> capture_obj = captures_->at(i);
|
| - if (capture_obj->IsUndefined(isolate_)) {
|
| - *capture_exists = false;
|
| - return isolate_->factory()->empty_string();
|
| - }
|
| - *capture_exists = true;
|
| - return Object::ToString(isolate_, capture_obj);
|
| - }
|
| -
|
| Handle<String> GetPrefix() override {
|
| return isolate_->factory()->NewSubString(subject_, 0, match_position_);
|
| }
|
| @@ -917,9 +1068,34 @@ class VectorBackedMatch : public String::Match {
|
| subject_->length());
|
| }
|
|
|
| + bool HasNamedCaptures() override { return has_named_captures_; }
|
| +
|
| int CaptureCount() override { return static_cast<int>(captures_->size()); }
|
|
|
| - virtual ~VectorBackedMatch() {}
|
| +  MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
|
| +    // An undefined entry is reported as a missing capture and yields the
|
| +    // empty string; anything else is converted with Object::ToString.
|
| +    Handle<Object> entry = captures_->at(i);
|
| +    const bool is_defined = !entry->IsUndefined(isolate_);
|
| +    *capture_exists = is_defined;
|
| +    if (!is_defined) return isolate_->factory()->empty_string();
|
| +    return Object::ToString(isolate_, entry);
|
| +  }
|
| +
|
| +  MaybeHandle<String> GetNamedCapture(Handle<String> name,
|
| +                                      bool* capture_exists) override {
|
| +    DCHECK(has_named_captures_);
|
| +    // Named captures are read as properties of the groups object.
|
| +    Handle<Object> value;
|
| +    ASSIGN_RETURN_ON_EXCEPTION(isolate_, value,
|
| +                               Object::GetProperty(groups_obj_, name), String);
|
| +
|
| +    const bool is_defined = !value->IsUndefined(isolate_);
|
| +    *capture_exists = is_defined;
|
| +    // {name} itself is returned as-is when the property is undefined.
|
| +    if (!is_defined) return name;
|
| +    return Object::ToString(isolate_, value);
|
| +  }
|
|
|
| private:
|
| Isolate* isolate_;
|
| @@ -927,6 +1103,9 @@ class VectorBackedMatch : public String::Match {
|
| Handle<String> match_;
|
| const int match_position_;
|
| std::vector<Handle<Object>>* captures_;
|
| +
|
| + bool has_named_captures_;
|
| + Handle<JSReceiver> groups_obj_;
|
| };
|
|
|
| // Create the groups object (see also the RegExp result creation in
|
| @@ -1072,6 +1251,7 @@ static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
|
| elements->set(cursor++, *subject);
|
|
|
| if (has_named_captures) {
|
| + DCHECK(FLAG_harmony_regexp_named_captures);
|
| Handle<FixedArray> capture_map =
|
| Handle<FixedArray>::cast(maybe_capture_map);
|
| Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
|
| @@ -1183,7 +1363,7 @@ MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
|
| builder.AppendString(factory->NewSubString(string, 0, start_index));
|
|
|
| if (replace->length() > 0) {
|
| - MatchInfoBackedMatch m(isolate, string, match_indices);
|
| + MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
|
| Handle<String> replacement;
|
| ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
|
| String::GetSubstitution(isolate, &m, replace),
|
| @@ -1316,6 +1496,7 @@ RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
|
|
|
| Object* maybe_capture_map = regexp->CaptureNameMap();
|
| if (maybe_capture_map->IsFixedArray()) {
|
| + DCHECK(FLAG_harmony_regexp_named_captures);
|
| has_named_captures = true;
|
| capture_map = handle(FixedArray::cast(maybe_capture_map));
|
| }
|
| @@ -1703,7 +1884,13 @@ RUNTIME_FUNCTION(Runtime_RegExpReplace) {
|
| isolate, replacement, Object::ToString(isolate, replacement_obj));
|
| } else {
|
| DCHECK(!functional_replace);
|
| - VectorBackedMatch m(isolate, string, match, position, &captures);
|
| + if (!groups_obj->IsUndefined(isolate)) {
|
| + // TODO(jgruber): Behavior in this case is not yet specced.
|
| + ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
|
| + isolate, groups_obj, JSReceiver::ToObject(isolate, groups_obj));
|
| + }
|
| + VectorBackedMatch m(isolate, string, match, position, &captures,
|
| + groups_obj);
|
| ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
|
| isolate, replacement, String::GetSubstitution(isolate, &m, replace));
|
| }
|
|
|