Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1219)

Unified Diff: src/runtime/runtime-regexp.cc

Issue 2775303002: [regexp] Named capture support for string replacements (Closed)
Patch Set: Only cast if not undefined Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/objects.cc ('k') | src/runtime/runtime-strings.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/runtime/runtime-regexp.cc
diff --git a/src/runtime/runtime-regexp.cc b/src/runtime/runtime-regexp.cc
index 824835401dd4fafe776c7539a185d5375a32b855..e5d6e4c71ed8cfac85fc4d823d1cb45c89bf1a80 100644
--- a/src/runtime/runtime-regexp.cc
+++ b/src/runtime/runtime-regexp.cc
@@ -19,14 +19,45 @@
namespace v8 {
namespace internal {
+namespace {
+
+// Returns the (1-based) numbered capture index stored in {capture_name_map}
+// for the first name accepted by {name_matches}, or -1 if no name matches.
+int LookupNamedCapture(std::function<bool(String*)> name_matches,
+ FixedArray* capture_name_map) {
+ // TODO(jgruber): Sort capture_name_map and do binary search via
+ // internalized strings.
+
+ int maybe_capture_index = -1;  // Stays -1 if the loop finds no match.
+ const int named_capture_count = capture_name_map->length() >> 1;  // Two slots are read per entry.
+ for (int j = 0; j < named_capture_count; j++) {
+ // The format of {capture_name_map} is documented at
+ // JSRegExp::kIrregexpCaptureNameMapIndex.
+ const int name_ix = j * 2;  // Slot read as the capture's name (String).
+ const int index_ix = j * 2 + 1;  // Slot read as the capture's index (Smi).
+
+ String* capture_name = String::cast(capture_name_map->get(name_ix));
+ if (!name_matches(capture_name)) continue;
+
+ maybe_capture_index = Smi::cast(capture_name_map->get(index_ix))->value();
+ break;  // First match wins; later entries are not examined.
+ }
+
+ return maybe_capture_index;
+}
+
+} // namespace
+
class CompiledReplacement {
public:
explicit CompiledReplacement(Zone* zone)
: parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
- // Return whether the replacement is simple.
- bool Compile(Handle<String> replacement, int capture_count,
- int subject_length);
+ // Return whether the replacement is simple. Can also fail and return Nothing
+ // if the given replacement string is invalid (and requires throwing a
+ // SyntaxError).
+ Maybe<bool> Compile(Handle<JSRegExp> regexp, Handle<String> replacement,
+ int capture_count, int subject_length);
// Use Apply only if Compile returned false.
void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
@@ -44,6 +75,7 @@ class CompiledReplacement {
SUBJECT_CAPTURE,
REPLACEMENT_SUBSTRING,
REPLACEMENT_STRING,
+ EMPTY,
NUMBER_OF_PART_TYPES
};
@@ -68,6 +100,7 @@ class CompiledReplacement {
DCHECK(to > from);
return ReplacementPart(-from, to);
}
+ static inline ReplacementPart Empty() { return ReplacementPart(EMPTY, 0); }  // Part tagged EMPTY; its data value (0) is unused.
// If tag <= 0 then it is the negation of a start index of a substring of
// the replacement pattern, otherwise it's a value from PartType.
@@ -80,7 +113,8 @@ class CompiledReplacement {
int tag;
// The data value's interpretation depends on the value of tag:
// tag == SUBJECT_PREFIX ||
- // tag == SUBJECT_SUFFIX: data is unused.
+ // tag == SUBJECT_SUFFIX ||
+ // tag == EMPTY: data is unused.
// tag == SUBJECT_CAPTURE: data is the number of the capture.
// tag == REPLACEMENT_SUBSTRING ||
// tag == REPLACEMENT_STRING: data is index into array of substrings
@@ -93,9 +127,17 @@ class CompiledReplacement {
};
template <typename Char>
- bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
- Vector<Char> characters, int capture_count,
- int subject_length, Zone* zone) {
+ Maybe<bool> ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
+ Vector<Char> characters,
+ FixedArray* capture_name_map,
+ int capture_count, int subject_length,
+ Zone* zone) {
+ // Equivalent to String::GetSubstitution, except that this method converts
+ // the replacement string into an internal representation that avoids
+ // repeated parsing when used repeatedly.
+ DCHECK_IMPLIES(capture_name_map != nullptr,
+ FLAG_harmony_regexp_named_captures);
+
int length = characters.length();
int last = 0;
for (int i = 0; i < length; i++) {
@@ -183,6 +225,60 @@ class CompiledReplacement {
i = next_index;
break;
}
+ case '<': {
+ if (capture_name_map == nullptr) {
+ i = next_index;
+ break;
+ }
+
+ // Scan until the next '>', throwing a SyntaxError exception if one
+ // is not found, and let the enclosed substring be groupName.
+
+ const int name_start_index = next_index + 1;
+ int closing_bracket_index = -1;
+ for (int j = name_start_index; j < length; j++) {
+ if (characters[j] == '>') {
+ closing_bracket_index = j;
+ break;
+ }
+ }
+
+ // Throw a SyntaxError for invalid replacement strings.
+ if (closing_bracket_index == -1) return Nothing<bool>();
+
+ Vector<Char> requested_name =
+ characters.SubVector(name_start_index, closing_bracket_index);
+
+ // Let capture be ? Get(namedCaptures, groupName).
+
+ int capture_index = LookupNamedCapture(
+ [=](String* capture_name) {
+ return capture_name->IsEqualTo(requested_name);
+ },
+ capture_name_map);
+
+ // If capture is undefined, replace the text through the following
+ // '>' with the empty string.
+ // Otherwise, replace the text through the following '>' with
+ // ? ToString(capture).
+
+ DCHECK_IMPLIES(
+ capture_index != -1,
+ 1 <= capture_index && capture_index <= capture_count);
+
+ ReplacementPart replacement =
+ (capture_index == -1)
+ ? ReplacementPart::Empty()
+ : ReplacementPart::SubjectCapture(capture_index);
+
+ if (i > last) {
+ parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
+ }
+ parts->Add(replacement, zone);
+ last = closing_bracket_index + 1;
+ i = closing_bracket_index;
+ break;
+ }
default:
i = next_index;
break;
@@ -192,12 +288,12 @@ class CompiledReplacement {
if (length > last) {
if (last == 0) {
// Replacement is simple. Do not use Apply to do the replacement.
- return true;
+ return Just(true);
} else {
parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
}
}
- return false;
+ return Just(false);
}
ZoneList<ReplacementPart> parts_;
@@ -205,23 +301,37 @@ class CompiledReplacement {
Zone* zone_;
};
-
-bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
- int subject_length) {
+Maybe<bool> CompiledReplacement::Compile(Handle<JSRegExp> regexp,
+ Handle<String> replacement,
+ int capture_count,
+ int subject_length) {
{
DisallowHeapAllocation no_gc;
String::FlatContent content = replacement->GetFlatContent();
DCHECK(content.IsFlat());
- bool simple = false;
+
+ FixedArray* capture_name_map = nullptr;
+ if (capture_count > 0) {
+ DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+ Object* maybe_capture_name_map = regexp->CaptureNameMap();
+ if (maybe_capture_name_map->IsFixedArray()) {
+ DCHECK(FLAG_harmony_regexp_named_captures);
+ capture_name_map = FixedArray::cast(maybe_capture_name_map);
+ }
+ }
+
+ Maybe<bool> simple = Nothing<bool>();
if (content.IsOneByte()) {
simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
- capture_count, subject_length, zone());
+ capture_name_map, capture_count,
+ subject_length, zone());
} else {
DCHECK(content.IsTwoByte());
simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
- capture_count, subject_length, zone());
+ capture_name_map, capture_count,
+ subject_length, zone());
}
- if (simple) return true;
+ if (simple.IsNothing() || simple.FromJust()) return simple;
}
Isolate* isolate = replacement->GetIsolate();
@@ -243,7 +353,7 @@ bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
substring_index++;
}
}
- return false;
+ return Just(false);
}
@@ -276,6 +386,8 @@ void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
case REPLACEMENT_STRING:
builder->AddString(replacement_substrings_[part.data]);
break;
+ case EMPTY:
+ break;
default:
UNREACHABLE();
}
@@ -491,14 +603,27 @@ MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
int capture_count = regexp->CaptureCount();
int subject_length = subject->length();
+ JSRegExp::Type typeTag = regexp->TypeTag();
+ if (typeTag == JSRegExp::IRREGEXP) {
+ // Ensure the RegExp is compiled so we can access the capture-name map.
+ RegExpImpl::IrregexpPrepare(regexp, subject);
+ }
+
// CompiledReplacement uses zone allocation.
Zone zone(isolate->allocator(), ZONE_NAME);
CompiledReplacement compiled_replacement(&zone);
- bool simple_replace =
- compiled_replacement.Compile(replacement, capture_count, subject_length);
+ Maybe<bool> maybe_simple_replace = compiled_replacement.Compile(
+ regexp, replacement, capture_count, subject_length);
+ if (maybe_simple_replace.IsNothing()) {
+ THROW_NEW_ERROR_RETURN_FAILURE(
+ isolate, NewSyntaxError(MessageTemplate::kRegExpInvalidReplaceString,
+ replacement));
+ }
+
+ const bool simple_replace = maybe_simple_replace.FromJust();
// Shortcut for simple non-regexp global replacements
- if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
+ if (typeTag == JSRegExp::ATOM && simple_replace) {
if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
isolate, subject, regexp, replacement, last_match_info);
@@ -649,7 +774,7 @@ MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
Heap* heap = isolate->heap();
// The trimming is performed on a newly allocated object, which is on a
- // fresly allocated page or on an already swept page. Hence, the sweeper
+ // freshly allocated page or on an already swept page. Hence, the sweeper
// thread can not get confused with the filler creation. No synchronization
// needed.
// TODO(hpayer): We should shrink the large object page if the size
@@ -843,23 +968,28 @@ namespace {
class MatchInfoBackedMatch : public String::Match {
public:
- MatchInfoBackedMatch(Isolate* isolate, Handle<String> subject,
+ MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
+ Handle<String> subject,
Handle<RegExpMatchInfo> match_info)
: isolate_(isolate), match_info_(match_info) {
subject_ = String::Flatten(subject);
+
+ if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
+ Object* o = regexp->CaptureNameMap();
+ has_named_captures_ = o->IsFixedArray();
+ if (has_named_captures_) {
+ DCHECK(FLAG_harmony_regexp_named_captures);
+ capture_name_map_ = handle(FixedArray::cast(o));
+ }
+ } else {
+ has_named_captures_ = false;
+ }
}
Handle<String> GetMatch() override {
return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
}
- MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
- Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
- isolate_, match_info_, i, capture_exists);
- return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
- : isolate_->factory()->empty_string();
- }
-
Handle<String> GetPrefix() override {
const int match_start = match_info_->Capture(0);
return isolate_->factory()->NewSubString(subject_, 0, match_start);
@@ -871,42 +1001,63 @@ class MatchInfoBackedMatch : public String::Match {
subject_->length());
}
+ bool HasNamedCaptures() override { return has_named_captures_; }  // Set once in the constructor: true only for an IRREGEXP regexp whose CaptureNameMap() is a FixedArray.
+
int CaptureCount() override {
return match_info_->NumberOfCaptureRegisters() / 2;
}
- virtual ~MatchInfoBackedMatch() {}
+ MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {  // Yields capture {i} of the current match as a string.
+ Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
+ isolate_, match_info_, i, capture_exists);  // *capture_exists is read right below, so the getter is expected to set it.
+ return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
+ : isolate_->factory()->empty_string();  // A capture that does not exist yields the empty string.
+ }
+
+ MaybeHandle<String> GetNamedCapture(Handle<String> name,
+ bool* capture_exists) override {  // Resolves {name} to a numbered capture and delegates to GetCapture.
+ DCHECK(has_named_captures_);  // capture_name_map_ is only assigned when this flag is true.
+ const int capture_index = LookupNamedCapture(
+ [=](String* capture_name) { return capture_name->Equals(*name); },
+ *capture_name_map_);
+
+ if (capture_index == -1) {  // No capture group carries this name.
+ *capture_exists = false;
+ return name; // Arbitrary string handle; *capture_exists is false here.
+ }
+
+ DCHECK(1 <= capture_index && capture_index <= CaptureCount());
+ return GetCapture(capture_index, capture_exists);  // Sets *capture_exists for the resolved index.
+ }
private:
Isolate* isolate_;
Handle<String> subject_;
Handle<RegExpMatchInfo> match_info_;
+
+ bool has_named_captures_;
+ Handle<FixedArray> capture_name_map_;
};
class VectorBackedMatch : public String::Match {
public:
VectorBackedMatch(Isolate* isolate, Handle<String> subject,
Handle<String> match, int match_position,
- std::vector<Handle<Object>>* captures)
+ std::vector<Handle<Object>>* captures,
+ Handle<Object> groups_obj)
: isolate_(isolate),
match_(match),
match_position_(match_position),
captures_(captures) {
subject_ = String::Flatten(subject);
+
+ DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
+ has_named_captures_ = !groups_obj->IsUndefined(isolate);
+ if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
}
Handle<String> GetMatch() override { return match_; }
- MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
- Handle<Object> capture_obj = captures_->at(i);
- if (capture_obj->IsUndefined(isolate_)) {
- *capture_exists = false;
- return isolate_->factory()->empty_string();
- }
- *capture_exists = true;
- return Object::ToString(isolate_, capture_obj);
- }
-
Handle<String> GetPrefix() override {
return isolate_->factory()->NewSubString(subject_, 0, match_position_);
}
@@ -917,9 +1068,34 @@ class VectorBackedMatch : public String::Match {
subject_->length());
}
+ bool HasNamedCaptures() override { return has_named_captures_; }  // Set once in the constructor: true iff the groups object passed in was not undefined.
+
int CaptureCount() override { return static_cast<int>(captures_->size()); }
- virtual ~VectorBackedMatch() {}
+ MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {  // Yields entry {i} of the captures vector as a string.
+ Handle<Object> capture_obj = captures_->at(i);
+ if (capture_obj->IsUndefined(isolate_)) {  // An undefined entry is reported as a non-existent capture.
+ *capture_exists = false;
+ return isolate_->factory()->empty_string();
+ }
+ *capture_exists = true;
+ return Object::ToString(isolate_, capture_obj);  // Entries are arbitrary objects, hence the ToString conversion.
+ }
+
+ MaybeHandle<String> GetNamedCapture(Handle<String> name,
+ bool* capture_exists) override {  // Reads property {name} from the groups object and converts it to a string.
+ DCHECK(has_named_captures_);  // groups_obj_ is only assigned when this flag is true.
+ Handle<Object> capture_obj;
+ ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
+ Object::GetProperty(groups_obj_, name), String);  // NOTE(review): presumably returns early if the property lookup throws - confirm against the macro.
+ if (capture_obj->IsUndefined(isolate_)) {  // An undefined property is reported as a non-existent capture.
+ *capture_exists = false;
+ return name;  // NOTE(review): presumably an arbitrary handle, as in MatchInfoBackedMatch::GetNamedCapture - confirm callers ignore it.
+ } else {
+ *capture_exists = true;
+ return Object::ToString(isolate_, capture_obj);
+ }
+ }
private:
Isolate* isolate_;
@@ -927,6 +1103,9 @@ class VectorBackedMatch : public String::Match {
Handle<String> match_;
const int match_position_;
std::vector<Handle<Object>>* captures_;
+
+ bool has_named_captures_;
+ Handle<JSReceiver> groups_obj_;
};
// Create the groups object (see also the RegExp result creation in
@@ -1072,6 +1251,7 @@ static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
elements->set(cursor++, *subject);
if (has_named_captures) {
+ DCHECK(FLAG_harmony_regexp_named_captures);
Handle<FixedArray> capture_map =
Handle<FixedArray>::cast(maybe_capture_map);
Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
@@ -1183,7 +1363,7 @@ MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
builder.AppendString(factory->NewSubString(string, 0, start_index));
if (replace->length() > 0) {
- MatchInfoBackedMatch m(isolate, string, match_indices);
+ MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
Handle<String> replacement;
ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
String::GetSubstitution(isolate, &m, replace),
@@ -1316,6 +1496,7 @@ RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
Object* maybe_capture_map = regexp->CaptureNameMap();
if (maybe_capture_map->IsFixedArray()) {
+ DCHECK(FLAG_harmony_regexp_named_captures);
has_named_captures = true;
capture_map = handle(FixedArray::cast(maybe_capture_map));
}
@@ -1703,7 +1884,13 @@ RUNTIME_FUNCTION(Runtime_RegExpReplace) {
isolate, replacement, Object::ToString(isolate, replacement_obj));
} else {
DCHECK(!functional_replace);
- VectorBackedMatch m(isolate, string, match, position, &captures);
+ if (!groups_obj->IsUndefined(isolate)) {
+ // TODO(jgruber): Behavior in this case is not yet specced.
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+ isolate, groups_obj, JSReceiver::ToObject(isolate, groups_obj));
+ }
+ VectorBackedMatch m(isolate, string, match, position, &captures,
+ groups_obj);
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, replacement, String::GetSubstitution(isolate, &m, replace));
}
« no previous file with comments | « src/objects.cc ('k') | src/runtime/runtime-strings.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698