Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(450)

Side by Side Diff: src/runtime/runtime-regexp.cc

Issue 2775303002: [regexp] Named capture support for string replacements (Closed)
Patch Set: Only cast if not undefined | Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/objects.cc ('k') | src/runtime/runtime-strings.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/runtime/runtime-utils.h" 5 #include "src/runtime/runtime-utils.h"
6 6
7 #include <functional> 7 #include <functional>
8 8
9 #include "src/arguments.h" 9 #include "src/arguments.h"
10 #include "src/conversions-inl.h" 10 #include "src/conversions-inl.h"
11 #include "src/isolate-inl.h" 11 #include "src/isolate-inl.h"
12 #include "src/messages.h" 12 #include "src/messages.h"
13 #include "src/regexp/jsregexp-inl.h" 13 #include "src/regexp/jsregexp-inl.h"
14 #include "src/regexp/jsregexp.h" 14 #include "src/regexp/jsregexp.h"
15 #include "src/regexp/regexp-utils.h" 15 #include "src/regexp/regexp-utils.h"
16 #include "src/string-builder.h" 16 #include "src/string-builder.h"
17 #include "src/string-search.h" 17 #include "src/string-search.h"
18 18
19 namespace v8 { 19 namespace v8 {
20 namespace internal { 20 namespace internal {
21 21
22 namespace {
23
24 // Looks up the capture of the given name. Returns the (1-based) numbered
25 // capture index or -1 on failure.
26 int LookupNamedCapture(std::function<bool(String*)> name_matches,
27 FixedArray* capture_name_map) {
28 // TODO(jgruber): Sort capture_name_map and do binary search via
29 // internalized strings.
30
31 int maybe_capture_index = -1;
32 const int named_capture_count = capture_name_map->length() >> 1;
33 for (int j = 0; j < named_capture_count; j++) {
34 // The format of {capture_name_map} is documented at
35 // JSRegExp::kIrregexpCaptureNameMapIndex.
36 const int name_ix = j * 2;
37 const int index_ix = j * 2 + 1;
38
39 String* capture_name = String::cast(capture_name_map->get(name_ix));
40 if (!name_matches(capture_name)) continue;
41
42 maybe_capture_index = Smi::cast(capture_name_map->get(index_ix))->value();
43 break;
44 }
45
46 return maybe_capture_index;
47 }
48
49 } // namespace
50
22 class CompiledReplacement { 51 class CompiledReplacement {
23 public: 52 public:
24 explicit CompiledReplacement(Zone* zone) 53 explicit CompiledReplacement(Zone* zone)
25 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {} 54 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
26 55
27 // Return whether the replacement is simple. 56 // Return whether the replacement is simple. Can also fail and return Nothing
28 bool Compile(Handle<String> replacement, int capture_count, 57 // if the given replacement string is invalid (and requires throwing a
29 int subject_length); 58 // SyntaxError).
59 Maybe<bool> Compile(Handle<JSRegExp> regexp, Handle<String> replacement,
60 int capture_count, int subject_length);
30 61
31 // Use Apply only if Compile returned false. 62 // Use Apply only if Compile returned false.
32 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to, 63 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
33 int32_t* match); 64 int32_t* match);
34 65
35 // Number of distinct parts of the replacement pattern. 66 // Number of distinct parts of the replacement pattern.
36 int parts() { return parts_.length(); } 67 int parts() { return parts_.length(); }
37 68
38 Zone* zone() const { return zone_; } 69 Zone* zone() const { return zone_; }
39 70
40 private: 71 private:
41 enum PartType { 72 enum PartType {
42 SUBJECT_PREFIX = 1, 73 SUBJECT_PREFIX = 1,
43 SUBJECT_SUFFIX, 74 SUBJECT_SUFFIX,
44 SUBJECT_CAPTURE, 75 SUBJECT_CAPTURE,
45 REPLACEMENT_SUBSTRING, 76 REPLACEMENT_SUBSTRING,
46 REPLACEMENT_STRING, 77 REPLACEMENT_STRING,
78 EMPTY,
47 NUMBER_OF_PART_TYPES 79 NUMBER_OF_PART_TYPES
48 }; 80 };
49 81
50 struct ReplacementPart { 82 struct ReplacementPart {
51 static inline ReplacementPart SubjectMatch() { 83 static inline ReplacementPart SubjectMatch() {
52 return ReplacementPart(SUBJECT_CAPTURE, 0); 84 return ReplacementPart(SUBJECT_CAPTURE, 0);
53 } 85 }
54 static inline ReplacementPart SubjectCapture(int capture_index) { 86 static inline ReplacementPart SubjectCapture(int capture_index) {
55 return ReplacementPart(SUBJECT_CAPTURE, capture_index); 87 return ReplacementPart(SUBJECT_CAPTURE, capture_index);
56 } 88 }
57 static inline ReplacementPart SubjectPrefix() { 89 static inline ReplacementPart SubjectPrefix() {
58 return ReplacementPart(SUBJECT_PREFIX, 0); 90 return ReplacementPart(SUBJECT_PREFIX, 0);
59 } 91 }
60 static inline ReplacementPart SubjectSuffix(int subject_length) { 92 static inline ReplacementPart SubjectSuffix(int subject_length) {
61 return ReplacementPart(SUBJECT_SUFFIX, subject_length); 93 return ReplacementPart(SUBJECT_SUFFIX, subject_length);
62 } 94 }
63 static inline ReplacementPart ReplacementString() { 95 static inline ReplacementPart ReplacementString() {
64 return ReplacementPart(REPLACEMENT_STRING, 0); 96 return ReplacementPart(REPLACEMENT_STRING, 0);
65 } 97 }
66 static inline ReplacementPart ReplacementSubString(int from, int to) { 98 static inline ReplacementPart ReplacementSubString(int from, int to) {
67 DCHECK(from >= 0); 99 DCHECK(from >= 0);
68 DCHECK(to > from); 100 DCHECK(to > from);
69 return ReplacementPart(-from, to); 101 return ReplacementPart(-from, to);
70 } 102 }
103 static inline ReplacementPart Empty() { return ReplacementPart(EMPTY, 0); }
71 104
72 // If tag <= 0 then it is the negation of a start index of a substring of 105 // If tag <= 0 then it is the negation of a start index of a substring of
73 // the replacement pattern, otherwise it's a value from PartType. 106 // the replacement pattern, otherwise it's a value from PartType.
74 ReplacementPart(int tag, int data) : tag(tag), data(data) { 107 ReplacementPart(int tag, int data) : tag(tag), data(data) {
75 // Must be non-positive or a PartType value. 108 // Must be non-positive or a PartType value.
76 DCHECK(tag < NUMBER_OF_PART_TYPES); 109 DCHECK(tag < NUMBER_OF_PART_TYPES);
77 } 110 }
78 // Either a value of PartType or a non-positive number that is 111 // Either a value of PartType or a non-positive number that is
79 // the negation of an index into the replacement string. 112 // the negation of an index into the replacement string.
80 int tag; 113 int tag;
81 // The data value's interpretation depends on the value of tag: 114 // The data value's interpretation depends on the value of tag:
82 // tag == SUBJECT_PREFIX || 115 // tag == SUBJECT_PREFIX ||
83 // tag == SUBJECT_SUFFIX: data is unused. 116 // tag == SUBJECT_SUFFIX ||
117 // tag == EMPTY: data is unused.
84 // tag == SUBJECT_CAPTURE: data is the number of the capture. 118 // tag == SUBJECT_CAPTURE: data is the number of the capture.
85 // tag == REPLACEMENT_SUBSTRING || 119 // tag == REPLACEMENT_SUBSTRING ||
86 // tag == REPLACEMENT_STRING: data is index into array of substrings 120 // tag == REPLACEMENT_STRING: data is index into array of substrings
87 // of the replacement string. 121 // of the replacement string.
88 // tag <= 0: Temporary representation of the substring of the replacement 122 // tag <= 0: Temporary representation of the substring of the replacement
89 // string ranging over -tag .. data. 123 // string ranging over -tag .. data.
90 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the 124 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
91 // substring objects. 125 // substring objects.
92 int data; 126 int data;
93 }; 127 };
94 128
95 template <typename Char> 129 template <typename Char>
96 bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts, 130 Maybe<bool> ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
97 Vector<Char> characters, int capture_count, 131 Vector<Char> characters,
98 int subject_length, Zone* zone) { 132 FixedArray* capture_name_map,
133 int capture_count, int subject_length,
134 Zone* zone) {
135 // Equivalent to String::GetSubstitution, except that this method converts
136 // the replacement string into an internal representation that avoids
137 // repeated parsing when used repeatedly.
138 DCHECK_IMPLIES(capture_name_map != nullptr,
139 FLAG_harmony_regexp_named_captures);
140
99 int length = characters.length(); 141 int length = characters.length();
100 int last = 0; 142 int last = 0;
101 for (int i = 0; i < length; i++) { 143 for (int i = 0; i < length; i++) {
102 Char c = characters[i]; 144 Char c = characters[i];
103 if (c == '$') { 145 if (c == '$') {
104 int next_index = i + 1; 146 int next_index = i + 1;
105 if (next_index == length) { // No next character! 147 if (next_index == length) { // No next character!
106 break; 148 break;
107 } 149 }
108 Char c2 = characters[next_index]; 150 Char c2 = characters[next_index];
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
176 parts->Add(ReplacementPart::ReplacementSubString(last, i), 218 parts->Add(ReplacementPart::ReplacementSubString(last, i),
177 zone); 219 zone);
178 } 220 }
179 DCHECK(capture_ref <= capture_count); 221 DCHECK(capture_ref <= capture_count);
180 parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone); 222 parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
181 last = next_index + 1; 223 last = next_index + 1;
182 } 224 }
183 i = next_index; 225 i = next_index;
184 break; 226 break;
185 } 227 }
228 case '<': {
229 if (capture_name_map == nullptr) {
230 i = next_index;
231 break;
232 }
233
234 // Scan until the next '>', throwing a SyntaxError exception if one
235 // is not found, and let the enclosed substring be groupName.
236
237 const int name_start_index = next_index + 1;
238 int closing_bracket_index = -1;
239 for (int j = name_start_index; j < length; j++) {
240 if (characters[j] == '>') {
241 closing_bracket_index = j;
242 break;
243 }
244 }
245
246 // Throw a SyntaxError for invalid replacement strings.
247 if (closing_bracket_index == -1) return Nothing<bool>();
248
249 Vector<Char> requested_name =
250 characters.SubVector(name_start_index, closing_bracket_index);
251
252 // Let capture be ? Get(namedCaptures, groupName).
253
254 int capture_index = LookupNamedCapture(
255 [=](String* capture_name) {
256 return capture_name->IsEqualTo(requested_name);
257 },
258 capture_name_map);
259
260 // If capture is undefined, replace the text through the following
261 // '>' with the empty string.
262 // Otherwise, replace the text through the following '>' with
263 // ? ToString(capture).
264
265 DCHECK_IMPLIES(
266 capture_index != -1,
267 1 <= capture_index && capture_index <= capture_count);
268
269 ReplacementPart replacement =
270 (capture_index == -1)
271 ? ReplacementPart::Empty()
272 : ReplacementPart::SubjectCapture(capture_index);
273
274 if (i > last) {
275 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
276 }
277 parts->Add(replacement, zone);
278 last = closing_bracket_index + 1;
279 i = closing_bracket_index;
280 break;
281 }
186 default: 282 default:
187 i = next_index; 283 i = next_index;
188 break; 284 break;
189 } 285 }
190 } 286 }
191 } 287 }
192 if (length > last) { 288 if (length > last) {
193 if (last == 0) { 289 if (last == 0) {
194 // Replacement is simple. Do not use Apply to do the replacement. 290 // Replacement is simple. Do not use Apply to do the replacement.
195 return true; 291 return Just(true);
196 } else { 292 } else {
197 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone); 293 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
198 } 294 }
199 } 295 }
200 return false; 296 return Just(false);
201 } 297 }
202 298
203 ZoneList<ReplacementPart> parts_; 299 ZoneList<ReplacementPart> parts_;
204 ZoneList<Handle<String> > replacement_substrings_; 300 ZoneList<Handle<String> > replacement_substrings_;
205 Zone* zone_; 301 Zone* zone_;
206 }; 302 };
207 303
208 304 Maybe<bool> CompiledReplacement::Compile(Handle<JSRegExp> regexp,
209 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count, 305 Handle<String> replacement,
210 int subject_length) { 306 int capture_count,
307 int subject_length) {
211 { 308 {
212 DisallowHeapAllocation no_gc; 309 DisallowHeapAllocation no_gc;
213 String::FlatContent content = replacement->GetFlatContent(); 310 String::FlatContent content = replacement->GetFlatContent();
214 DCHECK(content.IsFlat()); 311 DCHECK(content.IsFlat());
215 bool simple = false; 312
313 FixedArray* capture_name_map = nullptr;
314 if (capture_count > 0) {
315 DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
316 Object* maybe_capture_name_map = regexp->CaptureNameMap();
317 if (maybe_capture_name_map->IsFixedArray()) {
318 DCHECK(FLAG_harmony_regexp_named_captures);
319 capture_name_map = FixedArray::cast(maybe_capture_name_map);
320 }
321 }
322
323 Maybe<bool> simple = Nothing<bool>();
216 if (content.IsOneByte()) { 324 if (content.IsOneByte()) {
217 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(), 325 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
218 capture_count, subject_length, zone()); 326 capture_name_map, capture_count,
327 subject_length, zone());
219 } else { 328 } else {
220 DCHECK(content.IsTwoByte()); 329 DCHECK(content.IsTwoByte());
221 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(), 330 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
222 capture_count, subject_length, zone()); 331 capture_name_map, capture_count,
332 subject_length, zone());
223 } 333 }
224 if (simple) return true; 334 if (simple.IsNothing() || simple.FromJust()) return simple;
225 } 335 }
226 336
227 Isolate* isolate = replacement->GetIsolate(); 337 Isolate* isolate = replacement->GetIsolate();
228 // Find substrings of replacement string and create them as String objects. 338 // Find substrings of replacement string and create them as String objects.
229 int substring_index = 0; 339 int substring_index = 0;
230 for (int i = 0, n = parts_.length(); i < n; i++) { 340 for (int i = 0, n = parts_.length(); i < n; i++) {
231 int tag = parts_[i].tag; 341 int tag = parts_[i].tag;
232 if (tag <= 0) { // A replacement string slice. 342 if (tag <= 0) { // A replacement string slice.
233 int from = -tag; 343 int from = -tag;
234 int to = parts_[i].data; 344 int to = parts_[i].data;
235 replacement_substrings_.Add( 345 replacement_substrings_.Add(
236 isolate->factory()->NewSubString(replacement, from, to), zone()); 346 isolate->factory()->NewSubString(replacement, from, to), zone());
237 parts_[i].tag = REPLACEMENT_SUBSTRING; 347 parts_[i].tag = REPLACEMENT_SUBSTRING;
238 parts_[i].data = substring_index; 348 parts_[i].data = substring_index;
239 substring_index++; 349 substring_index++;
240 } else if (tag == REPLACEMENT_STRING) { 350 } else if (tag == REPLACEMENT_STRING) {
241 replacement_substrings_.Add(replacement, zone()); 351 replacement_substrings_.Add(replacement, zone());
242 parts_[i].data = substring_index; 352 parts_[i].data = substring_index;
243 substring_index++; 353 substring_index++;
244 } 354 }
245 } 355 }
246 return false; 356 return Just(false);
247 } 357 }
248 358
249 359
250 void CompiledReplacement::Apply(ReplacementStringBuilder* builder, 360 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
251 int match_from, int match_to, int32_t* match) { 361 int match_from, int match_to, int32_t* match) {
252 DCHECK_LT(0, parts_.length()); 362 DCHECK_LT(0, parts_.length());
253 for (int i = 0, n = parts_.length(); i < n; i++) { 363 for (int i = 0, n = parts_.length(); i < n; i++) {
254 ReplacementPart part = parts_[i]; 364 ReplacementPart part = parts_[i];
255 switch (part.tag) { 365 switch (part.tag) {
256 case SUBJECT_PREFIX: 366 case SUBJECT_PREFIX:
(...skipping 12 matching lines...) Expand all
269 int to = match[capture * 2 + 1]; 379 int to = match[capture * 2 + 1];
270 if (from >= 0 && to > from) { 380 if (from >= 0 && to > from) {
271 builder->AddSubjectSlice(from, to); 381 builder->AddSubjectSlice(from, to);
272 } 382 }
273 break; 383 break;
274 } 384 }
275 case REPLACEMENT_SUBSTRING: 385 case REPLACEMENT_SUBSTRING:
276 case REPLACEMENT_STRING: 386 case REPLACEMENT_STRING:
277 builder->AddString(replacement_substrings_[part.data]); 387 builder->AddString(replacement_substrings_[part.data]);
278 break; 388 break;
389 case EMPTY:
390 break;
279 default: 391 default:
280 UNREACHABLE(); 392 UNREACHABLE();
281 } 393 }
282 } 394 }
283 } 395 }
284 396
285 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern, 397 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
286 List<int>* indices, unsigned int limit) { 398 List<int>* indices, unsigned int limit) {
287 DCHECK(limit > 0); 399 DCHECK(limit > 0);
288 // Collect indices of pattern in subject using memchr. 400 // Collect indices of pattern in subject using memchr.
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
484 596
485 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString( 597 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
486 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, 598 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
487 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) { 599 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
488 DCHECK(subject->IsFlat()); 600 DCHECK(subject->IsFlat());
489 DCHECK(replacement->IsFlat()); 601 DCHECK(replacement->IsFlat());
490 602
491 int capture_count = regexp->CaptureCount(); 603 int capture_count = regexp->CaptureCount();
492 int subject_length = subject->length(); 604 int subject_length = subject->length();
493 605
606 JSRegExp::Type typeTag = regexp->TypeTag();
607 if (typeTag == JSRegExp::IRREGEXP) {
608 // Ensure the RegExp is compiled so we can access the capture-name map.
609 RegExpImpl::IrregexpPrepare(regexp, subject);
610 }
611
494 // CompiledReplacement uses zone allocation. 612 // CompiledReplacement uses zone allocation.
495 Zone zone(isolate->allocator(), ZONE_NAME); 613 Zone zone(isolate->allocator(), ZONE_NAME);
496 CompiledReplacement compiled_replacement(&zone); 614 CompiledReplacement compiled_replacement(&zone);
497 bool simple_replace = 615 Maybe<bool> maybe_simple_replace = compiled_replacement.Compile(
498 compiled_replacement.Compile(replacement, capture_count, subject_length); 616 regexp, replacement, capture_count, subject_length);
617 if (maybe_simple_replace.IsNothing()) {
618 THROW_NEW_ERROR_RETURN_FAILURE(
619 isolate, NewSyntaxError(MessageTemplate::kRegExpInvalidReplaceString,
620 replacement));
621 }
622
623 const bool simple_replace = maybe_simple_replace.FromJust();
499 624
500 // Shortcut for simple non-regexp global replacements 625 // Shortcut for simple non-regexp global replacements
501 if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) { 626 if (typeTag == JSRegExp::ATOM && simple_replace) {
502 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) { 627 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
503 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( 628 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
504 isolate, subject, regexp, replacement, last_match_info); 629 isolate, subject, regexp, replacement, last_match_info);
505 } else { 630 } else {
506 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( 631 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
507 isolate, subject, regexp, replacement, last_match_info); 632 isolate, subject, regexp, replacement, last_match_info);
508 } 633 }
509 } 634 }
510 635
511 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate); 636 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after
642 int allocated_string_size = ResultSeqString::SizeFor(new_length); 767 int allocated_string_size = ResultSeqString::SizeFor(new_length);
643 int delta = allocated_string_size - string_size; 768 int delta = allocated_string_size - string_size;
644 769
645 answer->set_length(position); 770 answer->set_length(position);
646 if (delta == 0) return *answer; 771 if (delta == 0) return *answer;
647 772
648 Address end_of_string = answer->address() + string_size; 773 Address end_of_string = answer->address() + string_size;
649 Heap* heap = isolate->heap(); 774 Heap* heap = isolate->heap();
650 775
651 // The trimming is performed on a newly allocated object, which is on a 776 // The trimming is performed on a newly allocated object, which is on a
652 // fresly allocated page or on an already swept page. Hence, the sweeper 777 // freshly allocated page or on an already swept page. Hence, the sweeper
653 // thread can not get confused with the filler creation. No synchronization 778 // thread can not get confused with the filler creation. No synchronization
654 // needed. 779 // needed.
655 // TODO(hpayer): We should shrink the large object page if the size 780 // TODO(hpayer): We should shrink the large object page if the size
656 // of the object changed significantly. 781 // of the object changed significantly.
657 if (!heap->lo_space()->Contains(*answer)) { 782 if (!heap->lo_space()->Contains(*answer)) {
658 heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo); 783 heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
659 } 784 }
660 heap->AdjustLiveBytes(*answer, -delta); 785 heap->AdjustLiveBytes(*answer, -delta);
661 return *answer; 786 return *answer;
662 } 787 }
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
836 isolate->regexp_internal_match_info(); 961 isolate->regexp_internal_match_info();
837 962
838 return StringReplaceGlobalRegExpWithStringHelper( 963 return StringReplaceGlobalRegExpWithStringHelper(
839 isolate, regexp, subject, replacement, internal_match_info); 964 isolate, regexp, subject, replacement, internal_match_info);
840 } 965 }
841 966
842 namespace { 967 namespace {
843 968
844 class MatchInfoBackedMatch : public String::Match { 969 class MatchInfoBackedMatch : public String::Match {
845 public: 970 public:
846 MatchInfoBackedMatch(Isolate* isolate, Handle<String> subject, 971 MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
972 Handle<String> subject,
847 Handle<RegExpMatchInfo> match_info) 973 Handle<RegExpMatchInfo> match_info)
848 : isolate_(isolate), match_info_(match_info) { 974 : isolate_(isolate), match_info_(match_info) {
849 subject_ = String::Flatten(subject); 975 subject_ = String::Flatten(subject);
976
977 if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
978 Object* o = regexp->CaptureNameMap();
979 has_named_captures_ = o->IsFixedArray();
980 if (has_named_captures_) {
981 DCHECK(FLAG_harmony_regexp_named_captures);
982 capture_name_map_ = handle(FixedArray::cast(o));
983 }
984 } else {
985 has_named_captures_ = false;
986 }
850 } 987 }
851 988
852 Handle<String> GetMatch() override { 989 Handle<String> GetMatch() override {
853 return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr); 990 return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
854 } 991 }
855 992
856 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
857 Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
858 isolate_, match_info_, i, capture_exists);
859 return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
860 : isolate_->factory()->empty_string();
861 }
862
863 Handle<String> GetPrefix() override { 993 Handle<String> GetPrefix() override {
864 const int match_start = match_info_->Capture(0); 994 const int match_start = match_info_->Capture(0);
865 return isolate_->factory()->NewSubString(subject_, 0, match_start); 995 return isolate_->factory()->NewSubString(subject_, 0, match_start);
866 } 996 }
867 997
868 Handle<String> GetSuffix() override { 998 Handle<String> GetSuffix() override {
869 const int match_end = match_info_->Capture(1); 999 const int match_end = match_info_->Capture(1);
870 return isolate_->factory()->NewSubString(subject_, match_end, 1000 return isolate_->factory()->NewSubString(subject_, match_end,
871 subject_->length()); 1001 subject_->length());
872 } 1002 }
873 1003
1004 bool HasNamedCaptures() override { return has_named_captures_; }
1005
874 int CaptureCount() override { 1006 int CaptureCount() override {
875 return match_info_->NumberOfCaptureRegisters() / 2; 1007 return match_info_->NumberOfCaptureRegisters() / 2;
876 } 1008 }
877 1009
878 virtual ~MatchInfoBackedMatch() {} 1010 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1011 Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
1012 isolate_, match_info_, i, capture_exists);
1013 return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
1014 : isolate_->factory()->empty_string();
1015 }
1016
1017 MaybeHandle<String> GetNamedCapture(Handle<String> name,
1018 bool* capture_exists) override {
1019 DCHECK(has_named_captures_);
1020 const int capture_index = LookupNamedCapture(
1021 [=](String* capture_name) { return capture_name->Equals(*name); },
1022 *capture_name_map_);
1023
1024 if (capture_index == -1) {
1025 *capture_exists = false;
1026 return name; // Arbitrary string handle.
1027 }
1028
1029 DCHECK(1 <= capture_index && capture_index <= CaptureCount());
1030 return GetCapture(capture_index, capture_exists);
1031 }
879 1032
880 private: 1033 private:
881 Isolate* isolate_; 1034 Isolate* isolate_;
882 Handle<String> subject_; 1035 Handle<String> subject_;
883 Handle<RegExpMatchInfo> match_info_; 1036 Handle<RegExpMatchInfo> match_info_;
1037
1038 bool has_named_captures_;
1039 Handle<FixedArray> capture_name_map_;
884 }; 1040 };
885 1041
886 class VectorBackedMatch : public String::Match { 1042 class VectorBackedMatch : public String::Match {
887 public: 1043 public:
888 VectorBackedMatch(Isolate* isolate, Handle<String> subject, 1044 VectorBackedMatch(Isolate* isolate, Handle<String> subject,
889 Handle<String> match, int match_position, 1045 Handle<String> match, int match_position,
890 std::vector<Handle<Object>>* captures) 1046 std::vector<Handle<Object>>* captures,
1047 Handle<Object> groups_obj)
891 : isolate_(isolate), 1048 : isolate_(isolate),
892 match_(match), 1049 match_(match),
893 match_position_(match_position), 1050 match_position_(match_position),
894 captures_(captures) { 1051 captures_(captures) {
895 subject_ = String::Flatten(subject); 1052 subject_ = String::Flatten(subject);
1053
1054 DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
1055 has_named_captures_ = !groups_obj->IsUndefined(isolate);
1056 if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
896 } 1057 }
897 1058
898 Handle<String> GetMatch() override { return match_; } 1059 Handle<String> GetMatch() override { return match_; }
899 1060
900 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
901 Handle<Object> capture_obj = captures_->at(i);
902 if (capture_obj->IsUndefined(isolate_)) {
903 *capture_exists = false;
904 return isolate_->factory()->empty_string();
905 }
906 *capture_exists = true;
907 return Object::ToString(isolate_, capture_obj);
908 }
909
910 Handle<String> GetPrefix() override { 1061 Handle<String> GetPrefix() override {
911 return isolate_->factory()->NewSubString(subject_, 0, match_position_); 1062 return isolate_->factory()->NewSubString(subject_, 0, match_position_);
912 } 1063 }
913 1064
914 Handle<String> GetSuffix() override { 1065 Handle<String> GetSuffix() override {
915 const int match_end_position = match_position_ + match_->length(); 1066 const int match_end_position = match_position_ + match_->length();
916 return isolate_->factory()->NewSubString(subject_, match_end_position, 1067 return isolate_->factory()->NewSubString(subject_, match_end_position,
917 subject_->length()); 1068 subject_->length());
918 } 1069 }
919 1070
1071 bool HasNamedCaptures() override { return has_named_captures_; }
1072
920 int CaptureCount() override { return static_cast<int>(captures_->size()); } 1073 int CaptureCount() override { return static_cast<int>(captures_->size()); }
921 1074
922 virtual ~VectorBackedMatch() {} 1075 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1076 Handle<Object> capture_obj = captures_->at(i);
1077 if (capture_obj->IsUndefined(isolate_)) {
1078 *capture_exists = false;
1079 return isolate_->factory()->empty_string();
1080 }
1081 *capture_exists = true;
1082 return Object::ToString(isolate_, capture_obj);
1083 }
1084
1085 MaybeHandle<String> GetNamedCapture(Handle<String> name,
1086 bool* capture_exists) override {
1087 DCHECK(has_named_captures_);
1088 Handle<Object> capture_obj;
1089 ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1090 Object::GetProperty(groups_obj_, name), String);
1091 if (capture_obj->IsUndefined(isolate_)) {
1092 *capture_exists = false;
1093 return name;
1094 } else {
1095 *capture_exists = true;
1096 return Object::ToString(isolate_, capture_obj);
1097 }
1098 }
923 1099
924 private: 1100 private:
925 Isolate* isolate_; 1101 Isolate* isolate_;
926 Handle<String> subject_; 1102 Handle<String> subject_;
927 Handle<String> match_; 1103 Handle<String> match_;
928 const int match_position_; 1104 const int match_position_;
929 std::vector<Handle<Object>>* captures_; 1105 std::vector<Handle<Object>>* captures_;
1106
1107 bool has_named_captures_;
1108 Handle<JSReceiver> groups_obj_;
930 }; 1109 };
931 1110
932 // Create the groups object (see also the RegExp result creation in 1111 // Create the groups object (see also the RegExp result creation in
933 // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo). 1112 // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
934 Handle<JSObject> ConstructNamedCaptureGroupsObject( 1113 Handle<JSObject> ConstructNamedCaptureGroupsObject(
935 Isolate* isolate, Handle<FixedArray> capture_map, 1114 Isolate* isolate, Handle<FixedArray> capture_map,
936 std::function<Object*(int)> f_get_capture) { 1115 std::function<Object*(int)> f_get_capture) {
937 Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto(); 1116 Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
938 1117
939 const int capture_count = capture_map->length() >> 1; 1118 const int capture_count = capture_map->length() >> 1;
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
1065 } else { 1244 } else {
1066 DCHECK(current_match[i * 2 + 1] < 0); 1245 DCHECK(current_match[i * 2 + 1] < 0);
1067 elements->set(cursor++, isolate->heap()->undefined_value()); 1246 elements->set(cursor++, isolate->heap()->undefined_value());
1068 } 1247 }
1069 } 1248 }
1070 1249
1071 elements->set(cursor++, Smi::FromInt(match_start)); 1250 elements->set(cursor++, Smi::FromInt(match_start));
1072 elements->set(cursor++, *subject); 1251 elements->set(cursor++, *subject);
1073 1252
1074 if (has_named_captures) { 1253 if (has_named_captures) {
1254 DCHECK(FLAG_harmony_regexp_named_captures);
1075 Handle<FixedArray> capture_map = 1255 Handle<FixedArray> capture_map =
1076 Handle<FixedArray>::cast(maybe_capture_map); 1256 Handle<FixedArray>::cast(maybe_capture_map);
1077 Handle<JSObject> groups = ConstructNamedCaptureGroupsObject( 1257 Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1078 isolate, capture_map, [=](int ix) { return elements->get(ix); }); 1258 isolate, capture_map, [=](int ix) { return elements->get(ix); });
1079 elements->set(cursor++, *groups); 1259 elements->set(cursor++, *groups);
1080 } 1260 }
1081 1261
1082 DCHECK_EQ(cursor, argc); 1262 DCHECK_EQ(cursor, argc);
1083 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements)); 1263 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1084 } else { 1264 } else {
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
1176 1356
1177 const int start_index = match_indices->Capture(0); 1357 const int start_index = match_indices->Capture(0);
1178 const int end_index = match_indices->Capture(1); 1358 const int end_index = match_indices->Capture(1);
1179 1359
1180 if (sticky) regexp->SetLastIndex(end_index); 1360 if (sticky) regexp->SetLastIndex(end_index);
1181 1361
1182 IncrementalStringBuilder builder(isolate); 1362 IncrementalStringBuilder builder(isolate);
1183 builder.AppendString(factory->NewSubString(string, 0, start_index)); 1363 builder.AppendString(factory->NewSubString(string, 0, start_index));
1184 1364
1185 if (replace->length() > 0) { 1365 if (replace->length() > 0) {
1186 MatchInfoBackedMatch m(isolate, string, match_indices); 1366 MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1187 Handle<String> replacement; 1367 Handle<String> replacement;
1188 ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement, 1368 ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1189 String::GetSubstitution(isolate, &m, replace), 1369 String::GetSubstitution(isolate, &m, replace),
1190 String); 1370 String);
1191 builder.AppendString(replacement); 1371 builder.AppendString(replacement);
1192 } 1372 }
1193 1373
1194 builder.AppendString( 1374 builder.AppendString(
1195 factory->NewSubString(string, end_index, string->length())); 1375 factory->NewSubString(string, end_index, string->length()));
1196 return builder.Finish(); 1376 return builder.Finish();
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
1309 const int m = match_indices->NumberOfCaptureRegisters() / 2; 1489 const int m = match_indices->NumberOfCaptureRegisters() / 2;
1310 1490
1311 bool has_named_captures = false; 1491 bool has_named_captures = false;
1312 Handle<FixedArray> capture_map; 1492 Handle<FixedArray> capture_map;
1313 if (m > 1) { 1493 if (m > 1) {
1314 // The existence of capture groups implies IRREGEXP kind. 1494 // The existence of capture groups implies IRREGEXP kind.
1315 DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); 1495 DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
1316 1496
1317 Object* maybe_capture_map = regexp->CaptureNameMap(); 1497 Object* maybe_capture_map = regexp->CaptureNameMap();
1318 if (maybe_capture_map->IsFixedArray()) { 1498 if (maybe_capture_map->IsFixedArray()) {
1499 DCHECK(FLAG_harmony_regexp_named_captures);
1319 has_named_captures = true; 1500 has_named_captures = true;
1320 capture_map = handle(FixedArray::cast(maybe_capture_map)); 1501 capture_map = handle(FixedArray::cast(maybe_capture_map));
1321 } 1502 }
1322 } 1503 }
1323 1504
1324 const int argc = has_named_captures ? m + 3 : m + 2; 1505 const int argc = has_named_captures ? m + 3 : m + 2;
1325 ScopedVector<Handle<Object>> argv(argc); 1506 ScopedVector<Handle<Object>> argv(argc);
1326 1507
1327 int cursor = 0; 1508 int cursor = 0;
1328 for (int j = 0; j < m; j++) { 1509 for (int j = 0; j < m; j++) {
(...skipping 367 matching lines...) Expand 10 before | Expand all | Expand 10 after
1696 Handle<Object> replacement_obj; 1877 Handle<Object> replacement_obj;
1697 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1878 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1698 isolate, replacement_obj, 1879 isolate, replacement_obj,
1699 Execution::Call(isolate, replace_obj, factory->undefined_value(), 1880 Execution::Call(isolate, replace_obj, factory->undefined_value(),
1700 argc, argv.start())); 1881 argc, argv.start()));
1701 1882
1702 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1883 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1703 isolate, replacement, Object::ToString(isolate, replacement_obj)); 1884 isolate, replacement, Object::ToString(isolate, replacement_obj));
1704 } else { 1885 } else {
1705 DCHECK(!functional_replace); 1886 DCHECK(!functional_replace);
1706 VectorBackedMatch m(isolate, string, match, position, &captures); 1887 if (!groups_obj->IsUndefined(isolate)) {
1888 // TODO(jgruber): Behavior in this case is not yet specced.
1889 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1890 isolate, groups_obj, JSReceiver::ToObject(isolate, groups_obj));
1891 }
1892 VectorBackedMatch m(isolate, string, match, position, &captures,
1893 groups_obj);
1707 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1894 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1708 isolate, replacement, String::GetSubstitution(isolate, &m, replace)); 1895 isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1709 } 1896 }
1710 1897
1711 if (position >= next_source_position) { 1898 if (position >= next_source_position) {
1712 builder.AppendString( 1899 builder.AppendString(
1713 factory->NewSubString(string, next_source_position, position)); 1900 factory->NewSubString(string, next_source_position, position));
1714 builder.AppendString(replacement); 1901 builder.AppendString(replacement);
1715 1902
1716 next_source_position = position + match_length; 1903 next_source_position = position + match_length;
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
1748 1935
1749 RUNTIME_FUNCTION(Runtime_IsRegExp) { 1936 RUNTIME_FUNCTION(Runtime_IsRegExp) {
1750 SealHandleScope shs(isolate); 1937 SealHandleScope shs(isolate);
1751 DCHECK_EQ(1, args.length()); 1938 DCHECK_EQ(1, args.length());
1752 CONVERT_ARG_CHECKED(Object, obj, 0); 1939 CONVERT_ARG_CHECKED(Object, obj, 0);
1753 return isolate->heap()->ToBoolean(obj->IsJSRegExp()); 1940 return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1754 } 1941 }
1755 1942
1756 } // namespace internal 1943 } // namespace internal
1757 } // namespace v8 1944 } // namespace v8
OLDNEW
« no previous file with comments | « src/objects.cc ('k') | src/runtime/runtime-strings.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698