Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(639)

Side by Side Diff: src/runtime/runtime-regexp.cc

Issue 604703004: Move string-related runtime functions into separate files. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/runtime/runtime-json.cc ('k') | src/runtime/runtime-strings.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/v8.h"
6
7 #include "src/arguments.h"
8 #include "src/jsregexp-inl.h"
9 #include "src/jsregexp.h"
10 #include "src/runtime/runtime.h"
11 #include "src/runtime/runtime-utils.h"
12 #include "src/runtime/string-builder.h"
13 #include "src/string-search.h"
14
15 namespace v8 {
16 namespace internal {
17
18 class CompiledReplacement {
19 public:
20 explicit CompiledReplacement(Zone* zone)
21 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
22
23 // Return whether the replacement is simple.
24 bool Compile(Handle<String> replacement, int capture_count,
25 int subject_length);
26
27 // Use Apply only if Compile returned false.
28 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
29 int32_t* match);
30
31 // Number of distinct parts of the replacement pattern.
32 int parts() { return parts_.length(); }
33
34 Zone* zone() const { return zone_; }
35
36 private:
37 enum PartType {
38 SUBJECT_PREFIX = 1,
39 SUBJECT_SUFFIX,
40 SUBJECT_CAPTURE,
41 REPLACEMENT_SUBSTRING,
42 REPLACEMENT_STRING,
43 NUMBER_OF_PART_TYPES
44 };
45
46 struct ReplacementPart {
47 static inline ReplacementPart SubjectMatch() {
48 return ReplacementPart(SUBJECT_CAPTURE, 0);
49 }
50 static inline ReplacementPart SubjectCapture(int capture_index) {
51 return ReplacementPart(SUBJECT_CAPTURE, capture_index);
52 }
53 static inline ReplacementPart SubjectPrefix() {
54 return ReplacementPart(SUBJECT_PREFIX, 0);
55 }
56 static inline ReplacementPart SubjectSuffix(int subject_length) {
57 return ReplacementPart(SUBJECT_SUFFIX, subject_length);
58 }
59 static inline ReplacementPart ReplacementString() {
60 return ReplacementPart(REPLACEMENT_STRING, 0);
61 }
62 static inline ReplacementPart ReplacementSubString(int from, int to) {
63 DCHECK(from >= 0);
64 DCHECK(to > from);
65 return ReplacementPart(-from, to);
66 }
67
68 // If tag <= 0 then it is the negation of a start index of a substring of
69 // the replacement pattern, otherwise it's a value from PartType.
70 ReplacementPart(int tag, int data) : tag(tag), data(data) {
71 // Must be non-positive or a PartType value.
72 DCHECK(tag < NUMBER_OF_PART_TYPES);
73 }
74 // Either a value of PartType or a non-positive number that is
75 // the negation of an index into the replacement string.
76 int tag;
77 // The data value's interpretation depends on the value of tag:
78 // tag == SUBJECT_PREFIX ||
79 // tag == SUBJECT_SUFFIX: data is unused.
80 // tag == SUBJECT_CAPTURE: data is the number of the capture.
81 // tag == REPLACEMENT_SUBSTRING ||
82 // tag == REPLACEMENT_STRING: data is index into array of substrings
83 // of the replacement string.
84 // tag <= 0: Temporary representation of the substring of the replacement
85 // string ranging over -tag .. data.
86 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
87 // substring objects.
88 int data;
89 };
90
91 template <typename Char>
92 bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
93 Vector<Char> characters, int capture_count,
94 int subject_length, Zone* zone) {
95 int length = characters.length();
96 int last = 0;
97 for (int i = 0; i < length; i++) {
98 Char c = characters[i];
99 if (c == '$') {
100 int next_index = i + 1;
101 if (next_index == length) { // No next character!
102 break;
103 }
104 Char c2 = characters[next_index];
105 switch (c2) {
106 case '$':
107 if (i > last) {
108 // There is a substring before. Include the first "$".
109 parts->Add(
110 ReplacementPart::ReplacementSubString(last, next_index),
111 zone);
112 last = next_index + 1; // Continue after the second "$".
113 } else {
114 // Let the next substring start with the second "$".
115 last = next_index;
116 }
117 i = next_index;
118 break;
119 case '`':
120 if (i > last) {
121 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
122 }
123 parts->Add(ReplacementPart::SubjectPrefix(), zone);
124 i = next_index;
125 last = i + 1;
126 break;
127 case '\'':
128 if (i > last) {
129 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
130 }
131 parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
132 i = next_index;
133 last = i + 1;
134 break;
135 case '&':
136 if (i > last) {
137 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
138 }
139 parts->Add(ReplacementPart::SubjectMatch(), zone);
140 i = next_index;
141 last = i + 1;
142 break;
143 case '0':
144 case '1':
145 case '2':
146 case '3':
147 case '4':
148 case '5':
149 case '6':
150 case '7':
151 case '8':
152 case '9': {
153 int capture_ref = c2 - '0';
154 if (capture_ref > capture_count) {
155 i = next_index;
156 continue;
157 }
158 int second_digit_index = next_index + 1;
159 if (second_digit_index < length) {
160 // Peek ahead to see if we have two digits.
161 Char c3 = characters[second_digit_index];
162 if ('0' <= c3 && c3 <= '9') { // Double digits.
163 int double_digit_ref = capture_ref * 10 + c3 - '0';
164 if (double_digit_ref <= capture_count) {
165 next_index = second_digit_index;
166 capture_ref = double_digit_ref;
167 }
168 }
169 }
170 if (capture_ref > 0) {
171 if (i > last) {
172 parts->Add(ReplacementPart::ReplacementSubString(last, i),
173 zone);
174 }
175 DCHECK(capture_ref <= capture_count);
176 parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
177 last = next_index + 1;
178 }
179 i = next_index;
180 break;
181 }
182 default:
183 i = next_index;
184 break;
185 }
186 }
187 }
188 if (length > last) {
189 if (last == 0) {
190 // Replacement is simple. Do not use Apply to do the replacement.
191 return true;
192 } else {
193 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
194 }
195 }
196 return false;
197 }
198
199 ZoneList<ReplacementPart> parts_;
200 ZoneList<Handle<String> > replacement_substrings_;
201 Zone* zone_;
202 };
203
204
205 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
206 int subject_length) {
207 {
208 DisallowHeapAllocation no_gc;
209 String::FlatContent content = replacement->GetFlatContent();
210 DCHECK(content.IsFlat());
211 bool simple = false;
212 if (content.IsOneByte()) {
213 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
214 capture_count, subject_length, zone());
215 } else {
216 DCHECK(content.IsTwoByte());
217 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
218 capture_count, subject_length, zone());
219 }
220 if (simple) return true;
221 }
222
223 Isolate* isolate = replacement->GetIsolate();
224 // Find substrings of replacement string and create them as String objects.
225 int substring_index = 0;
226 for (int i = 0, n = parts_.length(); i < n; i++) {
227 int tag = parts_[i].tag;
228 if (tag <= 0) { // A replacement string slice.
229 int from = -tag;
230 int to = parts_[i].data;
231 replacement_substrings_.Add(
232 isolate->factory()->NewSubString(replacement, from, to), zone());
233 parts_[i].tag = REPLACEMENT_SUBSTRING;
234 parts_[i].data = substring_index;
235 substring_index++;
236 } else if (tag == REPLACEMENT_STRING) {
237 replacement_substrings_.Add(replacement, zone());
238 parts_[i].data = substring_index;
239 substring_index++;
240 }
241 }
242 return false;
243 }
244
245
246 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
247 int match_from, int match_to, int32_t* match) {
248 DCHECK_LT(0, parts_.length());
249 for (int i = 0, n = parts_.length(); i < n; i++) {
250 ReplacementPart part = parts_[i];
251 switch (part.tag) {
252 case SUBJECT_PREFIX:
253 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
254 break;
255 case SUBJECT_SUFFIX: {
256 int subject_length = part.data;
257 if (match_to < subject_length) {
258 builder->AddSubjectSlice(match_to, subject_length);
259 }
260 break;
261 }
262 case SUBJECT_CAPTURE: {
263 int capture = part.data;
264 int from = match[capture * 2];
265 int to = match[capture * 2 + 1];
266 if (from >= 0 && to > from) {
267 builder->AddSubjectSlice(from, to);
268 }
269 break;
270 }
271 case REPLACEMENT_SUBSTRING:
272 case REPLACEMENT_STRING:
273 builder->AddString(replacement_substrings_[part.data]);
274 break;
275 default:
276 UNREACHABLE();
277 }
278 }
279 }
280
281
282 void FindOneByteStringIndices(Vector<const uint8_t> subject, char pattern,
283 ZoneList<int>* indices, unsigned int limit,
284 Zone* zone) {
285 DCHECK(limit > 0);
286 // Collect indices of pattern in subject using memchr.
287 // Stop after finding at most limit values.
288 const uint8_t* subject_start = subject.start();
289 const uint8_t* subject_end = subject_start + subject.length();
290 const uint8_t* pos = subject_start;
291 while (limit > 0) {
292 pos = reinterpret_cast<const uint8_t*>(
293 memchr(pos, pattern, subject_end - pos));
294 if (pos == NULL) return;
295 indices->Add(static_cast<int>(pos - subject_start), zone);
296 pos++;
297 limit--;
298 }
299 }
300
301
302 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
303 ZoneList<int>* indices, unsigned int limit,
304 Zone* zone) {
305 DCHECK(limit > 0);
306 const uc16* subject_start = subject.start();
307 const uc16* subject_end = subject_start + subject.length();
308 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
309 if (*pos == pattern) {
310 indices->Add(static_cast<int>(pos - subject_start), zone);
311 limit--;
312 }
313 }
314 }
315
316
317 template <typename SubjectChar, typename PatternChar>
318 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
319 Vector<const PatternChar> pattern,
320 ZoneList<int>* indices, unsigned int limit, Zone* zone) {
321 DCHECK(limit > 0);
322 // Collect indices of pattern in subject.
323 // Stop after finding at most limit values.
324 int pattern_length = pattern.length();
325 int index = 0;
326 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
327 while (limit > 0) {
328 index = search.Search(subject, index);
329 if (index < 0) return;
330 indices->Add(index, zone);
331 index += pattern_length;
332 limit--;
333 }
334 }
335
336
337 void FindStringIndicesDispatch(Isolate* isolate, String* subject,
338 String* pattern, ZoneList<int>* indices,
339 unsigned int limit, Zone* zone) {
340 {
341 DisallowHeapAllocation no_gc;
342 String::FlatContent subject_content = subject->GetFlatContent();
343 String::FlatContent pattern_content = pattern->GetFlatContent();
344 DCHECK(subject_content.IsFlat());
345 DCHECK(pattern_content.IsFlat());
346 if (subject_content.IsOneByte()) {
347 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
348 if (pattern_content.IsOneByte()) {
349 Vector<const uint8_t> pattern_vector =
350 pattern_content.ToOneByteVector();
351 if (pattern_vector.length() == 1) {
352 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
353 limit, zone);
354 } else {
355 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
356 limit, zone);
357 }
358 } else {
359 FindStringIndices(isolate, subject_vector,
360 pattern_content.ToUC16Vector(), indices, limit, zone);
361 }
362 } else {
363 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
364 if (pattern_content.IsOneByte()) {
365 Vector<const uint8_t> pattern_vector =
366 pattern_content.ToOneByteVector();
367 if (pattern_vector.length() == 1) {
368 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
369 limit, zone);
370 } else {
371 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
372 limit, zone);
373 }
374 } else {
375 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
376 if (pattern_vector.length() == 1) {
377 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
378 limit, zone);
379 } else {
380 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
381 limit, zone);
382 }
383 }
384 }
385 }
386 }
387
388
389 template <typename ResultSeqString>
390 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
391 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
392 Handle<String> replacement, Handle<JSArray> last_match_info) {
393 DCHECK(subject->IsFlat());
394 DCHECK(replacement->IsFlat());
395
396 ZoneScope zone_scope(isolate->runtime_zone());
397 ZoneList<int> indices(8, zone_scope.zone());
398 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
399 String* pattern =
400 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
401 int subject_len = subject->length();
402 int pattern_len = pattern->length();
403 int replacement_len = replacement->length();
404
405 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
406 zone_scope.zone());
407
408 int matches = indices.length();
409 if (matches == 0) return *subject;
410
411 // Detect integer overflow.
412 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
413 static_cast<int64_t>(pattern_len)) *
414 static_cast<int64_t>(matches) +
415 static_cast<int64_t>(subject_len);
416 int result_len;
417 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
418 STATIC_ASSERT(String::kMaxLength < kMaxInt);
419 result_len = kMaxInt; // Provoke exception.
420 } else {
421 result_len = static_cast<int>(result_len_64);
422 }
423
424 int subject_pos = 0;
425 int result_pos = 0;
426
427 MaybeHandle<SeqString> maybe_res;
428 if (ResultSeqString::kHasOneByteEncoding) {
429 maybe_res = isolate->factory()->NewRawOneByteString(result_len);
430 } else {
431 maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
432 }
433 Handle<SeqString> untyped_res;
434 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
435 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
436
437 for (int i = 0; i < matches; i++) {
438 // Copy non-matched subject content.
439 if (subject_pos < indices.at(i)) {
440 String::WriteToFlat(*subject, result->GetChars() + result_pos,
441 subject_pos, indices.at(i));
442 result_pos += indices.at(i) - subject_pos;
443 }
444
445 // Replace match.
446 if (replacement_len > 0) {
447 String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
448 replacement_len);
449 result_pos += replacement_len;
450 }
451
452 subject_pos = indices.at(i) + pattern_len;
453 }
454 // Add remaining subject content at the end.
455 if (subject_pos < subject_len) {
456 String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
457 subject_len);
458 }
459
460 int32_t match_indices[] = {indices.at(matches - 1),
461 indices.at(matches - 1) + pattern_len};
462 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
463
464 return *result;
465 }
466
467
468 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
469 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
470 Handle<String> replacement, Handle<JSArray> last_match_info) {
471 DCHECK(subject->IsFlat());
472 DCHECK(replacement->IsFlat());
473
474 int capture_count = regexp->CaptureCount();
475 int subject_length = subject->length();
476
477 // CompiledReplacement uses zone allocation.
478 ZoneScope zone_scope(isolate->runtime_zone());
479 CompiledReplacement compiled_replacement(zone_scope.zone());
480 bool simple_replace =
481 compiled_replacement.Compile(replacement, capture_count, subject_length);
482
483 // Shortcut for simple non-regexp global replacements
484 if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
485 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
486 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
487 isolate, subject, regexp, replacement, last_match_info);
488 } else {
489 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
490 isolate, subject, regexp, replacement, last_match_info);
491 }
492 }
493
494 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
495 if (global_cache.HasException()) return isolate->heap()->exception();
496
497 int32_t* current_match = global_cache.FetchNext();
498 if (current_match == NULL) {
499 if (global_cache.HasException()) return isolate->heap()->exception();
500 return *subject;
501 }
502
503 // Guessing the number of parts that the final result string is built
504 // from. Global regexps can match any number of times, so we guess
505 // conservatively.
506 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
507 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
508
509 // Number of parts added by compiled replacement plus preceeding
510 // string and possibly suffix after last match. It is possible for
511 // all components to use two elements when encoded as two smis.
512 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
513
514 int prev = 0;
515
516 do {
517 builder.EnsureCapacity(parts_added_per_loop);
518
519 int start = current_match[0];
520 int end = current_match[1];
521
522 if (prev < start) {
523 builder.AddSubjectSlice(prev, start);
524 }
525
526 if (simple_replace) {
527 builder.AddString(replacement);
528 } else {
529 compiled_replacement.Apply(&builder, start, end, current_match);
530 }
531 prev = end;
532
533 current_match = global_cache.FetchNext();
534 } while (current_match != NULL);
535
536 if (global_cache.HasException()) return isolate->heap()->exception();
537
538 if (prev < subject_length) {
539 builder.EnsureCapacity(2);
540 builder.AddSubjectSlice(prev, subject_length);
541 }
542
543 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
544 global_cache.LastSuccessfulMatch());
545
546 Handle<String> result;
547 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString());
548 return *result;
549 }
550
551
552 template <typename ResultSeqString>
553 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
554 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
555 Handle<JSArray> last_match_info) {
556 DCHECK(subject->IsFlat());
557
558 // Shortcut for simple non-regexp global replacements
559 if (regexp->TypeTag() == JSRegExp::ATOM) {
560 Handle<String> empty_string = isolate->factory()->empty_string();
561 if (subject->IsOneByteRepresentation()) {
562 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
563 isolate, subject, regexp, empty_string, last_match_info);
564 } else {
565 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
566 isolate, subject, regexp, empty_string, last_match_info);
567 }
568 }
569
570 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
571 if (global_cache.HasException()) return isolate->heap()->exception();
572
573 int32_t* current_match = global_cache.FetchNext();
574 if (current_match == NULL) {
575 if (global_cache.HasException()) return isolate->heap()->exception();
576 return *subject;
577 }
578
579 int start = current_match[0];
580 int end = current_match[1];
581 int capture_count = regexp->CaptureCount();
582 int subject_length = subject->length();
583
584 int new_length = subject_length - (end - start);
585 if (new_length == 0) return isolate->heap()->empty_string();
586
587 Handle<ResultSeqString> answer;
588 if (ResultSeqString::kHasOneByteEncoding) {
589 answer = Handle<ResultSeqString>::cast(
590 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
591 } else {
592 answer = Handle<ResultSeqString>::cast(
593 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
594 }
595
596 int prev = 0;
597 int position = 0;
598
599 do {
600 start = current_match[0];
601 end = current_match[1];
602 if (prev < start) {
603 // Add substring subject[prev;start] to answer string.
604 String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
605 position += start - prev;
606 }
607 prev = end;
608
609 current_match = global_cache.FetchNext();
610 } while (current_match != NULL);
611
612 if (global_cache.HasException()) return isolate->heap()->exception();
613
614 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
615 global_cache.LastSuccessfulMatch());
616
617 if (prev < subject_length) {
618 // Add substring subject[prev;length] to answer string.
619 String::WriteToFlat(*subject, answer->GetChars() + position, prev,
620 subject_length);
621 position += subject_length - prev;
622 }
623
624 if (position == 0) return isolate->heap()->empty_string();
625
626 // Shorten string and fill
627 int string_size = ResultSeqString::SizeFor(position);
628 int allocated_string_size = ResultSeqString::SizeFor(new_length);
629 int delta = allocated_string_size - string_size;
630
631 answer->set_length(position);
632 if (delta == 0) return *answer;
633
634 Address end_of_string = answer->address() + string_size;
635 Heap* heap = isolate->heap();
636
637 // The trimming is performed on a newly allocated object, which is on a
638 // fresly allocated page or on an already swept page. Hence, the sweeper
639 // thread can not get confused with the filler creation. No synchronization
640 // needed.
641 heap->CreateFillerObjectAt(end_of_string, delta);
642 heap->AdjustLiveBytes(answer->address(), -delta, Heap::FROM_MUTATOR);
643 return *answer;
644 }
645
646
647 RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
648 HandleScope scope(isolate);
649 DCHECK(args.length() == 4);
650
651 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
652 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
653 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
654 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
655
656 RUNTIME_ASSERT(regexp->GetFlags().is_global());
657 RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
658
659 subject = String::Flatten(subject);
660
661 if (replacement->length() == 0) {
662 if (subject->HasOnlyOneByteChars()) {
663 return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
664 isolate, subject, regexp, last_match_info);
665 } else {
666 return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
667 isolate, subject, regexp, last_match_info);
668 }
669 }
670
671 replacement = String::Flatten(replacement);
672
673 return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
674 replacement, last_match_info);
675 }
676
677
678 RUNTIME_FUNCTION(Runtime_StringSplit) {
679 HandleScope handle_scope(isolate);
680 DCHECK(args.length() == 3);
681 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
682 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
683 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
684 RUNTIME_ASSERT(limit > 0);
685
686 int subject_length = subject->length();
687 int pattern_length = pattern->length();
688 RUNTIME_ASSERT(pattern_length > 0);
689
690 if (limit == 0xffffffffu) {
691 Handle<Object> cached_answer(
692 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
693 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
694 isolate);
695 if (*cached_answer != Smi::FromInt(0)) {
696 // The cache FixedArray is a COW-array and can therefore be reused.
697 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
698 Handle<FixedArray>::cast(cached_answer));
699 return *result;
700 }
701 }
702
703 // The limit can be very large (0xffffffffu), but since the pattern
704 // isn't empty, we can never create more parts than ~half the length
705 // of the subject.
706
707 subject = String::Flatten(subject);
708 pattern = String::Flatten(pattern);
709
710 static const int kMaxInitialListCapacity = 16;
711
712 ZoneScope zone_scope(isolate->runtime_zone());
713
714 // Find (up to limit) indices of separator and end-of-string in subject
715 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
716 ZoneList<int> indices(initial_capacity, zone_scope.zone());
717
718 FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,
719 zone_scope.zone());
720
721 if (static_cast<uint32_t>(indices.length()) < limit) {
722 indices.Add(subject_length, zone_scope.zone());
723 }
724
725 // The list indices now contains the end of each part to create.
726
727 // Create JSArray of substrings separated by separator.
728 int part_count = indices.length();
729
730 Handle<JSArray> result = isolate->factory()->NewJSArray(part_count);
731 JSObject::EnsureCanContainHeapObjectElements(result);
732 result->set_length(Smi::FromInt(part_count));
733
734 DCHECK(result->HasFastObjectElements());
735
736 if (part_count == 1 && indices.at(0) == subject_length) {
737 FixedArray::cast(result->elements())->set(0, *subject);
738 return *result;
739 }
740
741 Handle<FixedArray> elements(FixedArray::cast(result->elements()));
742 int part_start = 0;
743 for (int i = 0; i < part_count; i++) {
744 HandleScope local_loop_handle(isolate);
745 int part_end = indices.at(i);
746 Handle<String> substring =
747 isolate->factory()->NewProperSubString(subject, part_start, part_end);
748 elements->set(i, *substring);
749 part_start = part_end + pattern_length;
750 }
751
752 if (limit == 0xffffffffu) {
753 if (result->HasFastObjectElements()) {
754 RegExpResultsCache::Enter(isolate, subject, pattern, elements,
755 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
756 }
757 }
758
759 return *result;
760 }
761
762
763 RUNTIME_FUNCTION(Runtime_RegExpCompile) {
764 HandleScope scope(isolate);
765 DCHECK(args.length() == 3);
766 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, re, 0);
767 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
768 CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
769 Handle<Object> result;
770 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result,
771 RegExpImpl::Compile(re, pattern, flags));
772 return *result;
773 }
774
775
776 RUNTIME_FUNCTION(Runtime_RegExpExecRT) {
777 HandleScope scope(isolate);
778 DCHECK(args.length() == 4);
779 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
780 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
781 CONVERT_INT32_ARG_CHECKED(index, 2);
782 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
783 // Due to the way the JS calls are constructed this must be less than the
784 // length of a string, i.e. it is always a Smi. We check anyway for security.
785 RUNTIME_ASSERT(index >= 0);
786 RUNTIME_ASSERT(index <= subject->length());
787 isolate->counters()->regexp_entry_runtime()->Increment();
788 Handle<Object> result;
789 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
790 isolate, result,
791 RegExpImpl::Exec(regexp, subject, index, last_match_info));
792 return *result;
793 }
794
795
796 RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
797 HandleScope handle_scope(isolate);
798 DCHECK(args.length() == 3);
799 CONVERT_SMI_ARG_CHECKED(size, 0);
800 RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength);
801 CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);
802 CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);
803 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
804 Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
805 Handle<JSObject> object =
806 isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED, false);
807 Handle<JSArray> array = Handle<JSArray>::cast(object);
808 array->set_elements(*elements);
809 array->set_length(Smi::FromInt(size));
810 // Write in-object properties after the length of the array.
811 array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);
812 array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
813 return *array;
814 }
815
816
817 RUNTIME_FUNCTION(Runtime_RegExpInitializeObject) {
818 HandleScope scope(isolate);
819 DCHECK(args.length() == 6);
820 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
821 CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
822 // If source is the empty string we set it to "(?:)" instead as
823 // suggested by ECMA-262, 5th, section 15.10.4.1.
824 if (source->length() == 0) source = isolate->factory()->query_colon_string();
825
826 CONVERT_ARG_HANDLE_CHECKED(Object, global, 2);
827 if (!global->IsTrue()) global = isolate->factory()->false_value();
828
829 CONVERT_ARG_HANDLE_CHECKED(Object, ignoreCase, 3);
830 if (!ignoreCase->IsTrue()) ignoreCase = isolate->factory()->false_value();
831
832 CONVERT_ARG_HANDLE_CHECKED(Object, multiline, 4);
833 if (!multiline->IsTrue()) multiline = isolate->factory()->false_value();
834
835 CONVERT_ARG_HANDLE_CHECKED(Object, sticky, 5);
836 if (!sticky->IsTrue()) sticky = isolate->factory()->false_value();
837
838 Map* map = regexp->map();
839 Object* constructor = map->constructor();
840 if (!FLAG_harmony_regexps && constructor->IsJSFunction() &&
841 JSFunction::cast(constructor)->initial_map() == map) {
842 // If we still have the original map, set in-object properties directly.
843 regexp->InObjectPropertyAtPut(JSRegExp::kSourceFieldIndex, *source);
844 // Both true and false are immovable immortal objects so no need for write
845 // barrier.
846 regexp->InObjectPropertyAtPut(JSRegExp::kGlobalFieldIndex, *global,
847 SKIP_WRITE_BARRIER);
848 regexp->InObjectPropertyAtPut(JSRegExp::kIgnoreCaseFieldIndex, *ignoreCase,
849 SKIP_WRITE_BARRIER);
850 regexp->InObjectPropertyAtPut(JSRegExp::kMultilineFieldIndex, *multiline,
851 SKIP_WRITE_BARRIER);
852 regexp->InObjectPropertyAtPut(JSRegExp::kLastIndexFieldIndex,
853 Smi::FromInt(0), SKIP_WRITE_BARRIER);
854 return *regexp;
855 }
856
857 // Map has changed, so use generic, but slower, method. We also end here if
858 // the --harmony-regexp flag is set, because the initial map does not have
859 // space for the 'sticky' flag, since it is from the snapshot, but must work
860 // both with and without --harmony-regexp. When sticky comes out from under
861 // the flag, we will be able to use the fast initial map.
862 PropertyAttributes final =
863 static_cast<PropertyAttributes>(READ_ONLY | DONT_ENUM | DONT_DELETE);
864 PropertyAttributes writable =
865 static_cast<PropertyAttributes>(DONT_ENUM | DONT_DELETE);
866 Handle<Object> zero(Smi::FromInt(0), isolate);
867 Factory* factory = isolate->factory();
868 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->source_string(),
869 source, final).Check();
870 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->global_string(),
871 global, final).Check();
872 JSObject::SetOwnPropertyIgnoreAttributes(
873 regexp, factory->ignore_case_string(), ignoreCase, final).Check();
874 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->multiline_string(),
875 multiline, final).Check();
876 if (FLAG_harmony_regexps) {
877 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->sticky_string(),
878 sticky, final).Check();
879 }
880 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->last_index_string(),
881 zero, writable).Check();
882 return *regexp;
883 }
884
885
886 RUNTIME_FUNCTION(Runtime_MaterializeRegExpLiteral) {
887 HandleScope scope(isolate);
888 DCHECK(args.length() == 4);
889 CONVERT_ARG_HANDLE_CHECKED(FixedArray, literals, 0);
890 CONVERT_SMI_ARG_CHECKED(index, 1);
891 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 2);
892 CONVERT_ARG_HANDLE_CHECKED(String, flags, 3);
893
894 // Get the RegExp function from the context in the literals array.
895 // This is the RegExp function from the context in which the
896 // function was created. We do not use the RegExp function from the
897 // current native context because this might be the RegExp function
898 // from another context which we should not have access to.
899 Handle<JSFunction> constructor = Handle<JSFunction>(
900 JSFunction::NativeContextFromLiterals(*literals)->regexp_function());
901 // Compute the regular expression literal.
902 Handle<Object> regexp;
903 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
904 isolate, regexp,
905 RegExpImpl::CreateRegExpLiteral(constructor, pattern, flags));
906 literals->set(index, *regexp);
907 return *regexp;
908 }
909
910
911 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
912 // separate last match info. See comment on that function.
913 template <bool has_capture>
914 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
915 Handle<JSRegExp> regexp,
916 Handle<JSArray> last_match_array,
917 Handle<JSArray> result_array) {
918 DCHECK(subject->IsFlat());
919 DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
920
921 int capture_count = regexp->CaptureCount();
922 int subject_length = subject->length();
923
924 static const int kMinLengthToCache = 0x1000;
925
926 if (subject_length > kMinLengthToCache) {
927 Handle<Object> cached_answer(
928 RegExpResultsCache::Lookup(isolate->heap(), *subject, regexp->data(),
929 RegExpResultsCache::REGEXP_MULTIPLE_INDICES),
930 isolate);
931 if (*cached_answer != Smi::FromInt(0)) {
932 Handle<FixedArray> cached_fixed_array =
933 Handle<FixedArray>(FixedArray::cast(*cached_answer));
934 // The cache FixedArray is a COW-array and can therefore be reused.
935 JSArray::SetContent(result_array, cached_fixed_array);
936 // The actual length of the result array is stored in the last element of
937 // the backing store (the backing FixedArray may have a larger capacity).
938 Object* cached_fixed_array_last_element =
939 cached_fixed_array->get(cached_fixed_array->length() - 1);
940 Smi* js_array_length = Smi::cast(cached_fixed_array_last_element);
941 result_array->set_length(js_array_length);
942 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
943 NULL);
944 return *result_array;
945 }
946 }
947
948 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
949 if (global_cache.HasException()) return isolate->heap()->exception();
950
951 // Ensured in Runtime_RegExpExecMultiple.
952 DCHECK(result_array->HasFastObjectElements());
953 Handle<FixedArray> result_elements(
954 FixedArray::cast(result_array->elements()));
955 if (result_elements->length() < 16) {
956 result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
957 }
958
959 FixedArrayBuilder builder(result_elements);
960
961 // Position to search from.
962 int match_start = -1;
963 int match_end = 0;
964 bool first = true;
965
966 // Two smis before and after the match, for very long strings.
967 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
968
969 while (true) {
970 int32_t* current_match = global_cache.FetchNext();
971 if (current_match == NULL) break;
972 match_start = current_match[0];
973 builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
974 if (match_end < match_start) {
975 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
976 match_start);
977 }
978 match_end = current_match[1];
979 {
980 // Avoid accumulating new handles inside loop.
981 HandleScope temp_scope(isolate);
982 Handle<String> match;
983 if (!first) {
984 match = isolate->factory()->NewProperSubString(subject, match_start,
985 match_end);
986 } else {
987 match =
988 isolate->factory()->NewSubString(subject, match_start, match_end);
989 first = false;
990 }
991
992 if (has_capture) {
993 // Arguments array to replace function is match, captures, index and
994 // subject, i.e., 3 + capture count in total.
995 Handle<FixedArray> elements =
996 isolate->factory()->NewFixedArray(3 + capture_count);
997
998 elements->set(0, *match);
999 for (int i = 1; i <= capture_count; i++) {
1000 int start = current_match[i * 2];
1001 if (start >= 0) {
1002 int end = current_match[i * 2 + 1];
1003 DCHECK(start <= end);
1004 Handle<String> substring =
1005 isolate->factory()->NewSubString(subject, start, end);
1006 elements->set(i, *substring);
1007 } else {
1008 DCHECK(current_match[i * 2 + 1] < 0);
1009 elements->set(i, isolate->heap()->undefined_value());
1010 }
1011 }
1012 elements->set(capture_count + 1, Smi::FromInt(match_start));
1013 elements->set(capture_count + 2, *subject);
1014 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1015 } else {
1016 builder.Add(*match);
1017 }
1018 }
1019 }
1020
1021 if (global_cache.HasException()) return isolate->heap()->exception();
1022
1023 if (match_start >= 0) {
1024 // Finished matching, with at least one match.
1025 if (match_end < subject_length) {
1026 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1027 subject_length);
1028 }
1029
1030 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
1031 NULL);
1032
1033 if (subject_length > kMinLengthToCache) {
1034 // Store the length of the result array into the last element of the
1035 // backing FixedArray.
1036 builder.EnsureCapacity(1);
1037 Handle<FixedArray> fixed_array = builder.array();
1038 fixed_array->set(fixed_array->length() - 1,
1039 Smi::FromInt(builder.length()));
1040 // Cache the result and turn the FixedArray into a COW array.
1041 RegExpResultsCache::Enter(isolate, subject,
1042 handle(regexp->data(), isolate), fixed_array,
1043 RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1044 }
1045 return *builder.ToJSArray(result_array);
1046 } else {
1047 return isolate->heap()->null_value(); // No matches at all.
1048 }
1049 }
1050
1051
1052 // This is only called for StringReplaceGlobalRegExpWithFunction. This sets
1053 // lastMatchInfoOverride to maintain the last match info, so we don't need to
1054 // set any other last match array info.
1055 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1056 HandleScope handles(isolate);
1057 DCHECK(args.length() == 4);
1058
1059 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1060 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1061 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
1062 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1063 RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
1064 RUNTIME_ASSERT(result_array->HasFastObjectElements());
1065
1066 subject = String::Flatten(subject);
1067 RUNTIME_ASSERT(regexp->GetFlags().is_global());
1068
1069 if (regexp->CaptureCount() == 0) {
1070 return SearchRegExpMultiple<false>(isolate, subject, regexp,
1071 last_match_info, result_array);
1072 } else {
1073 return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1074 result_array);
1075 }
1076 }
1077
1078
1079 RUNTIME_FUNCTION(RuntimeReference_RegExpConstructResult) {
1080 SealHandleScope shs(isolate);
1081 return __RT_impl_Runtime_RegExpConstructResult(args, isolate);
1082 }
1083
1084
1085 RUNTIME_FUNCTION(RuntimeReference_RegExpExec) {
1086 SealHandleScope shs(isolate);
1087 return __RT_impl_Runtime_RegExpExecRT(args, isolate);
1088 }
1089
1090
1091 // Perform string match of pattern on subject, starting at start index.
1092 // Caller must ensure that 0 <= start_index <= sub->length(),
1093 // and should check that pat->length() + start_index <= sub->length().
1094 int Runtime::StringMatch(Isolate* isolate, Handle<String> sub,
1095 Handle<String> pat, int start_index) {
1096 DCHECK(0 <= start_index);
1097 DCHECK(start_index <= sub->length());
1098
1099 int pattern_length = pat->length();
1100 if (pattern_length == 0) return start_index;
1101
1102 int subject_length = sub->length();
1103 if (start_index + pattern_length > subject_length) return -1;
1104
1105 sub = String::Flatten(sub);
1106 pat = String::Flatten(pat);
1107
1108 DisallowHeapAllocation no_gc; // ensure vectors stay valid
1109 // Extract flattened substrings of cons strings before getting encoding.
1110 String::FlatContent seq_sub = sub->GetFlatContent();
1111 String::FlatContent seq_pat = pat->GetFlatContent();
1112
1113 // dispatch on type of strings
1114 if (seq_pat.IsOneByte()) {
1115 Vector<const uint8_t> pat_vector = seq_pat.ToOneByteVector();
1116 if (seq_sub.IsOneByte()) {
1117 return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector,
1118 start_index);
1119 }
1120 return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector,
1121 start_index);
1122 }
1123 Vector<const uc16> pat_vector = seq_pat.ToUC16Vector();
1124 if (seq_sub.IsOneByte()) {
1125 return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector,
1126 start_index);
1127 }
1128 return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector, start_index);
1129 }
1130 }
1131 } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/runtime/runtime-json.cc ('k') | src/runtime/runtime-strings.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698