Index: src/runtime/runtime-regexp.cc |
diff --git a/src/runtime/runtime-regexp.cc b/src/runtime/runtime-regexp.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..e96d50195a80aaa8233ed3da9b89282bfd0aa469 |
--- /dev/null |
+++ b/src/runtime/runtime-regexp.cc |
@@ -0,0 +1,1131 @@ |
+// Copyright 2014 the V8 project authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "src/v8.h" |
+ |
+#include "src/arguments.h" |
+#include "src/jsregexp-inl.h" |
+#include "src/jsregexp.h" |
+#include "src/runtime/runtime.h" |
+#include "src/runtime/runtime-utils.h" |
+#include "src/runtime/string-builder.h" |
+#include "src/string-search.h" |
+ |
+namespace v8 { |
+namespace internal { |
+ |
+class CompiledReplacement { |
+ public: |
+ explicit CompiledReplacement(Zone* zone) |
+ : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {} |
+ |
+ // Return whether the replacement is simple. |
+ bool Compile(Handle<String> replacement, int capture_count, |
+ int subject_length); |
+ |
+ // Use Apply only if Compile returned false. |
+ void Apply(ReplacementStringBuilder* builder, int match_from, int match_to, |
+ int32_t* match); |
+ |
+ // Number of distinct parts of the replacement pattern. |
+ int parts() { return parts_.length(); } |
+ |
+ Zone* zone() const { return zone_; } |
+ |
+ private: |
+ enum PartType { |
+ SUBJECT_PREFIX = 1, |
+ SUBJECT_SUFFIX, |
+ SUBJECT_CAPTURE, |
+ REPLACEMENT_SUBSTRING, |
+ REPLACEMENT_STRING, |
+ NUMBER_OF_PART_TYPES |
+ }; |
+ |
+ struct ReplacementPart { |
+ static inline ReplacementPart SubjectMatch() { |
+ return ReplacementPart(SUBJECT_CAPTURE, 0); |
+ } |
+ static inline ReplacementPart SubjectCapture(int capture_index) { |
+ return ReplacementPart(SUBJECT_CAPTURE, capture_index); |
+ } |
+ static inline ReplacementPart SubjectPrefix() { |
+ return ReplacementPart(SUBJECT_PREFIX, 0); |
+ } |
+ static inline ReplacementPart SubjectSuffix(int subject_length) { |
+ return ReplacementPart(SUBJECT_SUFFIX, subject_length); |
+ } |
+ static inline ReplacementPart ReplacementString() { |
+ return ReplacementPart(REPLACEMENT_STRING, 0); |
+ } |
+ static inline ReplacementPart ReplacementSubString(int from, int to) { |
+ DCHECK(from >= 0); |
+ DCHECK(to > from); |
+ return ReplacementPart(-from, to); |
+ } |
+ |
+ // If tag <= 0 then it is the negation of a start index of a substring of |
+ // the replacement pattern, otherwise it's a value from PartType. |
+ ReplacementPart(int tag, int data) : tag(tag), data(data) { |
+ // Must be non-positive or a PartType value. |
+ DCHECK(tag < NUMBER_OF_PART_TYPES); |
+ } |
+ // Either a value of PartType or a non-positive number that is |
+ // the negation of an index into the replacement string. |
+ int tag; |
+ // The data value's interpretation depends on the value of tag: |
+ // tag == SUBJECT_PREFIX || |
+ // tag == SUBJECT_SUFFIX: data is unused. |
+ // tag == SUBJECT_CAPTURE: data is the number of the capture. |
+ // tag == REPLACEMENT_SUBSTRING || |
+ // tag == REPLACEMENT_STRING: data is index into array of substrings |
+ // of the replacement string. |
+ // tag <= 0: Temporary representation of the substring of the replacement |
+ // string ranging over -tag .. data. |
+ // Is replaced by REPLACEMENT_{SUB,}STRING when we create the |
+ // substring objects. |
+ int data; |
+ }; |
+ |
+ template <typename Char> |
+ bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts, |
+ Vector<Char> characters, int capture_count, |
+ int subject_length, Zone* zone) { |
+ int length = characters.length(); |
+ int last = 0; |
+ for (int i = 0; i < length; i++) { |
+ Char c = characters[i]; |
+ if (c == '$') { |
+ int next_index = i + 1; |
+ if (next_index == length) { // No next character! |
+ break; |
+ } |
+ Char c2 = characters[next_index]; |
+ switch (c2) { |
+ case '$': |
+ if (i > last) { |
+ // There is a substring before. Include the first "$". |
+ parts->Add( |
+ ReplacementPart::ReplacementSubString(last, next_index), |
+ zone); |
+ last = next_index + 1; // Continue after the second "$". |
+ } else { |
+ // Let the next substring start with the second "$". |
+ last = next_index; |
+ } |
+ i = next_index; |
+ break; |
+ case '`': |
+ if (i > last) { |
+ parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); |
+ } |
+ parts->Add(ReplacementPart::SubjectPrefix(), zone); |
+ i = next_index; |
+ last = i + 1; |
+ break; |
+ case '\'': |
+ if (i > last) { |
+ parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); |
+ } |
+ parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone); |
+ i = next_index; |
+ last = i + 1; |
+ break; |
+ case '&': |
+ if (i > last) { |
+ parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); |
+ } |
+ parts->Add(ReplacementPart::SubjectMatch(), zone); |
+ i = next_index; |
+ last = i + 1; |
+ break; |
+ case '0': |
+ case '1': |
+ case '2': |
+ case '3': |
+ case '4': |
+ case '5': |
+ case '6': |
+ case '7': |
+ case '8': |
+ case '9': { |
+ int capture_ref = c2 - '0'; |
+ if (capture_ref > capture_count) { |
+ i = next_index; |
+ continue; |
+ } |
+ int second_digit_index = next_index + 1; |
+ if (second_digit_index < length) { |
+ // Peek ahead to see if we have two digits. |
+ Char c3 = characters[second_digit_index]; |
+ if ('0' <= c3 && c3 <= '9') { // Double digits. |
+ int double_digit_ref = capture_ref * 10 + c3 - '0'; |
+ if (double_digit_ref <= capture_count) { |
+ next_index = second_digit_index; |
+ capture_ref = double_digit_ref; |
+ } |
+ } |
+ } |
+ if (capture_ref > 0) { |
+ if (i > last) { |
+ parts->Add(ReplacementPart::ReplacementSubString(last, i), |
+ zone); |
+ } |
+ DCHECK(capture_ref <= capture_count); |
+ parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone); |
+ last = next_index + 1; |
+ } |
+ i = next_index; |
+ break; |
+ } |
+ default: |
+ i = next_index; |
+ break; |
+ } |
+ } |
+ } |
+ if (length > last) { |
+ if (last == 0) { |
+ // Replacement is simple. Do not use Apply to do the replacement. |
+ return true; |
+ } else { |
+ parts->Add(ReplacementPart::ReplacementSubString(last, length), zone); |
+ } |
+ } |
+ return false; |
+ } |
+ |
+ ZoneList<ReplacementPart> parts_; |
+ ZoneList<Handle<String> > replacement_substrings_; |
+ Zone* zone_; |
+}; |
+ |
+ |
+bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count, |
+ int subject_length) { |
+ { |
+ DisallowHeapAllocation no_gc; |
+ String::FlatContent content = replacement->GetFlatContent(); |
+ DCHECK(content.IsFlat()); |
+ bool simple = false; |
+ if (content.IsOneByte()) { |
+ simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(), |
+ capture_count, subject_length, zone()); |
+ } else { |
+ DCHECK(content.IsTwoByte()); |
+ simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(), |
+ capture_count, subject_length, zone()); |
+ } |
+ if (simple) return true; |
+ } |
+ |
+ Isolate* isolate = replacement->GetIsolate(); |
+ // Find substrings of replacement string and create them as String objects. |
+ int substring_index = 0; |
+ for (int i = 0, n = parts_.length(); i < n; i++) { |
+ int tag = parts_[i].tag; |
+ if (tag <= 0) { // A replacement string slice. |
+ int from = -tag; |
+ int to = parts_[i].data; |
+ replacement_substrings_.Add( |
+ isolate->factory()->NewSubString(replacement, from, to), zone()); |
+ parts_[i].tag = REPLACEMENT_SUBSTRING; |
+ parts_[i].data = substring_index; |
+ substring_index++; |
+ } else if (tag == REPLACEMENT_STRING) { |
+ replacement_substrings_.Add(replacement, zone()); |
+ parts_[i].data = substring_index; |
+ substring_index++; |
+ } |
+ } |
+ return false; |
+} |
+ |
+ |
+void CompiledReplacement::Apply(ReplacementStringBuilder* builder, |
+ int match_from, int match_to, int32_t* match) { |
+ DCHECK_LT(0, parts_.length()); |
+ for (int i = 0, n = parts_.length(); i < n; i++) { |
+ ReplacementPart part = parts_[i]; |
+ switch (part.tag) { |
+ case SUBJECT_PREFIX: |
+ if (match_from > 0) builder->AddSubjectSlice(0, match_from); |
+ break; |
+ case SUBJECT_SUFFIX: { |
+ int subject_length = part.data; |
+ if (match_to < subject_length) { |
+ builder->AddSubjectSlice(match_to, subject_length); |
+ } |
+ break; |
+ } |
+ case SUBJECT_CAPTURE: { |
+ int capture = part.data; |
+ int from = match[capture * 2]; |
+ int to = match[capture * 2 + 1]; |
+ if (from >= 0 && to > from) { |
+ builder->AddSubjectSlice(from, to); |
+ } |
+ break; |
+ } |
+ case REPLACEMENT_SUBSTRING: |
+ case REPLACEMENT_STRING: |
+ builder->AddString(replacement_substrings_[part.data]); |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ } |
+ } |
+} |
+ |
+ |
+void FindOneByteStringIndices(Vector<const uint8_t> subject, char pattern, |
+ ZoneList<int>* indices, unsigned int limit, |
+ Zone* zone) { |
+ DCHECK(limit > 0); |
+ // Collect indices of pattern in subject using memchr. |
+ // Stop after finding at most limit values. |
+ const uint8_t* subject_start = subject.start(); |
+ const uint8_t* subject_end = subject_start + subject.length(); |
+ const uint8_t* pos = subject_start; |
+ while (limit > 0) { |
+ pos = reinterpret_cast<const uint8_t*>( |
+ memchr(pos, pattern, subject_end - pos)); |
+ if (pos == NULL) return; |
+ indices->Add(static_cast<int>(pos - subject_start), zone); |
+ pos++; |
+ limit--; |
+ } |
+} |
+ |
+ |
+void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern, |
+ ZoneList<int>* indices, unsigned int limit, |
+ Zone* zone) { |
+ DCHECK(limit > 0); |
+ const uc16* subject_start = subject.start(); |
+ const uc16* subject_end = subject_start + subject.length(); |
+ for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) { |
+ if (*pos == pattern) { |
+ indices->Add(static_cast<int>(pos - subject_start), zone); |
+ limit--; |
+ } |
+ } |
+} |
+ |
+ |
+template <typename SubjectChar, typename PatternChar> |
+void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject, |
+ Vector<const PatternChar> pattern, |
+ ZoneList<int>* indices, unsigned int limit, Zone* zone) { |
+ DCHECK(limit > 0); |
+ // Collect indices of pattern in subject. |
+ // Stop after finding at most limit values. |
+ int pattern_length = pattern.length(); |
+ int index = 0; |
+ StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
+ while (limit > 0) { |
+ index = search.Search(subject, index); |
+ if (index < 0) return; |
+ indices->Add(index, zone); |
+ index += pattern_length; |
+ limit--; |
+ } |
+} |
+ |
+ |
+void FindStringIndicesDispatch(Isolate* isolate, String* subject, |
+ String* pattern, ZoneList<int>* indices, |
+ unsigned int limit, Zone* zone) { |
+ { |
+ DisallowHeapAllocation no_gc; |
+ String::FlatContent subject_content = subject->GetFlatContent(); |
+ String::FlatContent pattern_content = pattern->GetFlatContent(); |
+ DCHECK(subject_content.IsFlat()); |
+ DCHECK(pattern_content.IsFlat()); |
+ if (subject_content.IsOneByte()) { |
+ Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); |
+ if (pattern_content.IsOneByte()) { |
+ Vector<const uint8_t> pattern_vector = |
+ pattern_content.ToOneByteVector(); |
+ if (pattern_vector.length() == 1) { |
+ FindOneByteStringIndices(subject_vector, pattern_vector[0], indices, |
+ limit, zone); |
+ } else { |
+ FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
+ limit, zone); |
+ } |
+ } else { |
+ FindStringIndices(isolate, subject_vector, |
+ pattern_content.ToUC16Vector(), indices, limit, zone); |
+ } |
+ } else { |
+ Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); |
+ if (pattern_content.IsOneByte()) { |
+ Vector<const uint8_t> pattern_vector = |
+ pattern_content.ToOneByteVector(); |
+ if (pattern_vector.length() == 1) { |
+ FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, |
+ limit, zone); |
+ } else { |
+ FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
+ limit, zone); |
+ } |
+ } else { |
+ Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector(); |
+ if (pattern_vector.length() == 1) { |
+ FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, |
+ limit, zone); |
+ } else { |
+ FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
+ limit, zone); |
+ } |
+ } |
+ } |
+ } |
+} |
+ |
+ |
+template <typename ResultSeqString> |
+MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString( |
+ Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp, |
+ Handle<String> replacement, Handle<JSArray> last_match_info) { |
+ DCHECK(subject->IsFlat()); |
+ DCHECK(replacement->IsFlat()); |
+ |
+ ZoneScope zone_scope(isolate->runtime_zone()); |
+ ZoneList<int> indices(8, zone_scope.zone()); |
+ DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); |
+ String* pattern = |
+ String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); |
+ int subject_len = subject->length(); |
+ int pattern_len = pattern->length(); |
+ int replacement_len = replacement->length(); |
+ |
+ FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff, |
+ zone_scope.zone()); |
+ |
+ int matches = indices.length(); |
+ if (matches == 0) return *subject; |
+ |
+ // Detect integer overflow. |
+ int64_t result_len_64 = (static_cast<int64_t>(replacement_len) - |
+ static_cast<int64_t>(pattern_len)) * |
+ static_cast<int64_t>(matches) + |
+ static_cast<int64_t>(subject_len); |
+ int result_len; |
+ if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) { |
+ STATIC_ASSERT(String::kMaxLength < kMaxInt); |
+ result_len = kMaxInt; // Provoke exception. |
+ } else { |
+ result_len = static_cast<int>(result_len_64); |
+ } |
+ |
+ int subject_pos = 0; |
+ int result_pos = 0; |
+ |
+ MaybeHandle<SeqString> maybe_res; |
+ if (ResultSeqString::kHasOneByteEncoding) { |
+ maybe_res = isolate->factory()->NewRawOneByteString(result_len); |
+ } else { |
+ maybe_res = isolate->factory()->NewRawTwoByteString(result_len); |
+ } |
+ Handle<SeqString> untyped_res; |
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res); |
+ Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res); |
+ |
+ for (int i = 0; i < matches; i++) { |
+ // Copy non-matched subject content. |
+ if (subject_pos < indices.at(i)) { |
+ String::WriteToFlat(*subject, result->GetChars() + result_pos, |
+ subject_pos, indices.at(i)); |
+ result_pos += indices.at(i) - subject_pos; |
+ } |
+ |
+ // Replace match. |
+ if (replacement_len > 0) { |
+ String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0, |
+ replacement_len); |
+ result_pos += replacement_len; |
+ } |
+ |
+ subject_pos = indices.at(i) + pattern_len; |
+ } |
+ // Add remaining subject content at the end. |
+ if (subject_pos < subject_len) { |
+ String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos, |
+ subject_len); |
+ } |
+ |
+ int32_t match_indices[] = {indices.at(matches - 1), |
+ indices.at(matches - 1) + pattern_len}; |
+ RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices); |
+ |
+ return *result; |
+} |
+ |
+ |
+MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString( |
+ Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, |
+ Handle<String> replacement, Handle<JSArray> last_match_info) { |
+ DCHECK(subject->IsFlat()); |
+ DCHECK(replacement->IsFlat()); |
+ |
+ int capture_count = regexp->CaptureCount(); |
+ int subject_length = subject->length(); |
+ |
+ // CompiledReplacement uses zone allocation. |
+ ZoneScope zone_scope(isolate->runtime_zone()); |
+ CompiledReplacement compiled_replacement(zone_scope.zone()); |
+ bool simple_replace = |
+ compiled_replacement.Compile(replacement, capture_count, subject_length); |
+ |
+ // Shortcut for simple non-regexp global replacements |
+ if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) { |
+ if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) { |
+ return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( |
+ isolate, subject, regexp, replacement, last_match_info); |
+ } else { |
+ return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( |
+ isolate, subject, regexp, replacement, last_match_info); |
+ } |
+ } |
+ |
+ RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); |
+ if (global_cache.HasException()) return isolate->heap()->exception(); |
+ |
+ int32_t* current_match = global_cache.FetchNext(); |
+ if (current_match == NULL) { |
+ if (global_cache.HasException()) return isolate->heap()->exception(); |
+ return *subject; |
+ } |
+ |
+ // Guessing the number of parts that the final result string is built |
+ // from. Global regexps can match any number of times, so we guess |
+ // conservatively. |
+ int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1; |
+ ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts); |
+ |
+ // Number of parts added by compiled replacement plus preceeding |
+ // string and possibly suffix after last match. It is possible for |
+ // all components to use two elements when encoded as two smis. |
+ const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2); |
+ |
+ int prev = 0; |
+ |
+ do { |
+ builder.EnsureCapacity(parts_added_per_loop); |
+ |
+ int start = current_match[0]; |
+ int end = current_match[1]; |
+ |
+ if (prev < start) { |
+ builder.AddSubjectSlice(prev, start); |
+ } |
+ |
+ if (simple_replace) { |
+ builder.AddString(replacement); |
+ } else { |
+ compiled_replacement.Apply(&builder, start, end, current_match); |
+ } |
+ prev = end; |
+ |
+ current_match = global_cache.FetchNext(); |
+ } while (current_match != NULL); |
+ |
+ if (global_cache.HasException()) return isolate->heap()->exception(); |
+ |
+ if (prev < subject_length) { |
+ builder.EnsureCapacity(2); |
+ builder.AddSubjectSlice(prev, subject_length); |
+ } |
+ |
+ RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count, |
+ global_cache.LastSuccessfulMatch()); |
+ |
+ Handle<String> result; |
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString()); |
+ return *result; |
+} |
+ |
+ |
+template <typename ResultSeqString> |
+MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString( |
+ Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, |
+ Handle<JSArray> last_match_info) { |
+ DCHECK(subject->IsFlat()); |
+ |
+ // Shortcut for simple non-regexp global replacements |
+ if (regexp->TypeTag() == JSRegExp::ATOM) { |
+ Handle<String> empty_string = isolate->factory()->empty_string(); |
+ if (subject->IsOneByteRepresentation()) { |
+ return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( |
+ isolate, subject, regexp, empty_string, last_match_info); |
+ } else { |
+ return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( |
+ isolate, subject, regexp, empty_string, last_match_info); |
+ } |
+ } |
+ |
+ RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); |
+ if (global_cache.HasException()) return isolate->heap()->exception(); |
+ |
+ int32_t* current_match = global_cache.FetchNext(); |
+ if (current_match == NULL) { |
+ if (global_cache.HasException()) return isolate->heap()->exception(); |
+ return *subject; |
+ } |
+ |
+ int start = current_match[0]; |
+ int end = current_match[1]; |
+ int capture_count = regexp->CaptureCount(); |
+ int subject_length = subject->length(); |
+ |
+ int new_length = subject_length - (end - start); |
+ if (new_length == 0) return isolate->heap()->empty_string(); |
+ |
+ Handle<ResultSeqString> answer; |
+ if (ResultSeqString::kHasOneByteEncoding) { |
+ answer = Handle<ResultSeqString>::cast( |
+ isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked()); |
+ } else { |
+ answer = Handle<ResultSeqString>::cast( |
+ isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked()); |
+ } |
+ |
+ int prev = 0; |
+ int position = 0; |
+ |
+ do { |
+ start = current_match[0]; |
+ end = current_match[1]; |
+ if (prev < start) { |
+ // Add substring subject[prev;start] to answer string. |
+ String::WriteToFlat(*subject, answer->GetChars() + position, prev, start); |
+ position += start - prev; |
+ } |
+ prev = end; |
+ |
+ current_match = global_cache.FetchNext(); |
+ } while (current_match != NULL); |
+ |
+ if (global_cache.HasException()) return isolate->heap()->exception(); |
+ |
+ RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count, |
+ global_cache.LastSuccessfulMatch()); |
+ |
+ if (prev < subject_length) { |
+ // Add substring subject[prev;length] to answer string. |
+ String::WriteToFlat(*subject, answer->GetChars() + position, prev, |
+ subject_length); |
+ position += subject_length - prev; |
+ } |
+ |
+ if (position == 0) return isolate->heap()->empty_string(); |
+ |
+ // Shorten string and fill |
+ int string_size = ResultSeqString::SizeFor(position); |
+ int allocated_string_size = ResultSeqString::SizeFor(new_length); |
+ int delta = allocated_string_size - string_size; |
+ |
+ answer->set_length(position); |
+ if (delta == 0) return *answer; |
+ |
+ Address end_of_string = answer->address() + string_size; |
+ Heap* heap = isolate->heap(); |
+ |
+ // The trimming is performed on a newly allocated object, which is on a |
+ // fresly allocated page or on an already swept page. Hence, the sweeper |
+ // thread can not get confused with the filler creation. No synchronization |
+ // needed. |
+ heap->CreateFillerObjectAt(end_of_string, delta); |
+ heap->AdjustLiveBytes(answer->address(), -delta, Heap::FROM_MUTATOR); |
+ return *answer; |
+} |
+ |
+ |
+RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) { |
+ HandleScope scope(isolate); |
+ DCHECK(args.length() == 4); |
+ |
+ CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); |
+ CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2); |
+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); |
+ CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); |
+ |
+ RUNTIME_ASSERT(regexp->GetFlags().is_global()); |
+ RUNTIME_ASSERT(last_match_info->HasFastObjectElements()); |
+ |
+ subject = String::Flatten(subject); |
+ |
+ if (replacement->length() == 0) { |
+ if (subject->HasOnlyOneByteChars()) { |
+ return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>( |
+ isolate, subject, regexp, last_match_info); |
+ } else { |
+ return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>( |
+ isolate, subject, regexp, last_match_info); |
+ } |
+ } |
+ |
+ replacement = String::Flatten(replacement); |
+ |
+ return StringReplaceGlobalRegExpWithString(isolate, subject, regexp, |
+ replacement, last_match_info); |
+} |
+ |
+ |
+RUNTIME_FUNCTION(Runtime_StringSplit) { |
+ HandleScope handle_scope(isolate); |
+ DCHECK(args.length() == 3); |
+ CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); |
+ CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1); |
+ CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); |
+ RUNTIME_ASSERT(limit > 0); |
+ |
+ int subject_length = subject->length(); |
+ int pattern_length = pattern->length(); |
+ RUNTIME_ASSERT(pattern_length > 0); |
+ |
+ if (limit == 0xffffffffu) { |
+ Handle<Object> cached_answer( |
+ RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern, |
+ RegExpResultsCache::STRING_SPLIT_SUBSTRINGS), |
+ isolate); |
+ if (*cached_answer != Smi::FromInt(0)) { |
+ // The cache FixedArray is a COW-array and can therefore be reused. |
+ Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements( |
+ Handle<FixedArray>::cast(cached_answer)); |
+ return *result; |
+ } |
+ } |
+ |
+ // The limit can be very large (0xffffffffu), but since the pattern |
+ // isn't empty, we can never create more parts than ~half the length |
+ // of the subject. |
+ |
+ subject = String::Flatten(subject); |
+ pattern = String::Flatten(pattern); |
+ |
+ static const int kMaxInitialListCapacity = 16; |
+ |
+ ZoneScope zone_scope(isolate->runtime_zone()); |
+ |
+ // Find (up to limit) indices of separator and end-of-string in subject |
+ int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); |
+ ZoneList<int> indices(initial_capacity, zone_scope.zone()); |
+ |
+ FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit, |
+ zone_scope.zone()); |
+ |
+ if (static_cast<uint32_t>(indices.length()) < limit) { |
+ indices.Add(subject_length, zone_scope.zone()); |
+ } |
+ |
+ // The list indices now contains the end of each part to create. |
+ |
+ // Create JSArray of substrings separated by separator. |
+ int part_count = indices.length(); |
+ |
+ Handle<JSArray> result = isolate->factory()->NewJSArray(part_count); |
+ JSObject::EnsureCanContainHeapObjectElements(result); |
+ result->set_length(Smi::FromInt(part_count)); |
+ |
+ DCHECK(result->HasFastObjectElements()); |
+ |
+ if (part_count == 1 && indices.at(0) == subject_length) { |
+ FixedArray::cast(result->elements())->set(0, *subject); |
+ return *result; |
+ } |
+ |
+ Handle<FixedArray> elements(FixedArray::cast(result->elements())); |
+ int part_start = 0; |
+ for (int i = 0; i < part_count; i++) { |
+ HandleScope local_loop_handle(isolate); |
+ int part_end = indices.at(i); |
+ Handle<String> substring = |
+ isolate->factory()->NewProperSubString(subject, part_start, part_end); |
+ elements->set(i, *substring); |
+ part_start = part_end + pattern_length; |
+ } |
+ |
+ if (limit == 0xffffffffu) { |
+ if (result->HasFastObjectElements()) { |
+ RegExpResultsCache::Enter(isolate, subject, pattern, elements, |
+ RegExpResultsCache::STRING_SPLIT_SUBSTRINGS); |
+ } |
+ } |
+ |
+ return *result; |
+} |
+ |
+ |
+RUNTIME_FUNCTION(Runtime_RegExpCompile) { |
+ HandleScope scope(isolate); |
+ DCHECK(args.length() == 3); |
+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, re, 0); |
+ CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1); |
+ CONVERT_ARG_HANDLE_CHECKED(String, flags, 2); |
+ Handle<Object> result; |
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, |
+ RegExpImpl::Compile(re, pattern, flags)); |
+ return *result; |
+} |
+ |
+ |
+RUNTIME_FUNCTION(Runtime_RegExpExecRT) { |
+ HandleScope scope(isolate); |
+ DCHECK(args.length() == 4); |
+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
+ CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); |
+ CONVERT_INT32_ARG_CHECKED(index, 2); |
+ CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); |
+ // Due to the way the JS calls are constructed this must be less than the |
+ // length of a string, i.e. it is always a Smi. We check anyway for security. |
+ RUNTIME_ASSERT(index >= 0); |
+ RUNTIME_ASSERT(index <= subject->length()); |
+ isolate->counters()->regexp_entry_runtime()->Increment(); |
+ Handle<Object> result; |
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
+ isolate, result, |
+ RegExpImpl::Exec(regexp, subject, index, last_match_info)); |
+ return *result; |
+} |
+ |
+ |
+RUNTIME_FUNCTION(Runtime_RegExpConstructResult) { |
+ HandleScope handle_scope(isolate); |
+ DCHECK(args.length() == 3); |
+ CONVERT_SMI_ARG_CHECKED(size, 0); |
+ RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength); |
+ CONVERT_ARG_HANDLE_CHECKED(Object, index, 1); |
+ CONVERT_ARG_HANDLE_CHECKED(Object, input, 2); |
+ Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size); |
+ Handle<Map> regexp_map(isolate->native_context()->regexp_result_map()); |
+ Handle<JSObject> object = |
+ isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED, false); |
+ Handle<JSArray> array = Handle<JSArray>::cast(object); |
+ array->set_elements(*elements); |
+ array->set_length(Smi::FromInt(size)); |
+ // Write in-object properties after the length of the array. |
+ array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index); |
+ array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input); |
+ return *array; |
+} |
+ |
+ |
+RUNTIME_FUNCTION(Runtime_RegExpInitializeObject) { |
+ HandleScope scope(isolate); |
+ DCHECK(args.length() == 6); |
+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
+ CONVERT_ARG_HANDLE_CHECKED(String, source, 1); |
+ // If source is the empty string we set it to "(?:)" instead as |
+ // suggested by ECMA-262, 5th, section 15.10.4.1. |
+ if (source->length() == 0) source = isolate->factory()->query_colon_string(); |
+ |
+ CONVERT_ARG_HANDLE_CHECKED(Object, global, 2); |
+ if (!global->IsTrue()) global = isolate->factory()->false_value(); |
+ |
+ CONVERT_ARG_HANDLE_CHECKED(Object, ignoreCase, 3); |
+ if (!ignoreCase->IsTrue()) ignoreCase = isolate->factory()->false_value(); |
+ |
+ CONVERT_ARG_HANDLE_CHECKED(Object, multiline, 4); |
+ if (!multiline->IsTrue()) multiline = isolate->factory()->false_value(); |
+ |
+ CONVERT_ARG_HANDLE_CHECKED(Object, sticky, 5); |
+ if (!sticky->IsTrue()) sticky = isolate->factory()->false_value(); |
+ |
+ Map* map = regexp->map(); |
+ Object* constructor = map->constructor(); |
+ if (!FLAG_harmony_regexps && constructor->IsJSFunction() && |
+ JSFunction::cast(constructor)->initial_map() == map) { |
+ // If we still have the original map, set in-object properties directly. |
+ regexp->InObjectPropertyAtPut(JSRegExp::kSourceFieldIndex, *source); |
+ // Both true and false are immovable immortal objects so no need for write |
+ // barrier. |
+ regexp->InObjectPropertyAtPut(JSRegExp::kGlobalFieldIndex, *global, |
+ SKIP_WRITE_BARRIER); |
+ regexp->InObjectPropertyAtPut(JSRegExp::kIgnoreCaseFieldIndex, *ignoreCase, |
+ SKIP_WRITE_BARRIER); |
+ regexp->InObjectPropertyAtPut(JSRegExp::kMultilineFieldIndex, *multiline, |
+ SKIP_WRITE_BARRIER); |
+ regexp->InObjectPropertyAtPut(JSRegExp::kLastIndexFieldIndex, |
+ Smi::FromInt(0), SKIP_WRITE_BARRIER); |
+ return *regexp; |
+ } |
+ |
+ // Map has changed, so use generic, but slower, method. We also end here if |
+ // the --harmony-regexp flag is set, because the initial map does not have |
+ // space for the 'sticky' flag, since it is from the snapshot, but must work |
+ // both with and without --harmony-regexp. When sticky comes out from under |
+ // the flag, we will be able to use the fast initial map. |
+ PropertyAttributes final = |
+ static_cast<PropertyAttributes>(READ_ONLY | DONT_ENUM | DONT_DELETE); |
+ PropertyAttributes writable = |
+ static_cast<PropertyAttributes>(DONT_ENUM | DONT_DELETE); |
+ Handle<Object> zero(Smi::FromInt(0), isolate); |
+ Factory* factory = isolate->factory(); |
+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->source_string(), |
+ source, final).Check(); |
+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->global_string(), |
+ global, final).Check(); |
+ JSObject::SetOwnPropertyIgnoreAttributes( |
+ regexp, factory->ignore_case_string(), ignoreCase, final).Check(); |
+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->multiline_string(), |
+ multiline, final).Check(); |
+ if (FLAG_harmony_regexps) { |
+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->sticky_string(), |
+ sticky, final).Check(); |
+ } |
+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->last_index_string(), |
+ zero, writable).Check(); |
+ return *regexp; |
+} |
+ |
+ |
+RUNTIME_FUNCTION(Runtime_MaterializeRegExpLiteral) { |
+ HandleScope scope(isolate); |
+ DCHECK(args.length() == 4); |
+ CONVERT_ARG_HANDLE_CHECKED(FixedArray, literals, 0); |
+ CONVERT_SMI_ARG_CHECKED(index, 1); |
+ CONVERT_ARG_HANDLE_CHECKED(String, pattern, 2); |
+ CONVERT_ARG_HANDLE_CHECKED(String, flags, 3); |
+ |
+ // Get the RegExp function from the context in the literals array. |
+ // This is the RegExp function from the context in which the |
+ // function was created. We do not use the RegExp function from the |
+ // current native context because this might be the RegExp function |
+ // from another context which we should not have access to. |
+ Handle<JSFunction> constructor = Handle<JSFunction>( |
+ JSFunction::NativeContextFromLiterals(*literals)->regexp_function()); |
+ // Compute the regular expression literal. |
+ Handle<Object> regexp; |
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
+ isolate, regexp, |
+ RegExpImpl::CreateRegExpLiteral(constructor, pattern, flags)); |
+ literals->set(index, *regexp); |
+ return *regexp; |
+} |
+ |
+ |
+// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain |
+// separate last match info. See comment on that function. |
+template <bool has_capture> |
+static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject, |
+ Handle<JSRegExp> regexp, |
+ Handle<JSArray> last_match_array, |
+ Handle<JSArray> result_array) { |
+ DCHECK(subject->IsFlat()); |
+ DCHECK_NE(has_capture, regexp->CaptureCount() == 0); |
+ |
+ int capture_count = regexp->CaptureCount(); |
+ int subject_length = subject->length(); |
+ |
+ static const int kMinLengthToCache = 0x1000; |
+ |
+ if (subject_length > kMinLengthToCache) { |
+ Handle<Object> cached_answer( |
+ RegExpResultsCache::Lookup(isolate->heap(), *subject, regexp->data(), |
+ RegExpResultsCache::REGEXP_MULTIPLE_INDICES), |
+ isolate); |
+ if (*cached_answer != Smi::FromInt(0)) { |
+ Handle<FixedArray> cached_fixed_array = |
+ Handle<FixedArray>(FixedArray::cast(*cached_answer)); |
+ // The cache FixedArray is a COW-array and can therefore be reused. |
+ JSArray::SetContent(result_array, cached_fixed_array); |
+ // The actual length of the result array is stored in the last element of |
+ // the backing store (the backing FixedArray may have a larger capacity). |
+ Object* cached_fixed_array_last_element = |
+ cached_fixed_array->get(cached_fixed_array->length() - 1); |
+ Smi* js_array_length = Smi::cast(cached_fixed_array_last_element); |
+ result_array->set_length(js_array_length); |
+ RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count, |
+ NULL); |
+ return *result_array; |
+ } |
+ } |
+ |
+ RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); |
+ if (global_cache.HasException()) return isolate->heap()->exception(); |
+ |
+ // Ensured in Runtime_RegExpExecMultiple. |
+ DCHECK(result_array->HasFastObjectElements()); |
+ Handle<FixedArray> result_elements( |
+ FixedArray::cast(result_array->elements())); |
+ if (result_elements->length() < 16) { |
+ result_elements = isolate->factory()->NewFixedArrayWithHoles(16); |
+ } |
+ |
+ FixedArrayBuilder builder(result_elements); |
+ |
+ // Position to search from. |
+ int match_start = -1; |
+ int match_end = 0; |
+ bool first = true; |
+ |
+ // Two smis before and after the match, for very long strings. |
+ static const int kMaxBuilderEntriesPerRegExpMatch = 5; |
+ |
+ while (true) { |
+ int32_t* current_match = global_cache.FetchNext(); |
+ if (current_match == NULL) break; |
+ match_start = current_match[0]; |
+ builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); |
+ if (match_end < match_start) { |
+ ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, |
+ match_start); |
+ } |
+ match_end = current_match[1]; |
+ { |
+ // Avoid accumulating new handles inside loop. |
+ HandleScope temp_scope(isolate); |
+ Handle<String> match; |
+ if (!first) { |
+ match = isolate->factory()->NewProperSubString(subject, match_start, |
+ match_end); |
+ } else { |
+ match = |
+ isolate->factory()->NewSubString(subject, match_start, match_end); |
+ first = false; |
+ } |
+ |
+ if (has_capture) { |
+ // Arguments array to replace function is match, captures, index and |
+ // subject, i.e., 3 + capture count in total. |
+ Handle<FixedArray> elements = |
+ isolate->factory()->NewFixedArray(3 + capture_count); |
+ |
+ elements->set(0, *match); |
+ for (int i = 1; i <= capture_count; i++) { |
+ int start = current_match[i * 2]; |
+ if (start >= 0) { |
+ int end = current_match[i * 2 + 1]; |
+ DCHECK(start <= end); |
+ Handle<String> substring = |
+ isolate->factory()->NewSubString(subject, start, end); |
+ elements->set(i, *substring); |
+ } else { |
+ DCHECK(current_match[i * 2 + 1] < 0); |
+ elements->set(i, isolate->heap()->undefined_value()); |
+ } |
+ } |
+ elements->set(capture_count + 1, Smi::FromInt(match_start)); |
+ elements->set(capture_count + 2, *subject); |
+ builder.Add(*isolate->factory()->NewJSArrayWithElements(elements)); |
+ } else { |
+ builder.Add(*match); |
+ } |
+ } |
+ } |
+ |
+ if (global_cache.HasException()) return isolate->heap()->exception(); |
+ |
+ if (match_start >= 0) { |
+ // Finished matching, with at least one match. |
+ if (match_end < subject_length) { |
+ ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, |
+ subject_length); |
+ } |
+ |
+ RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count, |
+ NULL); |
+ |
+ if (subject_length > kMinLengthToCache) { |
+ // Store the length of the result array into the last element of the |
+ // backing FixedArray. |
+ builder.EnsureCapacity(1); |
+ Handle<FixedArray> fixed_array = builder.array(); |
+ fixed_array->set(fixed_array->length() - 1, |
+ Smi::FromInt(builder.length())); |
+ // Cache the result and turn the FixedArray into a COW array. |
+ RegExpResultsCache::Enter(isolate, subject, |
+ handle(regexp->data(), isolate), fixed_array, |
+ RegExpResultsCache::REGEXP_MULTIPLE_INDICES); |
+ } |
+ return *builder.ToJSArray(result_array); |
+ } else { |
+ return isolate->heap()->null_value(); // No matches at all. |
+ } |
+} |
+ |
+ |
+// This is only called for StringReplaceGlobalRegExpWithFunction. This sets |
+// lastMatchInfoOverride to maintain the last match info, so we don't need to |
+// set any other last match array info. |
+RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) { |
+ HandleScope handles(isolate); |
+ DCHECK(args.length() == 4); |
+ |
+ CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); |
+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
+ CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2); |
+ CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); |
+ RUNTIME_ASSERT(last_match_info->HasFastObjectElements()); |
+ RUNTIME_ASSERT(result_array->HasFastObjectElements()); |
+ |
+ subject = String::Flatten(subject); |
+ RUNTIME_ASSERT(regexp->GetFlags().is_global()); |
+ |
+ if (regexp->CaptureCount() == 0) { |
+ return SearchRegExpMultiple<false>(isolate, subject, regexp, |
+ last_match_info, result_array); |
+ } else { |
+ return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info, |
+ result_array); |
+ } |
+} |
+ |
+ |
+RUNTIME_FUNCTION(RuntimeReference_RegExpConstructResult) { |
+ SealHandleScope shs(isolate); |
+ return __RT_impl_Runtime_RegExpConstructResult(args, isolate); |
+} |
+ |
+ |
+RUNTIME_FUNCTION(RuntimeReference_RegExpExec) { |
+ SealHandleScope shs(isolate); |
+ return __RT_impl_Runtime_RegExpExecRT(args, isolate); |
+} |
+ |
+ |
+// Perform string match of pattern on subject, starting at start index. |
+// Caller must ensure that 0 <= start_index <= sub->length(), |
+// and should check that pat->length() + start_index <= sub->length(). |
+int Runtime::StringMatch(Isolate* isolate, Handle<String> sub, |
+ Handle<String> pat, int start_index) { |
+ DCHECK(0 <= start_index); |
+ DCHECK(start_index <= sub->length()); |
+ |
+ int pattern_length = pat->length(); |
+ if (pattern_length == 0) return start_index; |
+ |
+ int subject_length = sub->length(); |
+ if (start_index + pattern_length > subject_length) return -1; |
+ |
+ sub = String::Flatten(sub); |
+ pat = String::Flatten(pat); |
+ |
+ DisallowHeapAllocation no_gc; // ensure vectors stay valid |
+ // Extract flattened substrings of cons strings before getting encoding. |
+ String::FlatContent seq_sub = sub->GetFlatContent(); |
+ String::FlatContent seq_pat = pat->GetFlatContent(); |
+ |
+ // dispatch on type of strings |
+ if (seq_pat.IsOneByte()) { |
+ Vector<const uint8_t> pat_vector = seq_pat.ToOneByteVector(); |
+ if (seq_sub.IsOneByte()) { |
+ return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector, |
+ start_index); |
+ } |
+ return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector, |
+ start_index); |
+ } |
+ Vector<const uc16> pat_vector = seq_pat.ToUC16Vector(); |
+ if (seq_sub.IsOneByte()) { |
+ return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector, |
+ start_index); |
+ } |
+ return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector, start_index); |
+} |
+} |
+} // namespace v8::internal |