Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(972)

Unified Diff: src/runtime/runtime-regexp.cc

Issue 604703004: Move string-related runtime functions into separate files. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/runtime/runtime-json.cc ('k') | src/runtime/runtime-strings.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/runtime/runtime-regexp.cc
diff --git a/src/runtime/runtime-regexp.cc b/src/runtime/runtime-regexp.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e96d50195a80aaa8233ed3da9b89282bfd0aa469
--- /dev/null
+++ b/src/runtime/runtime-regexp.cc
@@ -0,0 +1,1131 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/v8.h"
+
+#include "src/arguments.h"
+#include "src/jsregexp-inl.h"
+#include "src/jsregexp.h"
+#include "src/runtime/runtime.h"
+#include "src/runtime/runtime-utils.h"
+#include "src/runtime/string-builder.h"
+#include "src/string-search.h"
+
+namespace v8 {
+namespace internal {
+
+class CompiledReplacement {
+ public:
+ explicit CompiledReplacement(Zone* zone)
+ : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
+
+ // Return whether the replacement is simple.
+ bool Compile(Handle<String> replacement, int capture_count,
+ int subject_length);
+
+ // Use Apply only if Compile returned false.
+ void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
+ int32_t* match);
+
+ // Number of distinct parts of the replacement pattern.
+ int parts() { return parts_.length(); }
+
+ Zone* zone() const { return zone_; }
+
+ private:
+ enum PartType {
+ SUBJECT_PREFIX = 1,
+ SUBJECT_SUFFIX,
+ SUBJECT_CAPTURE,
+ REPLACEMENT_SUBSTRING,
+ REPLACEMENT_STRING,
+ NUMBER_OF_PART_TYPES
+ };
+
+ struct ReplacementPart {
+ static inline ReplacementPart SubjectMatch() {
+ return ReplacementPart(SUBJECT_CAPTURE, 0);
+ }
+ static inline ReplacementPart SubjectCapture(int capture_index) {
+ return ReplacementPart(SUBJECT_CAPTURE, capture_index);
+ }
+ static inline ReplacementPart SubjectPrefix() {
+ return ReplacementPart(SUBJECT_PREFIX, 0);
+ }
+ static inline ReplacementPart SubjectSuffix(int subject_length) {
+ return ReplacementPart(SUBJECT_SUFFIX, subject_length);
+ }
+ static inline ReplacementPart ReplacementString() {
+ return ReplacementPart(REPLACEMENT_STRING, 0);
+ }
+ static inline ReplacementPart ReplacementSubString(int from, int to) {
+ DCHECK(from >= 0);
+ DCHECK(to > from);
+ return ReplacementPart(-from, to);
+ }
+
+ // If tag <= 0 then it is the negation of a start index of a substring of
+ // the replacement pattern, otherwise it's a value from PartType.
+ ReplacementPart(int tag, int data) : tag(tag), data(data) {
+ // Must be non-positive or a PartType value.
+ DCHECK(tag < NUMBER_OF_PART_TYPES);
+ }
+ // Either a value of PartType or a non-positive number that is
+ // the negation of an index into the replacement string.
+ int tag;
+ // The data value's interpretation depends on the value of tag:
+ // tag == SUBJECT_PREFIX ||
+ // tag == SUBJECT_SUFFIX: data is unused.
+ // tag == SUBJECT_CAPTURE: data is the number of the capture.
+ // tag == REPLACEMENT_SUBSTRING ||
+ // tag == REPLACEMENT_STRING: data is index into array of substrings
+ // of the replacement string.
+ // tag <= 0: Temporary representation of the substring of the replacement
+ // string ranging over -tag .. data.
+ // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
+ // substring objects.
+ int data;
+ };
+
+ template <typename Char>
+ bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
+ Vector<Char> characters, int capture_count,
+ int subject_length, Zone* zone) {
+ int length = characters.length();
+ int last = 0;
+ for (int i = 0; i < length; i++) {
+ Char c = characters[i];
+ if (c == '$') {
+ int next_index = i + 1;
+ if (next_index == length) { // No next character!
+ break;
+ }
+ Char c2 = characters[next_index];
+ switch (c2) {
+ case '$':
+ if (i > last) {
+ // There is a substring before. Include the first "$".
+ parts->Add(
+ ReplacementPart::ReplacementSubString(last, next_index),
+ zone);
+ last = next_index + 1; // Continue after the second "$".
+ } else {
+ // Let the next substring start with the second "$".
+ last = next_index;
+ }
+ i = next_index;
+ break;
+ case '`':
+ if (i > last) {
+ parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
+ }
+ parts->Add(ReplacementPart::SubjectPrefix(), zone);
+ i = next_index;
+ last = i + 1;
+ break;
+ case '\'':
+ if (i > last) {
+ parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
+ }
+ parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
+ i = next_index;
+ last = i + 1;
+ break;
+ case '&':
+ if (i > last) {
+ parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
+ }
+ parts->Add(ReplacementPart::SubjectMatch(), zone);
+ i = next_index;
+ last = i + 1;
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': {
+ int capture_ref = c2 - '0';
+ if (capture_ref > capture_count) {
+ i = next_index;
+ continue;
+ }
+ int second_digit_index = next_index + 1;
+ if (second_digit_index < length) {
+ // Peek ahead to see if we have two digits.
+ Char c3 = characters[second_digit_index];
+ if ('0' <= c3 && c3 <= '9') { // Double digits.
+ int double_digit_ref = capture_ref * 10 + c3 - '0';
+ if (double_digit_ref <= capture_count) {
+ next_index = second_digit_index;
+ capture_ref = double_digit_ref;
+ }
+ }
+ }
+ if (capture_ref > 0) {
+ if (i > last) {
+ parts->Add(ReplacementPart::ReplacementSubString(last, i),
+ zone);
+ }
+ DCHECK(capture_ref <= capture_count);
+ parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
+ last = next_index + 1;
+ }
+ i = next_index;
+ break;
+ }
+ default:
+ i = next_index;
+ break;
+ }
+ }
+ }
+ if (length > last) {
+ if (last == 0) {
+ // Replacement is simple. Do not use Apply to do the replacement.
+ return true;
+ } else {
+ parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
+ }
+ }
+ return false;
+ }
+
+ ZoneList<ReplacementPart> parts_;
+ ZoneList<Handle<String> > replacement_substrings_;
+ Zone* zone_;
+};
+
+
+bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
+ int subject_length) {
+ {
+ DisallowHeapAllocation no_gc;
+ String::FlatContent content = replacement->GetFlatContent();
+ DCHECK(content.IsFlat());
+ bool simple = false;
+ if (content.IsOneByte()) {
+ simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
+ capture_count, subject_length, zone());
+ } else {
+ DCHECK(content.IsTwoByte());
+ simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
+ capture_count, subject_length, zone());
+ }
+ if (simple) return true;
+ }
+
+ Isolate* isolate = replacement->GetIsolate();
+ // Find substrings of replacement string and create them as String objects.
+ int substring_index = 0;
+ for (int i = 0, n = parts_.length(); i < n; i++) {
+ int tag = parts_[i].tag;
+ if (tag <= 0) { // A replacement string slice.
+ int from = -tag;
+ int to = parts_[i].data;
+ replacement_substrings_.Add(
+ isolate->factory()->NewSubString(replacement, from, to), zone());
+ parts_[i].tag = REPLACEMENT_SUBSTRING;
+ parts_[i].data = substring_index;
+ substring_index++;
+ } else if (tag == REPLACEMENT_STRING) {
+ replacement_substrings_.Add(replacement, zone());
+ parts_[i].data = substring_index;
+ substring_index++;
+ }
+ }
+ return false;
+}
+
+
+void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
+ int match_from, int match_to, int32_t* match) {
+ DCHECK_LT(0, parts_.length());
+ for (int i = 0, n = parts_.length(); i < n; i++) {
+ ReplacementPart part = parts_[i];
+ switch (part.tag) {
+ case SUBJECT_PREFIX:
+ if (match_from > 0) builder->AddSubjectSlice(0, match_from);
+ break;
+ case SUBJECT_SUFFIX: {
+ int subject_length = part.data;
+ if (match_to < subject_length) {
+ builder->AddSubjectSlice(match_to, subject_length);
+ }
+ break;
+ }
+ case SUBJECT_CAPTURE: {
+ int capture = part.data;
+ int from = match[capture * 2];
+ int to = match[capture * 2 + 1];
+ if (from >= 0 && to > from) {
+ builder->AddSubjectSlice(from, to);
+ }
+ break;
+ }
+ case REPLACEMENT_SUBSTRING:
+ case REPLACEMENT_STRING:
+ builder->AddString(replacement_substrings_[part.data]);
+ break;
+ default:
+ UNREACHABLE();
+ }
+ }
+}
+
+
+void FindOneByteStringIndices(Vector<const uint8_t> subject, char pattern,
+ ZoneList<int>* indices, unsigned int limit,
+ Zone* zone) {
+ DCHECK(limit > 0);
+ // Collect indices of pattern in subject using memchr.
+ // Stop after finding at most limit values.
+ const uint8_t* subject_start = subject.start();
+ const uint8_t* subject_end = subject_start + subject.length();
+ const uint8_t* pos = subject_start;
+ while (limit > 0) {
+ pos = reinterpret_cast<const uint8_t*>(
+ memchr(pos, pattern, subject_end - pos));
+ if (pos == NULL) return;
+ indices->Add(static_cast<int>(pos - subject_start), zone);
+ pos++;
+ limit--;
+ }
+}
+
+
+void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
+ ZoneList<int>* indices, unsigned int limit,
+ Zone* zone) {
+ DCHECK(limit > 0);
+ const uc16* subject_start = subject.start();
+ const uc16* subject_end = subject_start + subject.length();
+ for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
+ if (*pos == pattern) {
+ indices->Add(static_cast<int>(pos - subject_start), zone);
+ limit--;
+ }
+ }
+}
+
+
+template <typename SubjectChar, typename PatternChar>
+void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
+ Vector<const PatternChar> pattern,
+ ZoneList<int>* indices, unsigned int limit, Zone* zone) {
+ DCHECK(limit > 0);
+ // Collect indices of pattern in subject.
+ // Stop after finding at most limit values.
+ int pattern_length = pattern.length();
+ int index = 0;
+ StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
+ while (limit > 0) {
+ index = search.Search(subject, index);
+ if (index < 0) return;
+ indices->Add(index, zone);
+ index += pattern_length;
+ limit--;
+ }
+}
+
+
+void FindStringIndicesDispatch(Isolate* isolate, String* subject,
+ String* pattern, ZoneList<int>* indices,
+ unsigned int limit, Zone* zone) {
+ {
+ DisallowHeapAllocation no_gc;
+ String::FlatContent subject_content = subject->GetFlatContent();
+ String::FlatContent pattern_content = pattern->GetFlatContent();
+ DCHECK(subject_content.IsFlat());
+ DCHECK(pattern_content.IsFlat());
+ if (subject_content.IsOneByte()) {
+ Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
+ if (pattern_content.IsOneByte()) {
+ Vector<const uint8_t> pattern_vector =
+ pattern_content.ToOneByteVector();
+ if (pattern_vector.length() == 1) {
+ FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
+ limit, zone);
+ } else {
+ FindStringIndices(isolate, subject_vector, pattern_vector, indices,
+ limit, zone);
+ }
+ } else {
+ FindStringIndices(isolate, subject_vector,
+ pattern_content.ToUC16Vector(), indices, limit, zone);
+ }
+ } else {
+ Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
+ if (pattern_content.IsOneByte()) {
+ Vector<const uint8_t> pattern_vector =
+ pattern_content.ToOneByteVector();
+ if (pattern_vector.length() == 1) {
+ FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
+ limit, zone);
+ } else {
+ FindStringIndices(isolate, subject_vector, pattern_vector, indices,
+ limit, zone);
+ }
+ } else {
+ Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
+ if (pattern_vector.length() == 1) {
+ FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
+ limit, zone);
+ } else {
+ FindStringIndices(isolate, subject_vector, pattern_vector, indices,
+ limit, zone);
+ }
+ }
+ }
+ }
+}
+
+
+template <typename ResultSeqString>
+MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
+ Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
+ Handle<String> replacement, Handle<JSArray> last_match_info) {
+ DCHECK(subject->IsFlat());
+ DCHECK(replacement->IsFlat());
+
+ ZoneScope zone_scope(isolate->runtime_zone());
+ ZoneList<int> indices(8, zone_scope.zone());
+ DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
+ String* pattern =
+ String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
+ int subject_len = subject->length();
+ int pattern_len = pattern->length();
+ int replacement_len = replacement->length();
+
+ FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
+ zone_scope.zone());
+
+ int matches = indices.length();
+ if (matches == 0) return *subject;
+
+ // Detect integer overflow.
+ int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
+ static_cast<int64_t>(pattern_len)) *
+ static_cast<int64_t>(matches) +
+ static_cast<int64_t>(subject_len);
+ int result_len;
+ if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
+ STATIC_ASSERT(String::kMaxLength < kMaxInt);
+ result_len = kMaxInt; // Provoke exception.
+ } else {
+ result_len = static_cast<int>(result_len_64);
+ }
+
+ int subject_pos = 0;
+ int result_pos = 0;
+
+ MaybeHandle<SeqString> maybe_res;
+ if (ResultSeqString::kHasOneByteEncoding) {
+ maybe_res = isolate->factory()->NewRawOneByteString(result_len);
+ } else {
+ maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
+ }
+ Handle<SeqString> untyped_res;
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
+ Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
+
+ for (int i = 0; i < matches; i++) {
+ // Copy non-matched subject content.
+ if (subject_pos < indices.at(i)) {
+ String::WriteToFlat(*subject, result->GetChars() + result_pos,
+ subject_pos, indices.at(i));
+ result_pos += indices.at(i) - subject_pos;
+ }
+
+ // Replace match.
+ if (replacement_len > 0) {
+ String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
+ replacement_len);
+ result_pos += replacement_len;
+ }
+
+ subject_pos = indices.at(i) + pattern_len;
+ }
+ // Add remaining subject content at the end.
+ if (subject_pos < subject_len) {
+ String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
+ subject_len);
+ }
+
+ int32_t match_indices[] = {indices.at(matches - 1),
+ indices.at(matches - 1) + pattern_len};
+ RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
+
+ return *result;
+}
+
+
+MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
+ Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
+ Handle<String> replacement, Handle<JSArray> last_match_info) {
+ DCHECK(subject->IsFlat());
+ DCHECK(replacement->IsFlat());
+
+ int capture_count = regexp->CaptureCount();
+ int subject_length = subject->length();
+
+ // CompiledReplacement uses zone allocation.
+ ZoneScope zone_scope(isolate->runtime_zone());
+ CompiledReplacement compiled_replacement(zone_scope.zone());
+ bool simple_replace =
+ compiled_replacement.Compile(replacement, capture_count, subject_length);
+
+ // Shortcut for simple non-regexp global replacements
+ if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
+ if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
+ return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
+ isolate, subject, regexp, replacement, last_match_info);
+ } else {
+ return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
+ isolate, subject, regexp, replacement, last_match_info);
+ }
+ }
+
+ RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
+ if (global_cache.HasException()) return isolate->heap()->exception();
+
+ int32_t* current_match = global_cache.FetchNext();
+ if (current_match == NULL) {
+ if (global_cache.HasException()) return isolate->heap()->exception();
+ return *subject;
+ }
+
+ // Guessing the number of parts that the final result string is built
+ // from. Global regexps can match any number of times, so we guess
+ // conservatively.
+ int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
+ ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
+
+ // Number of parts added by compiled replacement plus preceeding
+ // string and possibly suffix after last match. It is possible for
+ // all components to use two elements when encoded as two smis.
+ const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
+
+ int prev = 0;
+
+ do {
+ builder.EnsureCapacity(parts_added_per_loop);
+
+ int start = current_match[0];
+ int end = current_match[1];
+
+ if (prev < start) {
+ builder.AddSubjectSlice(prev, start);
+ }
+
+ if (simple_replace) {
+ builder.AddString(replacement);
+ } else {
+ compiled_replacement.Apply(&builder, start, end, current_match);
+ }
+ prev = end;
+
+ current_match = global_cache.FetchNext();
+ } while (current_match != NULL);
+
+ if (global_cache.HasException()) return isolate->heap()->exception();
+
+ if (prev < subject_length) {
+ builder.EnsureCapacity(2);
+ builder.AddSubjectSlice(prev, subject_length);
+ }
+
+ RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
+ global_cache.LastSuccessfulMatch());
+
+ Handle<String> result;
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString());
+ return *result;
+}
+
+
+template <typename ResultSeqString>
+MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
+ Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
+ Handle<JSArray> last_match_info) {
+ DCHECK(subject->IsFlat());
+
+ // Shortcut for simple non-regexp global replacements
+ if (regexp->TypeTag() == JSRegExp::ATOM) {
+ Handle<String> empty_string = isolate->factory()->empty_string();
+ if (subject->IsOneByteRepresentation()) {
+ return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
+ isolate, subject, regexp, empty_string, last_match_info);
+ } else {
+ return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
+ isolate, subject, regexp, empty_string, last_match_info);
+ }
+ }
+
+ RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
+ if (global_cache.HasException()) return isolate->heap()->exception();
+
+ int32_t* current_match = global_cache.FetchNext();
+ if (current_match == NULL) {
+ if (global_cache.HasException()) return isolate->heap()->exception();
+ return *subject;
+ }
+
+ int start = current_match[0];
+ int end = current_match[1];
+ int capture_count = regexp->CaptureCount();
+ int subject_length = subject->length();
+
+ int new_length = subject_length - (end - start);
+ if (new_length == 0) return isolate->heap()->empty_string();
+
+ Handle<ResultSeqString> answer;
+ if (ResultSeqString::kHasOneByteEncoding) {
+ answer = Handle<ResultSeqString>::cast(
+ isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
+ } else {
+ answer = Handle<ResultSeqString>::cast(
+ isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
+ }
+
+ int prev = 0;
+ int position = 0;
+
+ do {
+ start = current_match[0];
+ end = current_match[1];
+ if (prev < start) {
+ // Add substring subject[prev;start] to answer string.
+ String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
+ position += start - prev;
+ }
+ prev = end;
+
+ current_match = global_cache.FetchNext();
+ } while (current_match != NULL);
+
+ if (global_cache.HasException()) return isolate->heap()->exception();
+
+ RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
+ global_cache.LastSuccessfulMatch());
+
+ if (prev < subject_length) {
+ // Add substring subject[prev;length] to answer string.
+ String::WriteToFlat(*subject, answer->GetChars() + position, prev,
+ subject_length);
+ position += subject_length - prev;
+ }
+
+ if (position == 0) return isolate->heap()->empty_string();
+
+ // Shorten string and fill
+ int string_size = ResultSeqString::SizeFor(position);
+ int allocated_string_size = ResultSeqString::SizeFor(new_length);
+ int delta = allocated_string_size - string_size;
+
+ answer->set_length(position);
+ if (delta == 0) return *answer;
+
+ Address end_of_string = answer->address() + string_size;
+ Heap* heap = isolate->heap();
+
+ // The trimming is performed on a newly allocated object, which is on a
+ // fresly allocated page or on an already swept page. Hence, the sweeper
+ // thread can not get confused with the filler creation. No synchronization
+ // needed.
+ heap->CreateFillerObjectAt(end_of_string, delta);
+ heap->AdjustLiveBytes(answer->address(), -delta, Heap::FROM_MUTATOR);
+ return *answer;
+}
+
+
+RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
+ HandleScope scope(isolate);
+ DCHECK(args.length() == 4);
+
+ CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
+ CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
+ CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
+
+ RUNTIME_ASSERT(regexp->GetFlags().is_global());
+ RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
+
+ subject = String::Flatten(subject);
+
+ if (replacement->length() == 0) {
+ if (subject->HasOnlyOneByteChars()) {
+ return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
+ isolate, subject, regexp, last_match_info);
+ } else {
+ return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
+ isolate, subject, regexp, last_match_info);
+ }
+ }
+
+ replacement = String::Flatten(replacement);
+
+ return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
+ replacement, last_match_info);
+}
+
+
+RUNTIME_FUNCTION(Runtime_StringSplit) {
+ HandleScope handle_scope(isolate);
+ DCHECK(args.length() == 3);
+ CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
+ CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
+ CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
+ RUNTIME_ASSERT(limit > 0);
+
+ int subject_length = subject->length();
+ int pattern_length = pattern->length();
+ RUNTIME_ASSERT(pattern_length > 0);
+
+ if (limit == 0xffffffffu) {
+ Handle<Object> cached_answer(
+ RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
+ RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
+ isolate);
+ if (*cached_answer != Smi::FromInt(0)) {
+ // The cache FixedArray is a COW-array and can therefore be reused.
+ Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
+ Handle<FixedArray>::cast(cached_answer));
+ return *result;
+ }
+ }
+
+ // The limit can be very large (0xffffffffu), but since the pattern
+ // isn't empty, we can never create more parts than ~half the length
+ // of the subject.
+
+ subject = String::Flatten(subject);
+ pattern = String::Flatten(pattern);
+
+ static const int kMaxInitialListCapacity = 16;
+
+ ZoneScope zone_scope(isolate->runtime_zone());
+
+ // Find (up to limit) indices of separator and end-of-string in subject
+ int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
+ ZoneList<int> indices(initial_capacity, zone_scope.zone());
+
+ FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,
+ zone_scope.zone());
+
+ if (static_cast<uint32_t>(indices.length()) < limit) {
+ indices.Add(subject_length, zone_scope.zone());
+ }
+
+ // The list indices now contains the end of each part to create.
+
+ // Create JSArray of substrings separated by separator.
+ int part_count = indices.length();
+
+ Handle<JSArray> result = isolate->factory()->NewJSArray(part_count);
+ JSObject::EnsureCanContainHeapObjectElements(result);
+ result->set_length(Smi::FromInt(part_count));
+
+ DCHECK(result->HasFastObjectElements());
+
+ if (part_count == 1 && indices.at(0) == subject_length) {
+ FixedArray::cast(result->elements())->set(0, *subject);
+ return *result;
+ }
+
+ Handle<FixedArray> elements(FixedArray::cast(result->elements()));
+ int part_start = 0;
+ for (int i = 0; i < part_count; i++) {
+ HandleScope local_loop_handle(isolate);
+ int part_end = indices.at(i);
+ Handle<String> substring =
+ isolate->factory()->NewProperSubString(subject, part_start, part_end);
+ elements->set(i, *substring);
+ part_start = part_end + pattern_length;
+ }
+
+ if (limit == 0xffffffffu) {
+ if (result->HasFastObjectElements()) {
+ RegExpResultsCache::Enter(isolate, subject, pattern, elements,
+ RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
+ }
+ }
+
+ return *result;
+}
+
+
+RUNTIME_FUNCTION(Runtime_RegExpCompile) {
+ HandleScope scope(isolate);
+ DCHECK(args.length() == 3);
+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, re, 0);
+ CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
+ CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
+ Handle<Object> result;
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result,
+ RegExpImpl::Compile(re, pattern, flags));
+ return *result;
+}
+
+
+RUNTIME_FUNCTION(Runtime_RegExpExecRT) {
+ HandleScope scope(isolate);
+ DCHECK(args.length() == 4);
+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
+ CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
+ CONVERT_INT32_ARG_CHECKED(index, 2);
+ CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
+ // Due to the way the JS calls are constructed this must be less than the
+ // length of a string, i.e. it is always a Smi. We check anyway for security.
+ RUNTIME_ASSERT(index >= 0);
+ RUNTIME_ASSERT(index <= subject->length());
+ isolate->counters()->regexp_entry_runtime()->Increment();
+ Handle<Object> result;
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+ isolate, result,
+ RegExpImpl::Exec(regexp, subject, index, last_match_info));
+ return *result;
+}
+
+
+RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
+ HandleScope handle_scope(isolate);
+ DCHECK(args.length() == 3);
+ CONVERT_SMI_ARG_CHECKED(size, 0);
+ RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength);
+ CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);
+ CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);
+ Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
+ Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
+ Handle<JSObject> object =
+ isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED, false);
+ Handle<JSArray> array = Handle<JSArray>::cast(object);
+ array->set_elements(*elements);
+ array->set_length(Smi::FromInt(size));
+ // Write in-object properties after the length of the array.
+ array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);
+ array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
+ return *array;
+}
+
+
+RUNTIME_FUNCTION(Runtime_RegExpInitializeObject) {
+ HandleScope scope(isolate);
+ DCHECK(args.length() == 6);
+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
+ CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
+ // If source is the empty string we set it to "(?:)" instead as
+ // suggested by ECMA-262, 5th, section 15.10.4.1.
+ if (source->length() == 0) source = isolate->factory()->query_colon_string();
+
+ CONVERT_ARG_HANDLE_CHECKED(Object, global, 2);
+ if (!global->IsTrue()) global = isolate->factory()->false_value();
+
+ CONVERT_ARG_HANDLE_CHECKED(Object, ignoreCase, 3);
+ if (!ignoreCase->IsTrue()) ignoreCase = isolate->factory()->false_value();
+
+ CONVERT_ARG_HANDLE_CHECKED(Object, multiline, 4);
+ if (!multiline->IsTrue()) multiline = isolate->factory()->false_value();
+
+ CONVERT_ARG_HANDLE_CHECKED(Object, sticky, 5);
+ if (!sticky->IsTrue()) sticky = isolate->factory()->false_value();
+
+ Map* map = regexp->map();
+ Object* constructor = map->constructor();
+ if (!FLAG_harmony_regexps && constructor->IsJSFunction() &&
+ JSFunction::cast(constructor)->initial_map() == map) {
+ // If we still have the original map, set in-object properties directly.
+ regexp->InObjectPropertyAtPut(JSRegExp::kSourceFieldIndex, *source);
+ // Both true and false are immovable immortal objects so no need for write
+ // barrier.
+ regexp->InObjectPropertyAtPut(JSRegExp::kGlobalFieldIndex, *global,
+ SKIP_WRITE_BARRIER);
+ regexp->InObjectPropertyAtPut(JSRegExp::kIgnoreCaseFieldIndex, *ignoreCase,
+ SKIP_WRITE_BARRIER);
+ regexp->InObjectPropertyAtPut(JSRegExp::kMultilineFieldIndex, *multiline,
+ SKIP_WRITE_BARRIER);
+ regexp->InObjectPropertyAtPut(JSRegExp::kLastIndexFieldIndex,
+ Smi::FromInt(0), SKIP_WRITE_BARRIER);
+ return *regexp;
+ }
+
+ // Map has changed, so use generic, but slower, method. We also end here if
+ // the --harmony-regexp flag is set, because the initial map does not have
+ // space for the 'sticky' flag, since it is from the snapshot, but must work
+ // both with and without --harmony-regexp. When sticky comes out from under
+ // the flag, we will be able to use the fast initial map.
+ PropertyAttributes final =
+ static_cast<PropertyAttributes>(READ_ONLY | DONT_ENUM | DONT_DELETE);
+ PropertyAttributes writable =
+ static_cast<PropertyAttributes>(DONT_ENUM | DONT_DELETE);
+ Handle<Object> zero(Smi::FromInt(0), isolate);
+ Factory* factory = isolate->factory();
+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->source_string(),
+ source, final).Check();
+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->global_string(),
+ global, final).Check();
+ JSObject::SetOwnPropertyIgnoreAttributes(
+ regexp, factory->ignore_case_string(), ignoreCase, final).Check();
+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->multiline_string(),
+ multiline, final).Check();
+ if (FLAG_harmony_regexps) {
+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->sticky_string(),
+ sticky, final).Check();
+ }
+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->last_index_string(),
+ zero, writable).Check();
+ return *regexp;
+}
+
+
+RUNTIME_FUNCTION(Runtime_MaterializeRegExpLiteral) {
+ HandleScope scope(isolate);
+ DCHECK(args.length() == 4);
+ CONVERT_ARG_HANDLE_CHECKED(FixedArray, literals, 0);
+ CONVERT_SMI_ARG_CHECKED(index, 1);
+ CONVERT_ARG_HANDLE_CHECKED(String, pattern, 2);
+ CONVERT_ARG_HANDLE_CHECKED(String, flags, 3);
+
+ // Get the RegExp function from the context in the literals array.
+ // This is the RegExp function from the context in which the
+ // function was created. We do not use the RegExp function from the
+ // current native context because this might be the RegExp function
+ // from another context which we should not have access to.
+ Handle<JSFunction> constructor = Handle<JSFunction>(
+ JSFunction::NativeContextFromLiterals(*literals)->regexp_function());
+ // Compute the regular expression literal.
+ Handle<Object> regexp;
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+ isolate, regexp,
+ RegExpImpl::CreateRegExpLiteral(constructor, pattern, flags));
+ literals->set(index, *regexp);
+ return *regexp;
+}
+
+
+// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
+// separate last match info. See comment on that function.
+template <bool has_capture>
+static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
+ Handle<JSRegExp> regexp,
+ Handle<JSArray> last_match_array,
+ Handle<JSArray> result_array) {
+ DCHECK(subject->IsFlat());
+ DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
+
+ int capture_count = regexp->CaptureCount();
+ int subject_length = subject->length();
+
+ static const int kMinLengthToCache = 0x1000;
+
+ if (subject_length > kMinLengthToCache) {
+ Handle<Object> cached_answer(
+ RegExpResultsCache::Lookup(isolate->heap(), *subject, regexp->data(),
+ RegExpResultsCache::REGEXP_MULTIPLE_INDICES),
+ isolate);
+ if (*cached_answer != Smi::FromInt(0)) {
+ Handle<FixedArray> cached_fixed_array =
+ Handle<FixedArray>(FixedArray::cast(*cached_answer));
+ // The cache FixedArray is a COW-array and can therefore be reused.
+ JSArray::SetContent(result_array, cached_fixed_array);
+ // The actual length of the result array is stored in the last element of
+ // the backing store (the backing FixedArray may have a larger capacity).
+ Object* cached_fixed_array_last_element =
+ cached_fixed_array->get(cached_fixed_array->length() - 1);
+ Smi* js_array_length = Smi::cast(cached_fixed_array_last_element);
+ result_array->set_length(js_array_length);
+ RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
+ NULL);
+ return *result_array;
+ }
+ }
+
+ RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
+ if (global_cache.HasException()) return isolate->heap()->exception();
+
+ // Ensured in Runtime_RegExpExecMultiple.
+ DCHECK(result_array->HasFastObjectElements());
+ Handle<FixedArray> result_elements(
+ FixedArray::cast(result_array->elements()));
+ if (result_elements->length() < 16) {
+ result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
+ }
+
+ FixedArrayBuilder builder(result_elements);
+
+ // Position to search from.
+ int match_start = -1;
+ int match_end = 0;
+ bool first = true;
+
+ // Two smis before and after the match, for very long strings.
+ static const int kMaxBuilderEntriesPerRegExpMatch = 5;
+
+ while (true) {
+ int32_t* current_match = global_cache.FetchNext();
+ if (current_match == NULL) break;
+ match_start = current_match[0];
+ builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+ if (match_end < match_start) {
+ ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
+ match_start);
+ }
+ match_end = current_match[1];
+ {
+ // Avoid accumulating new handles inside loop.
+ HandleScope temp_scope(isolate);
+ Handle<String> match;
+ if (!first) {
+ match = isolate->factory()->NewProperSubString(subject, match_start,
+ match_end);
+ } else {
+ match =
+ isolate->factory()->NewSubString(subject, match_start, match_end);
+ first = false;
+ }
+
+ if (has_capture) {
+ // Arguments array to replace function is match, captures, index and
+ // subject, i.e., 3 + capture count in total.
+ Handle<FixedArray> elements =
+ isolate->factory()->NewFixedArray(3 + capture_count);
+
+ elements->set(0, *match);
+ for (int i = 1; i <= capture_count; i++) {
+ int start = current_match[i * 2];
+ if (start >= 0) {
+ int end = current_match[i * 2 + 1];
+ DCHECK(start <= end);
+ Handle<String> substring =
+ isolate->factory()->NewSubString(subject, start, end);
+ elements->set(i, *substring);
+ } else {
+ DCHECK(current_match[i * 2 + 1] < 0);
+ elements->set(i, isolate->heap()->undefined_value());
+ }
+ }
+ elements->set(capture_count + 1, Smi::FromInt(match_start));
+ elements->set(capture_count + 2, *subject);
+ builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
+ } else {
+ builder.Add(*match);
+ }
+ }
+ }
+
+ if (global_cache.HasException()) return isolate->heap()->exception();
+
+ if (match_start >= 0) {
+ // Finished matching, with at least one match.
+ if (match_end < subject_length) {
+ ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
+ subject_length);
+ }
+
+ RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
+ NULL);
+
+ if (subject_length > kMinLengthToCache) {
+ // Store the length of the result array into the last element of the
+ // backing FixedArray.
+ builder.EnsureCapacity(1);
+ Handle<FixedArray> fixed_array = builder.array();
+ fixed_array->set(fixed_array->length() - 1,
+ Smi::FromInt(builder.length()));
+ // Cache the result and turn the FixedArray into a COW array.
+ RegExpResultsCache::Enter(isolate, subject,
+ handle(regexp->data(), isolate), fixed_array,
+ RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
+ }
+ return *builder.ToJSArray(result_array);
+ } else {
+ return isolate->heap()->null_value(); // No matches at all.
+ }
+}
+
+
+// This is only called for StringReplaceGlobalRegExpWithFunction. This sets
+// lastMatchInfoOverride to maintain the last match info, so we don't need to
+// set any other last match array info.
+RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
+ HandleScope handles(isolate);
+ DCHECK(args.length() == 4);
+
+ CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
+ CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
+ CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
+ RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
+ RUNTIME_ASSERT(result_array->HasFastObjectElements());
+
+ subject = String::Flatten(subject);
+ RUNTIME_ASSERT(regexp->GetFlags().is_global());
+
+ if (regexp->CaptureCount() == 0) {
+ return SearchRegExpMultiple<false>(isolate, subject, regexp,
+ last_match_info, result_array);
+ } else {
+ return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
+ result_array);
+ }
+}
+
+
+RUNTIME_FUNCTION(RuntimeReference_RegExpConstructResult) {
+ SealHandleScope shs(isolate);
+ return __RT_impl_Runtime_RegExpConstructResult(args, isolate);
+}
+
+
+RUNTIME_FUNCTION(RuntimeReference_RegExpExec) {
+ SealHandleScope shs(isolate);
+ return __RT_impl_Runtime_RegExpExecRT(args, isolate);
+}
+
+
+// Perform string match of pattern on subject, starting at start index.
+// Caller must ensure that 0 <= start_index <= sub->length(),
+// and should check that pat->length() + start_index <= sub->length().
+int Runtime::StringMatch(Isolate* isolate, Handle<String> sub,
+ Handle<String> pat, int start_index) {
+ DCHECK(0 <= start_index);
+ DCHECK(start_index <= sub->length());
+
+ int pattern_length = pat->length();
+ if (pattern_length == 0) return start_index;
+
+ int subject_length = sub->length();
+ if (start_index + pattern_length > subject_length) return -1;
+
+ sub = String::Flatten(sub);
+ pat = String::Flatten(pat);
+
+ DisallowHeapAllocation no_gc; // ensure vectors stay valid
+ // Extract flattened substrings of cons strings before getting encoding.
+ String::FlatContent seq_sub = sub->GetFlatContent();
+ String::FlatContent seq_pat = pat->GetFlatContent();
+
+ // dispatch on type of strings
+ if (seq_pat.IsOneByte()) {
+ Vector<const uint8_t> pat_vector = seq_pat.ToOneByteVector();
+ if (seq_sub.IsOneByte()) {
+ return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector,
+ start_index);
+ }
+ return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector,
+ start_index);
+ }
+ Vector<const uc16> pat_vector = seq_pat.ToUC16Vector();
+ if (seq_sub.IsOneByte()) {
+ return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector,
+ start_index);
+ }
+ return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector, start_index);
+}
+}
+} // namespace v8::internal
« no previous file with comments | « src/runtime/runtime-json.cc ('k') | src/runtime/runtime-strings.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698