Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(88)

Side by Side Diff: src/runtime.cc

Issue 10831126: Take advantage of batched results when matching global regexp. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2556 matching lines...) Expand 10 before | Expand all | Expand 10 after
2567 FixedArrayBuilder array_builder_; 2567 FixedArrayBuilder array_builder_;
2568 Handle<String> subject_; 2568 Handle<String> subject_;
2569 int character_count_; 2569 int character_count_;
2570 bool is_ascii_; 2570 bool is_ascii_;
2571 }; 2571 };
2572 2572
2573 2573
2574 class CompiledReplacement { 2574 class CompiledReplacement {
2575 public: 2575 public:
2576 explicit CompiledReplacement(Zone* zone) 2576 explicit CompiledReplacement(Zone* zone)
2577 : parts_(1, zone), replacement_substrings_(0, zone), 2577 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
2578 simple_hint_(false),
2579 zone_(zone) {}
2580 2578
2581 void Compile(Handle<String> replacement, 2579 // Return whether the replacement is simple.
2580 bool Compile(Handle<String> replacement,
2582 int capture_count, 2581 int capture_count,
2583 int subject_length); 2582 int subject_length);
2584 2583
2584 // Use Apply only if Compile returned false.
2585 void Apply(ReplacementStringBuilder* builder, 2585 void Apply(ReplacementStringBuilder* builder,
2586 int match_from, 2586 int match_from,
2587 int match_to, 2587 int match_to,
2588 Handle<JSArray> last_match_info); 2588 int32_t* match);
2589 2589
2590 // Number of distinct parts of the replacement pattern. 2590 // Number of distinct parts of the replacement pattern.
2591 int parts() { 2591 int parts() {
2592 return parts_.length(); 2592 return parts_.length();
2593 } 2593 }
2594 2594
2595 bool simple_hint() {
2596 return simple_hint_;
2597 }
2598
2599 Zone* zone() const { return zone_; } 2595 Zone* zone() const { return zone_; }
2600 2596
2601 private: 2597 private:
2602 enum PartType { 2598 enum PartType {
2603 SUBJECT_PREFIX = 1, 2599 SUBJECT_PREFIX = 1,
2604 SUBJECT_SUFFIX, 2600 SUBJECT_SUFFIX,
2605 SUBJECT_CAPTURE, 2601 SUBJECT_CAPTURE,
2606 REPLACEMENT_SUBSTRING, 2602 REPLACEMENT_SUBSTRING,
2607 REPLACEMENT_STRING, 2603 REPLACEMENT_STRING,
2608 2604
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
2649 // tag == REPLACEMENT_STRING: data is index into array of substrings 2645 // tag == REPLACEMENT_STRING: data is index into array of substrings
2650 // of the replacement string. 2646 // of the replacement string.
2651 // tag <= 0: Temporary representation of the substring of the replacement 2647 // tag <= 0: Temporary representation of the substring of the replacement
2652 // string ranging over -tag .. data. 2648 // string ranging over -tag .. data.
2653 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the 2649 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
2654 // substring objects. 2650 // substring objects.
2655 int data; 2651 int data;
2656 }; 2652 };
2657 2653
2658 template<typename Char> 2654 template<typename Char>
2659 static bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts, 2655 bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
2660 Vector<Char> characters, 2656 Vector<Char> characters,
2661 int capture_count, 2657 int capture_count,
2662 int subject_length, 2658 int subject_length,
2663 Zone* zone) { 2659 Zone* zone) {
2664 int length = characters.length(); 2660 int length = characters.length();
2665 int last = 0; 2661 int last = 0;
2666 for (int i = 0; i < length; i++) { 2662 for (int i = 0; i < length; i++) {
2667 Char c = characters[i]; 2663 Char c = characters[i];
2668 if (c == '$') { 2664 if (c == '$') {
2669 int next_index = i + 1; 2665 int next_index = i + 1;
2670 if (next_index == length) { // No next character! 2666 if (next_index == length) { // No next character!
2671 break; 2667 break;
2672 } 2668 }
2673 Char c2 = characters[next_index]; 2669 Char c2 = characters[next_index];
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
2747 break; 2743 break;
2748 } 2744 }
2749 default: 2745 default:
2750 i = next_index; 2746 i = next_index;
2751 break; 2747 break;
2752 } 2748 }
2753 } 2749 }
2754 } 2750 }
2755 if (length > last) { 2751 if (length > last) {
2756 if (last == 0) { 2752 if (last == 0) {
2757 parts->Add(ReplacementPart::ReplacementString(), zone); 2753 // Replacement is simple. Do not use Apply to do the replacement.
2758 return true; 2754 return true;
2759 } else { 2755 } else {
2760 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone); 2756 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
2761 } 2757 }
2762 } 2758 }
2763 return false; 2759 return false;
2764 } 2760 }
2765 2761
2766 ZoneList<ReplacementPart> parts_; 2762 ZoneList<ReplacementPart> parts_;
2767 ZoneList<Handle<String> > replacement_substrings_; 2763 ZoneList<Handle<String> > replacement_substrings_;
2768 bool simple_hint_;
2769 Zone* zone_; 2764 Zone* zone_;
2770 }; 2765 };
2771 2766
2772 2767
2773 void CompiledReplacement::Compile(Handle<String> replacement, 2768 bool CompiledReplacement::Compile(Handle<String> replacement,
2774 int capture_count, 2769 int capture_count,
2775 int subject_length) { 2770 int subject_length) {
2776 { 2771 {
2777 AssertNoAllocation no_alloc; 2772 AssertNoAllocation no_alloc;
2778 String::FlatContent content = replacement->GetFlatContent(); 2773 String::FlatContent content = replacement->GetFlatContent();
2779 ASSERT(content.IsFlat()); 2774 ASSERT(content.IsFlat());
2775 bool simple = false;
2780 if (content.IsAscii()) { 2776 if (content.IsAscii()) {
2781 simple_hint_ = ParseReplacementPattern(&parts_, 2777 simple = ParseReplacementPattern(&parts_,
2782 content.ToAsciiVector(), 2778 content.ToAsciiVector(),
2783 capture_count, 2779 capture_count,
2784 subject_length, 2780 subject_length,
2785 zone()); 2781 zone());
2786 } else { 2782 } else {
2787 ASSERT(content.IsTwoByte()); 2783 ASSERT(content.IsTwoByte());
2788 simple_hint_ = ParseReplacementPattern(&parts_, 2784 simple = ParseReplacementPattern(&parts_,
2789 content.ToUC16Vector(), 2785 content.ToUC16Vector(),
2790 capture_count, 2786 capture_count,
2791 subject_length, 2787 subject_length,
2792 zone()); 2788 zone());
2793 } 2789 }
2790 if (simple) return true;
2794 } 2791 }
2792
2795 Isolate* isolate = replacement->GetIsolate(); 2793 Isolate* isolate = replacement->GetIsolate();
2796 // Find substrings of replacement string and create them as String objects. 2794 // Find substrings of replacement string and create them as String objects.
2797 int substring_index = 0; 2795 int substring_index = 0;
2798 for (int i = 0, n = parts_.length(); i < n; i++) { 2796 for (int i = 0, n = parts_.length(); i < n; i++) {
2799 int tag = parts_[i].tag; 2797 int tag = parts_[i].tag;
2800 if (tag <= 0) { // A replacement string slice. 2798 if (tag <= 0) { // A replacement string slice.
2801 int from = -tag; 2799 int from = -tag;
2802 int to = parts_[i].data; 2800 int to = parts_[i].data;
2803 replacement_substrings_.Add( 2801 replacement_substrings_.Add(
2804 isolate->factory()->NewSubString(replacement, from, to), zone()); 2802 isolate->factory()->NewSubString(replacement, from, to), zone());
2805 parts_[i].tag = REPLACEMENT_SUBSTRING; 2803 parts_[i].tag = REPLACEMENT_SUBSTRING;
2806 parts_[i].data = substring_index; 2804 parts_[i].data = substring_index;
2807 substring_index++; 2805 substring_index++;
2808 } else if (tag == REPLACEMENT_STRING) { 2806 } else if (tag == REPLACEMENT_STRING) {
2809 replacement_substrings_.Add(replacement, zone()); 2807 replacement_substrings_.Add(replacement, zone());
2810 parts_[i].data = substring_index; 2808 parts_[i].data = substring_index;
2811 substring_index++; 2809 substring_index++;
2812 } 2810 }
2813 } 2811 }
2812 return false;
2814 } 2813 }
2815 2814
2816 2815
2817 void CompiledReplacement::Apply(ReplacementStringBuilder* builder, 2816 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
2818 int match_from, 2817 int match_from,
2819 int match_to, 2818 int match_to,
2820 Handle<JSArray> last_match_info) { 2819 int32_t* match) {
2820 ASSERT_LT(0, parts_.length());
2821 for (int i = 0, n = parts_.length(); i < n; i++) { 2821 for (int i = 0, n = parts_.length(); i < n; i++) {
2822 ReplacementPart part = parts_[i]; 2822 ReplacementPart part = parts_[i];
2823 switch (part.tag) { 2823 switch (part.tag) {
2824 case SUBJECT_PREFIX: 2824 case SUBJECT_PREFIX:
2825 if (match_from > 0) builder->AddSubjectSlice(0, match_from); 2825 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
2826 break; 2826 break;
2827 case SUBJECT_SUFFIX: { 2827 case SUBJECT_SUFFIX: {
2828 int subject_length = part.data; 2828 int subject_length = part.data;
2829 if (match_to < subject_length) { 2829 if (match_to < subject_length) {
2830 builder->AddSubjectSlice(match_to, subject_length); 2830 builder->AddSubjectSlice(match_to, subject_length);
2831 } 2831 }
2832 break; 2832 break;
2833 } 2833 }
2834 case SUBJECT_CAPTURE: { 2834 case SUBJECT_CAPTURE: {
2835 int capture = part.data; 2835 int capture = part.data;
2836 FixedArray* match_info = FixedArray::cast(last_match_info->elements()); 2836 int from = match[capture * 2];
2837 int from = RegExpImpl::GetCapture(match_info, capture * 2); 2837 int to = match[capture * 2 + 1];
2838 int to = RegExpImpl::GetCapture(match_info, capture * 2 + 1);
2839 if (from >= 0 && to > from) { 2838 if (from >= 0 && to > from) {
2840 builder->AddSubjectSlice(from, to); 2839 builder->AddSubjectSlice(from, to);
2841 } 2840 }
2842 break; 2841 break;
2843 } 2842 }
2844 case REPLACEMENT_SUBSTRING: 2843 case REPLACEMENT_SUBSTRING:
2845 case REPLACEMENT_STRING: 2844 case REPLACEMENT_STRING:
2846 builder->AddString(replacement_substrings_[part.data]); 2845 builder->AddString(replacement_substrings_[part.data]);
2847 break; 2846 break;
2848 default: 2847 default:
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after
2950 pattern_content.ToUC16Vector(), 2949 pattern_content.ToUC16Vector(),
2951 indices, 2950 indices,
2952 limit, 2951 limit,
2953 zone); 2952 zone);
2954 } 2953 }
2955 } 2954 }
2956 } 2955 }
2957 } 2956 }
2958 2957
2959 2958
2960 // Two smis before and after the match, for very long strings.
2961 const int kMaxBuilderEntriesPerRegExpMatch = 5;
2962
2963
2964 static void SetLastMatchInfoNoCaptures(Handle<String> subject,
2965 Handle<JSArray> last_match_info,
2966 int match_start,
2967 int match_end) {
2968 // Fill last_match_info with a single capture.
2969 last_match_info->EnsureSize(2 + RegExpImpl::kLastMatchOverhead);
2970 AssertNoAllocation no_gc;
2971 FixedArray* elements = FixedArray::cast(last_match_info->elements());
2972 RegExpImpl::SetLastCaptureCount(elements, 2);
2973 RegExpImpl::SetLastInput(elements, *subject);
2974 RegExpImpl::SetLastSubject(elements, *subject);
2975 RegExpImpl::SetCapture(elements, 0, match_start);
2976 RegExpImpl::SetCapture(elements, 1, match_end);
2977 }
2978
2979
2980 template <typename SubjectChar, typename PatternChar>
2981 static bool SearchStringMultiple(Isolate* isolate,
2982 Vector<const SubjectChar> subject,
2983 Vector<const PatternChar> pattern,
2984 String* pattern_string,
2985 FixedArrayBuilder* builder,
2986 int* match_pos) {
2987 int pos = *match_pos;
2988 int subject_length = subject.length();
2989 int pattern_length = pattern.length();
2990 int max_search_start = subject_length - pattern_length;
2991 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
2992 while (pos <= max_search_start) {
2993 if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
2994 *match_pos = pos;
2995 return false;
2996 }
2997 // Position of end of previous match.
2998 int match_end = pos + pattern_length;
2999 int new_pos = search.Search(subject, match_end);
3000 if (new_pos >= 0) {
3001 // A match.
3002 if (new_pos > match_end) {
3003 ReplacementStringBuilder::AddSubjectSlice(builder,
3004 match_end,
3005 new_pos);
3006 }
3007 pos = new_pos;
3008 builder->Add(pattern_string);
3009 } else {
3010 break;
3011 }
3012 }
3013
3014 if (pos < max_search_start) {
3015 ReplacementStringBuilder::AddSubjectSlice(builder,
3016 pos + pattern_length,
3017 subject_length);
3018 }
3019 *match_pos = pos;
3020 return true;
3021 }
3022
3023
3024
3025
3026 template<typename ResultSeqString> 2959 template<typename ResultSeqString>
3027 MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( 2960 MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
3028 Isolate* isolate, 2961 Isolate* isolate,
3029 Handle<String> subject, 2962 Handle<String> subject,
3030 Handle<JSRegExp> pattern_regexp, 2963 Handle<JSRegExp> pattern_regexp,
3031 Handle<String> replacement, 2964 Handle<String> replacement,
3032 Handle<JSArray> last_match_info, 2965 Handle<JSArray> last_match_info) {
3033 Zone* zone) {
3034 ASSERT(subject->IsFlat()); 2966 ASSERT(subject->IsFlat());
3035 ASSERT(replacement->IsFlat()); 2967 ASSERT(replacement->IsFlat());
3036 2968
3037 ZoneScope zone_space(isolate->runtime_zone(), DELETE_ON_EXIT); 2969 Zone* zone = isolate->runtime_zone();
3038 ZoneList<int> indices(8, isolate->runtime_zone()); 2970 ZoneScope zone_space(zone, DELETE_ON_EXIT);
2971 ZoneList<int> indices(8, zone);
3039 ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); 2972 ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
3040 String* pattern = 2973 String* pattern =
3041 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); 2974 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
3042 int subject_len = subject->length(); 2975 int subject_len = subject->length();
3043 int pattern_len = pattern->length(); 2976 int pattern_len = pattern->length();
3044 int replacement_len = replacement->length(); 2977 int replacement_len = replacement->length();
3045 2978
3046 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff, 2979 FindStringIndicesDispatch(
3047 zone); 2980 isolate, *subject, pattern, &indices, 0xffffffff, zone);
3048 2981
3049 int matches = indices.length(); 2982 int matches = indices.length();
3050 if (matches == 0) return *subject; 2983 if (matches == 0) return *subject;
3051 2984
3052 // Detect integer overflow. 2985 // Detect integer overflow.
3053 int64_t result_len_64 = 2986 int64_t result_len_64 =
3054 (static_cast<int64_t>(replacement_len) - 2987 (static_cast<int64_t>(replacement_len) -
3055 static_cast<int64_t>(pattern_len)) * 2988 static_cast<int64_t>(pattern_len)) *
3056 static_cast<int64_t>(matches) + 2989 static_cast<int64_t>(matches) +
3057 static_cast<int64_t>(subject_len); 2990 static_cast<int64_t>(subject_len);
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
3092 subject_pos = indices.at(i) + pattern_len; 3025 subject_pos = indices.at(i) + pattern_len;
3093 } 3026 }
3094 // Add remaining subject content at the end. 3027 // Add remaining subject content at the end.
3095 if (subject_pos < subject_len) { 3028 if (subject_pos < subject_len) {
3096 String::WriteToFlat(*subject, 3029 String::WriteToFlat(*subject,
3097 result->GetChars() + result_pos, 3030 result->GetChars() + result_pos,
3098 subject_pos, 3031 subject_pos,
3099 subject_len); 3032 subject_len);
3100 } 3033 }
3101 3034
3102 SetLastMatchInfoNoCaptures(subject, 3035 int32_t match_indices[] = { indices.at(matches - 1),
3103 last_match_info, 3036 indices.at(matches - 1) + pattern_len };
3104 indices.at(matches - 1), 3037 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
3105 indices.at(matches - 1) + pattern_len);
3106 3038
3107 return *result; 3039 return *result;
3108 } 3040 }
3109 3041
3110 3042
3111 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( 3043 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
3112 Isolate* isolate, 3044 Isolate* isolate,
3113 String* subject, 3045 Handle<String> subject,
3114 JSRegExp* regexp, 3046 Handle<JSRegExp> regexp,
3115 String* replacement, 3047 Handle<String> replacement,
3116 JSArray* last_match_info, 3048 Handle<JSArray> last_match_info) {
3117 Zone* zone) {
3118 ASSERT(subject->IsFlat()); 3049 ASSERT(subject->IsFlat());
3119 ASSERT(replacement->IsFlat()); 3050 ASSERT(replacement->IsFlat());
3120 3051
3121 HandleScope handles(isolate); 3052 bool is_global = regexp->GetFlags().is_global();
3122 3053 int capture_count = regexp->CaptureCount();
3123 int length = subject->length(); 3054 int subject_length = subject->length();
3124 Handle<String> subject_handle(subject);
3125 Handle<JSRegExp> regexp_handle(regexp);
3126 Handle<String> replacement_handle(replacement);
3127 Handle<JSArray> last_match_info_handle(last_match_info);
3128 Handle<Object> match = RegExpImpl::Exec(regexp_handle,
3129 subject_handle,
3130 0,
3131 last_match_info_handle);
3132 if (match.is_null()) {
3133 return Failure::Exception();
3134 }
3135 if (match->IsNull()) {
3136 return *subject_handle;
3137 }
3138
3139 int capture_count = regexp_handle->CaptureCount();
3140 3055
3141 // CompiledReplacement uses zone allocation. 3056 // CompiledReplacement uses zone allocation.
3057 Zone* zone = isolate->runtime_zone();
3142 ZoneScope zonescope(zone, DELETE_ON_EXIT); 3058 ZoneScope zonescope(zone, DELETE_ON_EXIT);
3143 CompiledReplacement compiled_replacement(zone); 3059 CompiledReplacement compiled_replacement(zone);
3144 compiled_replacement.Compile(replacement_handle, 3060 bool simple_replace = compiled_replacement.Compile(replacement,
3145 capture_count, 3061 capture_count,
3146 length); 3062 subject_length);
3147
3148 bool is_global = regexp_handle->GetFlags().is_global();
3149 3063
3150 // Shortcut for simple non-regexp global replacements 3064 // Shortcut for simple non-regexp global replacements
3151 if (is_global && 3065 if (is_global &&
3152 regexp_handle->TypeTag() == JSRegExp::ATOM && 3066 regexp->TypeTag() == JSRegExp::ATOM &&
3153 compiled_replacement.simple_hint()) { 3067 simple_replace) {
3154 if (subject_handle->HasOnlyAsciiChars() && 3068 if (subject->HasOnlyAsciiChars()) {
3155 replacement_handle->HasOnlyAsciiChars()) {
3156 return StringReplaceAtomRegExpWithString<SeqAsciiString>( 3069 return StringReplaceAtomRegExpWithString<SeqAsciiString>(
3157 isolate, 3070 isolate, subject, regexp, replacement, last_match_info);
3158 subject_handle,
3159 regexp_handle,
3160 replacement_handle,
3161 last_match_info_handle,
3162 zone);
3163 } else { 3071 } else {
3164 return StringReplaceAtomRegExpWithString<SeqTwoByteString>( 3072 return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
3165 isolate, 3073 isolate, subject, regexp, replacement, last_match_info);
3166 subject_handle,
3167 regexp_handle,
3168 replacement_handle,
3169 last_match_info_handle,
3170 zone);
3171 } 3074 }
3172 } 3075 }
3173 3076
3077 if (RegExpImpl::GlobalCacheInitialize(regexp, subject, is_global, isolate) ==
3078 RegExpImpl::RE_EXCEPTION) {
3079 return Failure::Exception();
3080 }
3081
3082 int32_t* current_match = RegExpImpl::GlobalCacheFetchNext(regexp, subject);
3083 if (current_match == NULL) {
3084 if (RegExpImpl::GlobalCacheResult() == RegExpImpl::RE_EXCEPTION) {
3085 return Failure::Exception();
3086 } else if (RegExpImpl::GlobalCacheResult() == RegExpImpl::RE_FAILURE) {
3087 return *subject;
3088 }
3089 }
3090
3174 // Guessing the number of parts that the final result string is built 3091 // Guessing the number of parts that the final result string is built
3175 // from. Global regexps can match any number of times, so we guess 3092 // from. Global regexps can match any number of times, so we guess
3176 // conservatively. 3093 // conservatively.
3177 int expected_parts = 3094 int expected_parts =
3178 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; 3095 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1;
3179 ReplacementStringBuilder builder(isolate->heap(), 3096 ReplacementStringBuilder builder(isolate->heap(),
3180 subject_handle, 3097 subject,
3181 expected_parts); 3098 expected_parts);
3182 3099
3183 // Index of end of last match.
3184 int prev = 0;
3185
3186 // Number of parts added by compiled replacement plus preceding 3100 // Number of parts added by compiled replacement plus preceding
3187 // string and possibly suffix after last match. It is possible for 3101 // string and possibly suffix after last match. It is possible for
3188 // all components to use two elements when encoded as two smis. 3102 // all components to use two elements when encoded as two smis.
3189 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2); 3103 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
3190 bool matched = true; 3104
3191 do { 3105 int prev = 0;
3192 ASSERT(last_match_info_handle->HasFastObjectElements()); 3106
3193 // Increase the capacity of the builder before entering local handle-scope, 3107 while (true) {
3194 // so its internal buffer can safely allocate a new handle if it grows.
3195 builder.EnsureCapacity(parts_added_per_loop); 3108 builder.EnsureCapacity(parts_added_per_loop);
3196 3109
3197 HandleScope loop_scope(isolate); 3110 int start = current_match[0];
3198 int start, end; 3111 int end = current_match[1];
3199 {
3200 AssertNoAllocation match_info_array_is_not_in_a_handle;
3201 FixedArray* match_info_array =
3202 FixedArray::cast(last_match_info_handle->elements());
3203
3204 ASSERT_EQ(capture_count * 2 + 2,
3205 RegExpImpl::GetLastCaptureCount(match_info_array));
3206 start = RegExpImpl::GetCapture(match_info_array, 0);
3207 end = RegExpImpl::GetCapture(match_info_array, 1);
3208 }
3209 3112
3210 if (prev < start) { 3113 if (prev < start) {
3211 builder.AddSubjectSlice(prev, start); 3114 builder.AddSubjectSlice(prev, start);
3212 } 3115 }
3213 compiled_replacement.Apply(&builder, 3116
3214 start, 3117 if (simple_replace) {
3215 end, 3118 builder.AddString(replacement);
3216 last_match_info_handle); 3119 } else {
3120 compiled_replacement.Apply(&builder,
3121 start,
3122 end,
3123 current_match);
3124 }
3217 prev = end; 3125 prev = end;
3218 3126
3219 // Only continue checking for global regexps. 3127 // Only continue checking for global regexps.
3220 if (!is_global) break; 3128 if (!is_global) break;
3221 3129
3222 // Continue from where the match ended, unless it was an empty match. 3130 int32_t* match = RegExpImpl::GlobalCacheFetchNext(regexp, subject);
3223 int next = end; 3131
3224 if (start == end) { 3132 if (match == NULL) {
3225 next = end + 1; 3133 break;
3226 if (next > length) break; 3134 } else {
3135 current_match = match;
3227 } 3136 }
3137 }
3228 3138
3229 match = RegExpImpl::Exec(regexp_handle, 3139 if (RegExpImpl::GlobalCacheResult() == RegExpImpl::RE_EXCEPTION) {
3230 subject_handle, 3140 return Failure::Exception();
3231 next, 3141 }
3232 last_match_info_handle);
3233 if (match.is_null()) {
3234 return Failure::Exception();
3235 }
3236 matched = !match->IsNull();
3237 } while (matched);
3238 3142
3239 if (prev < length) { 3143 if (prev < subject_length) {
3240 builder.AddSubjectSlice(prev, length); 3144 builder.AddSubjectSlice(prev, subject_length);
3241 } 3145 }
3242 3146
3147 RegExpImpl::SetLastMatchInfo(
3148 last_match_info, subject, capture_count, current_match);
3149
3243 return *(builder.ToString()); 3150 return *(builder.ToString());
3244 } 3151 }
3245 3152
3246 3153
3247 template <typename ResultSeqString> 3154 template <typename ResultSeqString>
3248 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( 3155 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
3249 Isolate* isolate, 3156 Isolate* isolate,
3250 String* subject, 3157 Handle<String> subject,
3251 JSRegExp* regexp, 3158 Handle<JSRegExp> regexp,
3252 JSArray* last_match_info, 3159 Handle<JSArray> last_match_info) {
3253 Zone* zone) {
3254 ASSERT(subject->IsFlat()); 3160 ASSERT(subject->IsFlat());
3255 3161
3256 HandleScope handles(isolate); 3162 bool is_global = regexp->GetFlags().is_global();
3257
3258 Handle<String> subject_handle(subject);
3259 Handle<JSRegExp> regexp_handle(regexp);
3260 Handle<JSArray> last_match_info_handle(last_match_info);
3261 3163
3262 // Shortcut for simple non-regexp global replacements 3164 // Shortcut for simple non-regexp global replacements
3263 if (regexp_handle->GetFlags().is_global() && 3165 if (is_global &&
3264 regexp_handle->TypeTag() == JSRegExp::ATOM) { 3166 regexp->TypeTag() == JSRegExp::ATOM) {
3265 Handle<String> empty_string_handle(HEAP->empty_string()); 3167 Handle<String> empty_string(HEAP->empty_string());
3266 if (subject_handle->HasOnlyAsciiChars()) { 3168 if (subject->HasOnlyAsciiChars()) {
3267 return StringReplaceAtomRegExpWithString<SeqAsciiString>( 3169 return StringReplaceAtomRegExpWithString<SeqAsciiString>(
3268 isolate, 3170 isolate,
3269 subject_handle, 3171 subject,
3270 regexp_handle, 3172 regexp,
3271 empty_string_handle, 3173 empty_string,
3272 last_match_info_handle, 3174 last_match_info);
3273 zone);
3274 } else { 3175 } else {
3275 return StringReplaceAtomRegExpWithString<SeqTwoByteString>( 3176 return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
3276 isolate, 3177 isolate,
3277 subject_handle, 3178 subject,
3278 regexp_handle, 3179 regexp,
3279 empty_string_handle, 3180 empty_string,
3280 last_match_info_handle, 3181 last_match_info);
3281 zone);
3282 } 3182 }
3283 } 3183 }
3284 3184
3285 Handle<Object> match = RegExpImpl::Exec(regexp_handle, 3185 if (RegExpImpl::GlobalCacheInitialize(regexp, subject, is_global, isolate) ==
3286 subject_handle, 3186 RegExpImpl::RE_EXCEPTION) {
3287 0, 3187 return Failure::Exception();
3288 last_match_info_handle);
3289 if (match.is_null()) return Failure::Exception();
3290 if (match->IsNull()) return *subject_handle;
3291
3292 ASSERT(last_match_info_handle->HasFastObjectElements());
3293
3294 int start, end;
3295 {
3296 AssertNoAllocation match_info_array_is_not_in_a_handle;
3297 FixedArray* match_info_array =
3298 FixedArray::cast(last_match_info_handle->elements());
3299
3300 start = RegExpImpl::GetCapture(match_info_array, 0);
3301 end = RegExpImpl::GetCapture(match_info_array, 1);
3302 } 3188 }
3303 3189
3304 bool global = regexp_handle->GetFlags().is_global(); 3190 int32_t* current_match = RegExpImpl::GlobalCacheFetchNext(regexp, subject);
3191 if (current_match == NULL) {
3192 if (RegExpImpl::GlobalCacheResult() == RegExpImpl::RE_EXCEPTION) {
3193 return Failure::Exception();
3194 } else {
3195 ASSERT(RegExpImpl::GlobalCacheResult() == RegExpImpl::RE_FAILURE);
3196 return *subject;
3197 }
3198 }
3305 3199
3306 if (start == end && !global) return *subject_handle; 3200 int start = current_match[0];
3201 int end = current_match[1];
3202 int capture_count = regexp->CaptureCount();
3203 int subject_length = subject->length();
3307 3204
3308 int length = subject_handle->length(); 3205 int new_length = subject_length - (end - start);
3309 int new_length = length - (end - start); 3206 if (new_length == 0) return isolate->heap()->empty_string();
3310 if (new_length == 0) { 3207
3311 return isolate->heap()->empty_string();
3312 }
3313 Handle<ResultSeqString> answer; 3208 Handle<ResultSeqString> answer;
3314 if (ResultSeqString::kHasAsciiEncoding) { 3209 if (ResultSeqString::kHasAsciiEncoding) {
3315 answer = Handle<ResultSeqString>::cast( 3210 answer = Handle<ResultSeqString>::cast(
3316 isolate->factory()->NewRawAsciiString(new_length)); 3211 isolate->factory()->NewRawAsciiString(new_length));
3317 } else { 3212 } else {
3318 answer = Handle<ResultSeqString>::cast( 3213 answer = Handle<ResultSeqString>::cast(
3319 isolate->factory()->NewRawTwoByteString(new_length)); 3214 isolate->factory()->NewRawTwoByteString(new_length));
3320 } 3215 }
3321 3216
3322 // If the regexp isn't global, only match once. 3217 if (!is_global) {
3323 if (!global) { 3218 RegExpImpl::SetLastMatchInfo(
3324 if (start > 0) { 3219 last_match_info, subject, capture_count, current_match);
3325 String::WriteToFlat(*subject_handle, 3220 if (start == end) {
3326 answer->GetChars(), 3221 return *subject;
3327 0, 3222 } else {
3328 start); 3223 if (start > 0) {
3224 String::WriteToFlat(*subject, answer->GetChars(), 0, start);
3225 }
3226 if (end < subject_length) {
3227 String::WriteToFlat(
3228 *subject, answer->GetChars() + start, end, subject_length);
3229 }
3230 return *answer;
3329 } 3231 }
3330 if (end < length) {
3331 String::WriteToFlat(*subject_handle,
3332 answer->GetChars() + start,
3333 end,
3334 length);
3335 }
3336 return *answer;
3337 } 3232 }
3338 3233
3339 int prev = 0; // Index of end of last match. 3234 int prev = 0;
3340 int next = 0; // Start of next search (prev unless last match was empty).
3341 int position = 0; 3235 int position = 0;
3342 3236
3343 do { 3237 while (true) {
3238 start = current_match[0];
3239 end = current_match[1];
3344 if (prev < start) { 3240 if (prev < start) {
3345 // Add substring subject[prev;start] to answer string. 3241 // Add substring subject[prev;start] to answer string.
3346 String::WriteToFlat(*subject_handle, 3242 String::WriteToFlat(
3347 answer->GetChars() + position, 3243 *subject, answer->GetChars() + position, prev, start);
3348 prev,
3349 start);
3350 position += start - prev; 3244 position += start - prev;
3351 } 3245 }
3352 prev = end; 3246 prev = end;
3353 next = end; 3247
3354 // Continue from where the match ended, unless it was an empty match. 3248 int32_t* match = RegExpImpl::GlobalCacheFetchNext(regexp, subject);
3355 if (start == end) { 3249
3356 next++; 3250 if (match == NULL) {
3357 if (next > length) break; 3251 break;
3252 } else {
3253 current_match = match;
3358 } 3254 }
3359 match = RegExpImpl::Exec(regexp_handle,
3360 subject_handle,
3361 next,
3362 last_match_info_handle);
3363 if (match.is_null()) return Failure::Exception();
3364 if (match->IsNull()) break;
3365
3366 ASSERT(last_match_info_handle->HasFastObjectElements());
3367 HandleScope loop_scope(isolate);
3368 {
3369 AssertNoAllocation match_info_array_is_not_in_a_handle;
3370 FixedArray* match_info_array =
3371 FixedArray::cast(last_match_info_handle->elements());
3372 start = RegExpImpl::GetCapture(match_info_array, 0);
3373 end = RegExpImpl::GetCapture(match_info_array, 1);
3374 }
3375 } while (true);
3376
3377 if (prev < length) {
3378 // Add substring subject[prev;length] to answer string.
3379 String::WriteToFlat(*subject_handle,
3380 answer->GetChars() + position,
3381 prev,
3382 length);
3383 position += length - prev;
3384 } 3255 }
3385 3256
3386 if (position == 0) { 3257 if (RegExpImpl::GlobalCacheResult() == RegExpImpl::RE_EXCEPTION) {
3387 return isolate->heap()->empty_string(); 3258 return Failure::Exception();
3388 } 3259 }
3389 3260
3261 RegExpImpl::SetLastMatchInfo(
3262 last_match_info, subject, capture_count, current_match);
3263
3264 if (prev < subject_length) {
3265 // Add substring subject[prev;length] to answer string.
3266 String::WriteToFlat(
3267 *subject, answer->GetChars() + position, prev, subject_length);
3268 position += subject_length - prev;
3269 }
3270
3271 if (position == 0) return isolate->heap()->empty_string();
3272
3390 // Shorten string and fill 3273 // Shorten string and fill
3391 int string_size = ResultSeqString::SizeFor(position); 3274 int string_size = ResultSeqString::SizeFor(position);
3392 int allocated_string_size = ResultSeqString::SizeFor(new_length); 3275 int allocated_string_size = ResultSeqString::SizeFor(new_length);
3393 int delta = allocated_string_size - string_size; 3276 int delta = allocated_string_size - string_size;
3394 3277
3395 answer->set_length(position); 3278 answer->set_length(position);
3396 if (delta == 0) return *answer; 3279 if (delta == 0) return *answer;
3397 3280
3398 Address end_of_string = answer->address() + string_size; 3281 Address end_of_string = answer->address() + string_size;
3399 isolate->heap()->CreateFillerObjectAt(end_of_string, delta); 3282 isolate->heap()->CreateFillerObjectAt(end_of_string, delta);
3400 if (Marking::IsBlack(Marking::MarkBitFrom(*answer))) { 3283 if (Marking::IsBlack(Marking::MarkBitFrom(*answer))) {
3401 MemoryChunk::IncrementLiveBytesFromMutator(answer->address(), -delta); 3284 MemoryChunk::IncrementLiveBytesFromMutator(answer->address(), -delta);
3402 } 3285 }
3403 3286
3404 return *answer; 3287 return *answer;
3405 } 3288 }
3406 3289
3407 3290
3408 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) { 3291 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) {
3409 ASSERT(args.length() == 4); 3292 ASSERT(args.length() == 4);
3410 3293
3411 CONVERT_ARG_CHECKED(String, subject, 0); 3294 HandleScope scope(isolate);
3295
3296 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
3297 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
3298 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
3299 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
3300
3412 if (!subject->IsFlat()) { 3301 if (!subject->IsFlat()) {
3413 Object* flat_subject; 3302 subject = Handle<String>(subject->TryFlattenGetString());
3414 { MaybeObject* maybe_flat_subject = subject->TryFlatten();
3415 if (!maybe_flat_subject->ToObject(&flat_subject)) {
3416 return maybe_flat_subject;
3417 }
3418 }
3419 subject = String::cast(flat_subject);
3420 } 3303 }
3421 3304
3422 CONVERT_ARG_CHECKED(String, replacement, 2);
3423 if (!replacement->IsFlat()) { 3305 if (!replacement->IsFlat()) {
3424 Object* flat_replacement; 3306 replacement = Handle<String>(replacement->TryFlattenGetString());
3425 { MaybeObject* maybe_flat_replacement = replacement->TryFlatten();
3426 if (!maybe_flat_replacement->ToObject(&flat_replacement)) {
3427 return maybe_flat_replacement;
3428 }
3429 }
3430 replacement = String::cast(flat_replacement);
3431 } 3307 }
3432 3308
3433 CONVERT_ARG_CHECKED(JSRegExp, regexp, 1);
3434 CONVERT_ARG_CHECKED(JSArray, last_match_info, 3);
3435
3436 ASSERT(last_match_info->HasFastObjectElements()); 3309 ASSERT(last_match_info->HasFastObjectElements());
3437 3310
3438 Zone* zone = isolate->runtime_zone();
3439 if (replacement->length() == 0) { 3311 if (replacement->length() == 0) {
3440 if (subject->HasOnlyAsciiChars()) { 3312 if (subject->HasOnlyAsciiChars()) {
3441 return StringReplaceRegExpWithEmptyString<SeqAsciiString>( 3313 return StringReplaceRegExpWithEmptyString<SeqAsciiString>(
3442 isolate, subject, regexp, last_match_info, zone); 3314 isolate, subject, regexp, last_match_info);
3443 } else { 3315 } else {
3444 return StringReplaceRegExpWithEmptyString<SeqTwoByteString>( 3316 return StringReplaceRegExpWithEmptyString<SeqTwoByteString>(
3445 isolate, subject, regexp, last_match_info, zone); 3317 isolate, subject, regexp, last_match_info);
3446 } 3318 }
3447 } 3319 }
3448 3320
3449 return StringReplaceRegExpWithString(isolate, 3321 return StringReplaceRegExpWithString(
3450 subject, 3322 isolate, subject, regexp, replacement, last_match_info);
3451 regexp,
3452 replacement,
3453 last_match_info,
3454 zone);
3455 } 3323 }
3456 3324
3457 3325
3458 Handle<String> Runtime::StringReplaceOneCharWithString(Isolate* isolate, 3326 Handle<String> Runtime::StringReplaceOneCharWithString(Isolate* isolate,
3459 Handle<String> subject, 3327 Handle<String> subject,
3460 Handle<String> search, 3328 Handle<String> search,
3461 Handle<String> replace, 3329 Handle<String> replace,
3462 bool* found, 3330 bool* found,
3463 int recursion_limit) { 3331 int recursion_limit) {
3464 if (recursion_limit == 0) return Handle<String>::null(); 3332 if (recursion_limit == 0) return Handle<String>::null();
(...skipping 302 matching lines...) Expand 10 before | Expand all | Expand 10 after
3767 3635
3768 3636
3769 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) { 3637 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) {
3770 ASSERT_EQ(3, args.length()); 3638 ASSERT_EQ(3, args.length());
3771 3639
3772 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); 3640 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
3773 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); 3641 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
3774 CONVERT_ARG_HANDLE_CHECKED(JSArray, regexp_info, 2); 3642 CONVERT_ARG_HANDLE_CHECKED(JSArray, regexp_info, 2);
3775 HandleScope handles; 3643 HandleScope handles;
3776 3644
3777 Handle<Object> match = RegExpImpl::Exec(regexp, subject, 0, regexp_info); 3645 if (RegExpImpl::GlobalCacheInitialize(regexp, subject, true, isolate) ==
3778 3646 RegExpImpl::RE_EXCEPTION) {
3779 if (match.is_null()) {
3780 return Failure::Exception(); 3647 return Failure::Exception();
3781 } 3648 }
3782 if (match->IsNull()) { 3649
3783 return isolate->heap()->null_value(); 3650 int capture_count = regexp->CaptureCount();
3784 }
3785 int length = subject->length();
3786 3651
3787 Zone* zone = isolate->runtime_zone(); 3652 Zone* zone = isolate->runtime_zone();
3788 ZoneScope zone_space(zone, DELETE_ON_EXIT); 3653 ZoneScope zone_space(zone, DELETE_ON_EXIT);
3789 ZoneList<int> offsets(8, zone); 3654 ZoneList<int> offsets(8, zone);
3790 int start; 3655
3791 int end; 3656 int32_t* current_match = NULL;
3792 do { 3657 while (true) {
3793 { 3658 int32_t* match = RegExpImpl::GlobalCacheFetchNext(regexp, subject);
3794 AssertNoAllocation no_alloc; 3659 if (match == NULL) {
3795 FixedArray* elements = FixedArray::cast(regexp_info->elements()); 3660 break;
3796 start = Smi::cast(elements->get(RegExpImpl::kFirstCapture))->value(); 3661 } else {
3797 end = Smi::cast(elements->get(RegExpImpl::kFirstCapture + 1))->value(); 3662 current_match = match;
3798 } 3663 }
3799 offsets.Add(start, zone); 3664 offsets.Add(current_match[0], zone); // start
3800 offsets.Add(end, zone); 3665 offsets.Add(current_match[1], zone); // end
3801 if (start == end) if (++end > length) break; 3666 }
3802 match = RegExpImpl::Exec(regexp, subject, end, regexp_info); 3667
3803 if (match.is_null()) { 3668 if (RegExpImpl::GlobalCacheResult() == RegExpImpl::RE_EXCEPTION) {
3804 return Failure::Exception(); 3669 return Failure::Exception();
3805 } 3670 }
3806 } while (!match->IsNull()); 3671
3672 if (current_match == NULL) {
3673 return isolate->heap()->null_value();
3674 }
3675
3676 RegExpImpl::SetLastMatchInfo(
3677 regexp_info, subject, capture_count, current_match);
3678
3807 int matches = offsets.length() / 2; 3679 int matches = offsets.length() / 2;
3808 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(matches); 3680 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(matches);
3809 Handle<String> substring = isolate->factory()-> 3681 Handle<String> substring =
3810 NewSubString(subject, offsets.at(0), offsets.at(1)); 3682 isolate->factory()->NewSubString(subject, offsets.at(0), offsets.at(1));
3811 elements->set(0, *substring); 3683 elements->set(0, *substring);
3812 for (int i = 1; i < matches ; i++) { 3684 for (int i = 1; i < matches; i++) {
3685 HandleScope temp_scope(isolate);
3813 int from = offsets.at(i * 2); 3686 int from = offsets.at(i * 2);
3814 int to = offsets.at(i * 2 + 1); 3687 int to = offsets.at(i * 2 + 1);
3815 Handle<String> substring = isolate->factory()-> 3688 Handle<String> substring =
3816 NewProperSubString(subject, from, to); 3689 isolate->factory()->NewProperSubString(subject, from, to);
3817 elements->set(i, *substring); 3690 elements->set(i, *substring);
3818 } 3691 }
3819 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(elements); 3692 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(elements);
3820 result->set_length(Smi::FromInt(matches)); 3693 result->set_length(Smi::FromInt(matches));
3821 return *result; 3694 return *result;
3822 } 3695 }
3823 3696
3824 3697
3825 static bool SearchStringMultiple(Isolate* isolate, 3698 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
3826 Handle<String> subject, 3699 // separate last match info. See comment on that function.
3827 Handle<String> pattern, 3700 template<bool has_capture>
3828 Handle<JSArray> last_match_info, 3701 static int SearchRegExpMultiple(
3829 FixedArrayBuilder* builder) {
3830 ASSERT(subject->IsFlat());
3831 ASSERT(pattern->IsFlat());
3832
3833 // Treating as if a previous match was before first character.
3834 int match_pos = -pattern->length();
3835
3836 for (;;) { // Break when search complete.
3837 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3838 AssertNoAllocation no_gc;
3839 String::FlatContent subject_content = subject->GetFlatContent();
3840 String::FlatContent pattern_content = pattern->GetFlatContent();
3841 if (subject_content.IsAscii()) {
3842 Vector<const char> subject_vector = subject_content.ToAsciiVector();
3843 if (pattern_content.IsAscii()) {
3844 if (SearchStringMultiple(isolate,
3845 subject_vector,
3846 pattern_content.ToAsciiVector(),
3847 *pattern,
3848 builder,
3849 &match_pos)) break;
3850 } else {
3851 if (SearchStringMultiple(isolate,
3852 subject_vector,
3853 pattern_content.ToUC16Vector(),
3854 *pattern,
3855 builder,
3856 &match_pos)) break;
3857 }
3858 } else {
3859 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
3860 if (pattern_content.IsAscii()) {
3861 if (SearchStringMultiple(isolate,
3862 subject_vector,
3863 pattern_content.ToAsciiVector(),
3864 *pattern,
3865 builder,
3866 &match_pos)) break;
3867 } else {
3868 if (SearchStringMultiple(isolate,
3869 subject_vector,
3870 pattern_content.ToUC16Vector(),
3871 *pattern,
3872 builder,
3873 &match_pos)) break;
3874 }
3875 }
3876 }
3877
3878 if (match_pos >= 0) {
3879 SetLastMatchInfoNoCaptures(subject,
3880 last_match_info,
3881 match_pos,
3882 match_pos + pattern->length());
3883 return true;
3884 }
3885 return false; // No matches at all.
3886 }
3887
3888
3889 static int SearchRegExpNoCaptureMultiple(
3890 Isolate* isolate, 3702 Isolate* isolate,
3891 Handle<String> subject, 3703 Handle<String> subject,
3892 Handle<JSRegExp> regexp, 3704 Handle<JSRegExp> regexp,
3893 Handle<JSArray> last_match_array, 3705 Handle<JSArray> last_match_array,
3894 FixedArrayBuilder* builder) { 3706 FixedArrayBuilder* builder) {
3895 ASSERT(subject->IsFlat()); 3707 ASSERT(subject->IsFlat());
3896 ASSERT(regexp->CaptureCount() == 0); 3708 ASSERT_NE(has_capture, regexp->CaptureCount() == 0);
3709
3710 if (RegExpImpl::GlobalCacheInitialize(regexp, subject, true, isolate) ==
3711 RegExpImpl::RE_EXCEPTION) {
3712 return RegExpImpl::RE_EXCEPTION;
3713 }
3714
3715 int capture_count = regexp->CaptureCount();
3716 int subject_length = subject->length();
3717
3718 // Position to search from.
3897 int match_start = -1; 3719 int match_start = -1;
3898 int match_end = 0; 3720 int match_end = 0;
3899 int pos = 0; 3721 bool first = true;
3900 int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
3901 if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
3902 3722
3903 int max_matches; 3723 // Two smis before and after the match, for very long strings.
3904 int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp, 3724 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
3905 registers_per_match, 3725
3906 &max_matches); 3726 while (true) {
3907 OffsetsVector registers(num_registers, isolate); 3727 int32_t* current_match = RegExpImpl::GlobalCacheFetchNext(regexp, subject);
3908 Vector<int32_t> register_vector(registers.vector(), registers.length()); 3728 if (current_match == NULL) break;
3909 int subject_length = subject->length(); 3729 match_start = current_match[0];
3910 bool first = true; 3730 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3911 for (;;) { // Break on failure, return on exception. 3731 if (match_end < match_start) {
3912 int num_matches = RegExpImpl::IrregexpExecRaw(regexp, 3732 ReplacementStringBuilder::AddSubjectSlice(builder,
3913 subject, 3733 match_end,
3914 pos, 3734 match_start);
3915 register_vector); 3735 }
3916 if (num_matches > 0) { 3736 match_end = current_match[1];
3917 for (int match_index = 0; match_index < num_matches; match_index++) { 3737 {
3918 int32_t* current_match = &register_vector[match_index * 2]; 3738 // Avoid accumulating new handles inside loop.
3919 match_start = current_match[0]; 3739 HandleScope temp_scope(isolate);
3920 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); 3740 Handle<String> match;
3921 if (match_end < match_start) { 3741 if (!first) {
3922 ReplacementStringBuilder::AddSubjectSlice(builder, 3742 match = isolate->factory()->NewProperSubString(subject,
3923 match_end, 3743 match_start,
3924 match_start); 3744 match_end);
3925 } 3745 } else {
3926 match_end = current_match[1]; 3746 match = isolate->factory()->NewSubString(subject,
3927 HandleScope loop_scope(isolate); 3747 match_start,
3928 if (!first) { 3748 match_end);
3929 builder->Add(*isolate->factory()->NewProperSubString(subject, 3749 first = false;
3930 match_start,
3931 match_end));
3932 } else {
3933 builder->Add(*isolate->factory()->NewSubString(subject,
3934 match_start,
3935 match_end));
3936 first = false;
3937 }
3938 } 3750 }
3939 3751
3940 // If we did not get the maximum number of matches, we can stop here 3752 if (has_capture) {
3941 // since there are no matches left. 3753 // Arguments array to replace function is match, captures, index and
3942 if (num_matches < max_matches) break; 3754 // subject, i.e., 3 + capture count in total.
3755 Handle<FixedArray> elements =
3756 isolate->factory()->NewFixedArray(3 + capture_count);
3943 3757
3944 if (match_start != match_end) { 3758 elements->set(0, *match);
3945 pos = match_end; 3759 for (int i = 1; i <= capture_count; i++) {
3760 int start = current_match[i * 2];
3761 if (start >= 0) {
3762 int end = current_match[i * 2 + 1];
3763 ASSERT(start <= end);
3764 Handle<String> substring =
3765 isolate->factory()->NewSubString(subject, start, end);
3766 elements->set(i, *substring);
3767 } else {
3768 ASSERT(current_match[i * 2 + 1] < 0);
3769 elements->set(i, isolate->heap()->undefined_value());
3770 }
3771 }
3772 elements->set(capture_count + 1, Smi::FromInt(match_start));
3773 elements->set(capture_count + 2, *subject);
3774 builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
3946 } else { 3775 } else {
3947 pos = match_end + 1; 3776 builder->Add(*match);
3948 if (pos > subject_length) break;
3949 } 3777 }
3950 } else if (num_matches == 0) {
3951 break;
3952 } else {
3953 ASSERT_EQ(num_matches, RegExpImpl::RE_EXCEPTION);
3954 return RegExpImpl::RE_EXCEPTION;
3955 } 3778 }
3956 } 3779 }
3957 3780
3781 if (RegExpImpl::GlobalCacheResult() == RegExpImpl::RE_EXCEPTION) {
3782 return RegExpImpl::RE_EXCEPTION;
3783 }
3784
3958 if (match_start >= 0) { 3785 if (match_start >= 0) {
3786 // Finished matching, with at least one match.
3959 if (match_end < subject_length) { 3787 if (match_end < subject_length) {
3960 ReplacementStringBuilder::AddSubjectSlice(builder, 3788 ReplacementStringBuilder::AddSubjectSlice(builder,
3961 match_end, 3789 match_end,
3962 subject_length); 3790 subject_length);
3963 } 3791 }
3964 SetLastMatchInfoNoCaptures(subject, 3792
3965 last_match_array, 3793 RegExpImpl::SetLastMatchInfo(
3966 match_start, 3794 last_match_array, subject, capture_count, NULL);
3967 match_end); 3795
3968 return RegExpImpl::RE_SUCCESS; 3796 return RegExpImpl::RE_SUCCESS;
3969 } else { 3797 } else {
3970 return RegExpImpl::RE_FAILURE; // No matches at all. 3798 return RegExpImpl::RE_FAILURE; // No matches at all.
3971 } 3799 }
3972 } 3800 }
3973 3801
3974 3802
3975 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
3976 // separate last match info. See comment on that function.
3977 static int SearchRegExpMultiple(
3978 Isolate* isolate,
3979 Handle<String> subject,
3980 Handle<JSRegExp> regexp,
3981 Handle<JSArray> last_match_array,
3982 FixedArrayBuilder* builder,
3983 Zone* zone) {
3984
3985 ASSERT(subject->IsFlat());
3986 int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
3987 if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
3988
3989 int max_matches;
3990 int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp,
3991 registers_per_match,
3992 &max_matches);
3993 OffsetsVector registers(num_registers, isolate);
3994 Vector<int32_t> register_vector(registers.vector(), registers.length());
3995
3996 int num_matches = RegExpImpl::IrregexpExecRaw(regexp,
3997 subject,
3998 0,
3999 register_vector);
4000
4001 int capture_count = regexp->CaptureCount();
4002 int subject_length = subject->length();
4003
4004 // Position to search from.
4005 int pos = 0;
4006 // End of previous match. Differs from pos if match was empty.
4007 int match_end = 0;
4008 bool first = true;
4009
4010 if (num_matches > 0) {
4011 do {
4012 int match_start = 0;
4013 for (int match_index = 0; match_index < num_matches; match_index++) {
4014 int32_t* current_match =
4015 &register_vector[match_index * registers_per_match];
4016 match_start = current_match[0];
4017 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
4018 if (match_end < match_start) {
4019 ReplacementStringBuilder::AddSubjectSlice(builder,
4020 match_end,
4021 match_start);
4022 }
4023 match_end = current_match[1];
4024
4025 {
4026 // Avoid accumulating new handles inside loop.
4027 HandleScope temp_scope(isolate);
4028 // Arguments array to replace function is match, captures, index and
4029 // subject, i.e., 3 + capture count in total.
4030 Handle<FixedArray> elements =
4031 isolate->factory()->NewFixedArray(3 + capture_count);
4032 Handle<String> match;
4033 if (!first) {
4034 match = isolate->factory()->NewProperSubString(subject,
4035 match_start,
4036 match_end);
4037 } else {
4038 match = isolate->factory()->NewSubString(subject,
4039 match_start,
4040 match_end);
4041 }
4042 elements->set(0, *match);
4043 for (int i = 1; i <= capture_count; i++) {
4044 int start = current_match[i * 2];
4045 if (start >= 0) {
4046 int end = current_match[i * 2 + 1];
4047 ASSERT(start <= end);
4048 Handle<String> substring;
4049 if (!first) {
4050 substring =
4051 isolate->factory()->NewProperSubString(subject, start, end);
4052 } else {
4053 substring =
4054 isolate->factory()->NewSubString(subject, start, end);
4055 }
4056 elements->set(i, *substring);
4057 } else {
4058 ASSERT(current_match[i * 2 + 1] < 0);
4059 elements->set(i, isolate->heap()->undefined_value());
4060 }
4061 }
4062 elements->set(capture_count + 1, Smi::FromInt(match_start));
4063 elements->set(capture_count + 2, *subject);
4064 builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
4065 }
4066 first = false;
4067 }
4068
4069 // If we did not get the maximum number of matches, we can stop here
4070 // since there are no matches left.
4071 if (num_matches < max_matches) break;
4072
4073 if (match_end > match_start) {
4074 pos = match_end;
4075 } else {
4076 pos = match_end + 1;
4077 if (pos > subject_length) {
4078 break;
4079 }
4080 }
4081
4082 num_matches = RegExpImpl::IrregexpExecRaw(regexp,
4083 subject,
4084 pos,
4085 register_vector);
4086 } while (num_matches > 0);
4087
4088 if (num_matches != RegExpImpl::RE_EXCEPTION) {
4089 // Finished matching, with at least one match.
4090 if (match_end < subject_length) {
4091 ReplacementStringBuilder::AddSubjectSlice(builder,
4092 match_end,
4093 subject_length);
4094 }
4095
4096 int last_match_capture_count = (capture_count + 1) * 2;
4097 int last_match_array_size =
4098 last_match_capture_count + RegExpImpl::kLastMatchOverhead;
4099 last_match_array->EnsureSize(last_match_array_size);
4100 AssertNoAllocation no_gc;
4101 FixedArray* elements = FixedArray::cast(last_match_array->elements());
4102 // We have to set this even though the rest of the last match array is
4103 // ignored.
4104 RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count);
4105 // These are also read without consulting the override.
4106 RegExpImpl::SetLastSubject(elements, *subject);
4107 RegExpImpl::SetLastInput(elements, *subject);
4108 return RegExpImpl::RE_SUCCESS;
4109 }
4110 }
4111 // No matches at all, return failure or exception result directly.
4112 return num_matches;
4113 }
4114
4115
4116 // This is only called for StringReplaceGlobalRegExpWithFunction. This sets 3803 // This is only called for StringReplaceGlobalRegExpWithFunction. This sets
4117 // lastMatchInfoOverride to maintain the last match info, so we don't need to 3804 // lastMatchInfoOverride to maintain the last match info, so we don't need to
4118 // set any other last match array info. 3805 // set any other last match array info.
4119 RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) { 3806 RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) {
4120 ASSERT(args.length() == 4); 3807 ASSERT(args.length() == 4);
4121 HandleScope handles(isolate); 3808 HandleScope handles(isolate);
4122 3809
4123 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); 3810 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
4124 if (!subject->IsFlat()) FlattenString(subject); 3811 if (!subject->IsFlat()) FlattenString(subject);
4125 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); 3812 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
4126 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2); 3813 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
4127 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); 3814 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
4128 3815
4129 ASSERT(last_match_info->HasFastObjectElements()); 3816 ASSERT(last_match_info->HasFastObjectElements());
4130 ASSERT(regexp->GetFlags().is_global()); 3817 ASSERT(regexp->GetFlags().is_global());
4131 Handle<FixedArray> result_elements; 3818 Handle<FixedArray> result_elements;
4132 if (result_array->HasFastObjectElements()) { 3819 if (result_array->HasFastObjectElements()) {
4133 result_elements = 3820 result_elements =
4134 Handle<FixedArray>(FixedArray::cast(result_array->elements())); 3821 Handle<FixedArray>(FixedArray::cast(result_array->elements()));
4135 } 3822 }
4136 if (result_elements.is_null() || result_elements->length() < 16) { 3823 if (result_elements.is_null() || result_elements->length() < 16) {
4137 result_elements = isolate->factory()->NewFixedArrayWithHoles(16); 3824 result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
4138 } 3825 }
4139 FixedArrayBuilder builder(result_elements); 3826 FixedArrayBuilder builder(result_elements);
4140 3827
4141 if (regexp->TypeTag() == JSRegExp::ATOM) { 3828 int result;
4142 Handle<String> pattern( 3829 if (regexp->CaptureCount() == 0) {
4143 String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex))); 3830 result = SearchRegExpMultiple<false>(
4144 ASSERT(pattern->IsFlat()); 3831 isolate, subject, regexp, last_match_info, &builder);
4145 if (SearchStringMultiple(isolate, subject, pattern, 3832 } else {
4146 last_match_info, &builder)) { 3833 result = SearchRegExpMultiple<true>(
4147 return *builder.ToJSArray(result_array); 3834 isolate, subject, regexp, last_match_info, &builder);
4148 }
4149 return isolate->heap()->null_value();
4150 } 3835 }
4151 3836
4152 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
4153
4154 int result;
4155 if (regexp->CaptureCount() == 0) {
4156 result = SearchRegExpNoCaptureMultiple(isolate,
4157 subject,
4158 regexp,
4159 last_match_info,
4160 &builder);
4161 } else {
4162 result = SearchRegExpMultiple(isolate,
4163 subject,
4164 regexp,
4165 last_match_info,
4166 &builder,
4167 isolate->runtime_zone());
4168 }
4169 if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array); 3837 if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array);
4170 if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value(); 3838 if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value();
4171 ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION); 3839 ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
4172 return Failure::Exception(); 3840 return Failure::Exception();
4173 } 3841 }
4174 3842
4175 3843
4176 RUNTIME_FUNCTION(MaybeObject*, Runtime_NumberToRadixString) { 3844 RUNTIME_FUNCTION(MaybeObject*, Runtime_NumberToRadixString) {
4177 NoHandleAllocation ha; 3845 NoHandleAllocation ha;
4178 ASSERT(args.length() == 2); 3846 ASSERT(args.length() == 2);
(...skipping 9536 matching lines...) Expand 10 before | Expand all | Expand 10 after
13715 // Handle last resort GC and make sure to allow future allocations 13383 // Handle last resort GC and make sure to allow future allocations
13716 // to grow the heap without causing GCs (if possible). 13384 // to grow the heap without causing GCs (if possible).
13717 isolate->counters()->gc_last_resort_from_js()->Increment(); 13385 isolate->counters()->gc_last_resort_from_js()->Increment();
13718 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, 13386 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags,
13719 "Runtime::PerformGC"); 13387 "Runtime::PerformGC");
13720 } 13388 }
13721 } 13389 }
13722 13390
13723 13391
13724 } } // namespace v8::internal 13392 } } // namespace v8::internal
OLDNEW
« src/jsregexp.h ('K') | « src/mips/code-stubs-mips.cc ('k') | src/x64/code-stubs-x64.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698