Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/runtime.cc

Issue 10386090: Implement loop for global regexps in regexp assembler. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 3777 matching lines...) Expand 10 before | Expand all | Expand 10 after
3788 SetLastMatchInfoNoCaptures(subject, 3788 SetLastMatchInfoNoCaptures(subject,
3789 last_match_info, 3789 last_match_info,
3790 match_pos, 3790 match_pos,
3791 match_pos + pattern->length()); 3791 match_pos + pattern->length());
3792 return true; 3792 return true;
3793 } 3793 }
3794 return false; // No matches at all. 3794 return false; // No matches at all.
3795 } 3795 }
3796 3796
3797 3797
3798 static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple( 3798 static int SearchRegExpNoCaptureMultiple(
Erik Corry 2012/05/11 11:01:00 I wonder if we can unify this and SearchRegExpMult
Yang 2012/05/16 14:58:47 I'll try this in another CL.
3799 Isolate* isolate, 3799 Isolate* isolate,
3800 Handle<String> subject, 3800 Handle<String> subject,
3801 Handle<JSRegExp> regexp, 3801 Handle<JSRegExp> regexp,
3802 Handle<JSArray> last_match_array, 3802 Handle<JSArray> last_match_array,
3803 FixedArrayBuilder* builder) { 3803 FixedArrayBuilder* builder) {
3804 ASSERT(subject->IsFlat()); 3804 ASSERT(subject->IsFlat());
3805 int match_start = -1; 3805 int match_start = -1;
3806 int match_end = 0; 3806 int match_end = 0;
3807 int pos = 0; 3807 int pos = 0;
3808 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); 3808 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
3809 if (required_registers < 0) return RegExpImpl::RE_EXCEPTION; 3809 if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;
3810 3810
3811 OffsetsVector registers(required_registers, isolate); 3811 OffsetsVector registers(OffsetsVector::kStaticOffsetsVectorSize, isolate);
3812 static const int max_result_sets =
3813 OffsetsVector::kStaticOffsetsVectorSize / 2;
3812 Vector<int32_t> register_vector(registers.vector(), registers.length()); 3814 Vector<int32_t> register_vector(registers.vector(), registers.length());
3813 int subject_length = subject->length(); 3815 int subject_length = subject->length();
3814 bool first = true; 3816 bool first = true;
3815
3816 for (;;) { // Break on failure, return on exception. 3817 for (;;) { // Break on failure, return on exception.
3817 RegExpImpl::IrregexpResult result = 3818 int result =
Erik Corry 2012/05/11 11:01:00 How about giving this variable a name like number_
Yang 2012/05/16 14:58:47 Done.
3818 RegExpImpl::IrregexpExecOnce(regexp, 3819 RegExpImpl::IrregexpExecOnce(regexp,
3819 subject, 3820 subject,
3820 pos, 3821 pos,
3821 register_vector); 3822 register_vector);
3822 if (result == RegExpImpl::RE_SUCCESS) { 3823 if (result >= RegExpImpl::RE_SUCCESS) {
3823 match_start = register_vector[0]; 3824 for (int result_index = 0; result_index < result; result_index++) {
3824 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); 3825 int32_t* current_result_set = &register_vector[result_index * 2];
3825 if (match_end < match_start) { 3826 match_start = current_result_set[0];
3826 ReplacementStringBuilder::AddSubjectSlice(builder, 3827 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3827 match_end, 3828 if (match_end < match_start) {
3828 match_start); 3829 ReplacementStringBuilder::AddSubjectSlice(builder,
3830 match_end,
3831 match_start);
3832 }
3833 match_end = current_result_set[1];
3834 HandleScope loop_scope(isolate);
3835 if (!first) {
3836 builder->Add(*isolate->factory()->NewProperSubString(subject,
3837 match_start,
3838 match_end));
3839 } else {
3840 builder->Add(*isolate->factory()->NewSubString(subject,
3841 match_start,
3842 match_end));
3843 }
3844 first = false;
3829 } 3845 }
3830 match_end = register_vector[1]; 3846
3831 HandleScope loop_scope(isolate); 3847 // If we did not get the maximum number of resultsets, we can stop here
3832 if (!first) { 3848 // since there are no matches left.
3833 builder->Add(*isolate->factory()->NewProperSubString(subject, 3849 if (result < max_result_sets) break;
3834 match_start, 3850
3835 match_end));
3836 } else {
3837 builder->Add(*isolate->factory()->NewSubString(subject,
3838 match_start,
3839 match_end));
3840 }
3841 if (match_start != match_end) { 3851 if (match_start != match_end) {
Erik Corry 2012/05/11 11:01:00 I don't see this logic replicated in the regexp-ma
3842 pos = match_end; 3852 pos = match_end;
3843 } else { 3853 } else {
3844 pos = match_end + 1; 3854 pos = match_end + 1;
3845 if (pos > subject_length) break; 3855 if (pos > subject_length) {
Erik Corry 2012/05/11 11:01:00 unneeded edit
3856 break;
3857 }
3846 } 3858 }
3847 } else if (result == RegExpImpl::RE_FAILURE) { 3859 } else if (result == RegExpImpl::RE_FAILURE) {
3848 break; 3860 break;
3849 } else { 3861 } else {
3850 ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION); 3862 ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
3851 return result; 3863 return result;
3852 } 3864 }
3853 first = false;
3854 } 3865 }
3855 3866
3856 if (match_start >= 0) { 3867 if (match_start >= 0) {
3857 if (match_end < subject_length) { 3868 if (match_end < subject_length) {
3858 ReplacementStringBuilder::AddSubjectSlice(builder, 3869 ReplacementStringBuilder::AddSubjectSlice(builder,
3859 match_end, 3870 match_end,
3860 subject_length); 3871 subject_length);
3861 } 3872 }
3862 SetLastMatchInfoNoCaptures(subject, 3873 SetLastMatchInfoNoCaptures(subject,
3863 last_match_array, 3874 last_match_array,
3864 match_start, 3875 match_start,
3865 match_end); 3876 match_end);
3866 return RegExpImpl::RE_SUCCESS; 3877 return RegExpImpl::RE_SUCCESS;
3867 } else { 3878 } else {
3868 return RegExpImpl::RE_FAILURE; // No matches at all. 3879 return RegExpImpl::RE_FAILURE; // No matches at all.
3869 } 3880 }
3870 } 3881 }
3871 3882
3872 3883
3873 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain 3884 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
3874 // separate last match info. See comment on that function. 3885 // separate last match info. See comment on that function.
3875 static RegExpImpl::IrregexpResult SearchRegExpMultiple( 3886 static int SearchRegExpMultiple(
3876 Isolate* isolate, 3887 Isolate* isolate,
3877 Handle<String> subject, 3888 Handle<String> subject,
3878 Handle<JSRegExp> regexp, 3889 Handle<JSRegExp> regexp,
3879 Handle<JSArray> last_match_array, 3890 Handle<JSArray> last_match_array,
3880 FixedArrayBuilder* builder) { 3891 FixedArrayBuilder* builder) {
3881 3892
3882 ASSERT(subject->IsFlat()); 3893 ASSERT(subject->IsFlat());
3883 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); 3894 int registers_per_result_set = RegExpImpl::IrregexpPrepare(regexp, subject);
3884 if (required_registers < 0) return RegExpImpl::RE_EXCEPTION; 3895 if (registers_per_result_set < 0) return RegExpImpl::RE_EXCEPTION;
3885 3896
3886 OffsetsVector registers(required_registers, isolate); 3897 int num_registers = Min(OffsetsVector::kStaticOffsetsVectorSize,
3898 registers_per_result_set);
3899 int max_result_sets = num_registers / registers_per_result_set;
3900 OffsetsVector registers(OffsetsVector::kStaticOffsetsVectorSize, isolate);
3887 Vector<int32_t> register_vector(registers.vector(), registers.length()); 3901 Vector<int32_t> register_vector(registers.vector(), registers.length());
3888 3902
3889 RegExpImpl::IrregexpResult result = 3903 int result = RegExpImpl::IrregexpExecOnce(regexp,
3890 RegExpImpl::IrregexpExecOnce(regexp, 3904 subject,
3891 subject, 3905 0,
3892 0, 3906 register_vector);
3893 register_vector);
3894 3907
3895 int capture_count = regexp->CaptureCount(); 3908 int capture_count = regexp->CaptureCount();
3896 int subject_length = subject->length(); 3909 int subject_length = subject->length();
3897 3910
3898 // Position to search from. 3911 // Position to search from.
3899 int pos = 0; 3912 int pos = 0;
3900 // End of previous match. Differs from pos if match was empty. 3913 // End of previous match. Differs from pos if match was empty.
3901 int match_end = 0; 3914 int match_end = 0;
3902 if (result == RegExpImpl::RE_SUCCESS) { 3915 if (result >= RegExpImpl::RE_SUCCESS) {
3903 bool first = true; 3916 bool first = true;
3904 do { 3917 do {
3905 int match_start = register_vector[0]; 3918 int match_start = 0;
3906 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); 3919 for (int result_index = 0; result_index < result; result_index++) {
3907 if (match_end < match_start) { 3920 int32_t* current_result_set =
3908 ReplacementStringBuilder::AddSubjectSlice(builder, 3921 &register_vector[result_index * registers_per_result_set];
3909 match_end, 3922 match_start = current_result_set[0];
3910 match_start); 3923 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3924 if (match_end < match_start) {
3925 ReplacementStringBuilder::AddSubjectSlice(builder,
3926 match_end,
3927 match_start);
3928 }
3929 match_end = current_result_set[1];
3930
3931 {
3932 // Avoid accumulating new handles inside loop.
3933 HandleScope temp_scope(isolate);
3934 // Arguments array to replace function is match, captures, index and
3935 // subject, i.e., 3 + capture count in total.
3936 Handle<FixedArray> elements =
3937 isolate->factory()->NewFixedArray(3 + capture_count);
3938 Handle<String> match;
3939 if (!first) {
3940 match = isolate->factory()->NewProperSubString(subject,
Erik Corry 2012/05/11 11:01:00 Not sure why we need NewProperSubString here.
Yang 2012/05/16 14:58:47 For every substring save for the first one we know
3941 match_start,
3942 match_end);
3943 } else {
3944 match = isolate->factory()->NewSubString(subject,
3945 match_start,
3946 match_end);
3947 }
3948 elements->set(0, *match);
3949 for (int i = 1; i <= capture_count; i++) {
Erik Corry 2012/05/11 11:01:00 I think you could just start this loop at 0 and re
Yang 2012/05/16 14:58:47 After removing the call to NewSubString here (firs
3950 int start = current_result_set[i * 2];
3951 if (start >= 0) {
3952 int end = current_result_set[i * 2 + 1];
3953 ASSERT(start <= end);
3954 Handle<String> substring;
3955 if (!first) {
3956 substring = isolate->factory()->NewProperSubString(subject,
3957 start,
3958 end);
3959 } else {
3960 substring = isolate->factory()->NewSubString(subject,
3961 start,
3962 end);
3963 }
3964 elements->set(i, *substring);
3965 } else {
3966 ASSERT(current_result_set[i * 2 + 1] < 0);
3967 elements->set(i, isolate->heap()->undefined_value());
3968 }
3969 }
3970 elements->set(capture_count + 1, Smi::FromInt(match_start));
3971 elements->set(capture_count + 2, *subject);
3972 builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
3973 }
3911 } 3974 }
3912 match_end = register_vector[1];
3913 3975
3914 { 3976 // If we did not get the maximum number of resultsets, we can stop here
3915 // Avoid accumulating new handles inside loop. 3977 // since there are no matches left.
3916 HandleScope temp_scope(isolate); 3978 if (result < max_result_sets) break;
3917 // Arguments array to replace function is match, captures, index and
3918 // subject, i.e., 3 + capture count in total.
3919 Handle<FixedArray> elements =
3920 isolate->factory()->NewFixedArray(3 + capture_count);
3921 Handle<String> match;
3922 if (!first) {
3923 match = isolate->factory()->NewProperSubString(subject,
3924 match_start,
3925 match_end);
3926 } else {
3927 match = isolate->factory()->NewSubString(subject,
3928 match_start,
3929 match_end);
3930 }
3931 elements->set(0, *match);
3932 for (int i = 1; i <= capture_count; i++) {
3933 int start = register_vector[i * 2];
3934 if (start >= 0) {
3935 int end = register_vector[i * 2 + 1];
3936 ASSERT(start <= end);
3937 Handle<String> substring;
3938 if (!first) {
3939 substring = isolate->factory()->NewProperSubString(subject,
3940 start,
3941 end);
3942 } else {
3943 substring = isolate->factory()->NewSubString(subject, start, end);
3944 }
3945 elements->set(i, *substring);
3946 } else {
3947 ASSERT(register_vector[i * 2 + 1] < 0);
3948 elements->set(i, isolate->heap()->undefined_value());
3949 }
3950 }
3951 elements->set(capture_count + 1, Smi::FromInt(match_start));
3952 elements->set(capture_count + 2, *subject);
3953 builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
3954 }
3955 3979
3956 if (match_end > match_start) { 3980 if (match_end > match_start) {
3957 pos = match_end; 3981 pos = match_end;
3958 } else { 3982 } else {
3959 pos = match_end + 1; 3983 pos = match_end + 1;
3960 if (pos > subject_length) { 3984 if (pos > subject_length) {
3961 break; 3985 break;
3962 } 3986 }
3963 } 3987 }
3964 3988
3965 result = RegExpImpl::IrregexpExecOnce(regexp, 3989 result = RegExpImpl::IrregexpExecOnce(regexp,
3966 subject, 3990 subject,
3967 pos, 3991 pos,
3968 register_vector); 3992 register_vector);
3969 first = false; 3993 first = false;
3970 } while (result == RegExpImpl::RE_SUCCESS); 3994 } while (result >= RegExpImpl::RE_SUCCESS);
3971 3995
3972 if (result != RegExpImpl::RE_EXCEPTION) { 3996 if (result != RegExpImpl::RE_EXCEPTION) {
3973 // Finished matching, with at least one match. 3997 // Finished matching, with at least one match.
3974 if (match_end < subject_length) { 3998 if (match_end < subject_length) {
3975 ReplacementStringBuilder::AddSubjectSlice(builder, 3999 ReplacementStringBuilder::AddSubjectSlice(builder,
3976 match_end, 4000 match_end,
3977 subject_length); 4001 subject_length);
3978 } 4002 }
3979 4003
3980 int last_match_capture_count = (capture_count + 1) * 2; 4004 int last_match_capture_count = (capture_count + 1) * 2;
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
4028 ASSERT(pattern->IsFlat()); 4052 ASSERT(pattern->IsFlat());
4029 if (SearchStringMultiple(isolate, subject, pattern, 4053 if (SearchStringMultiple(isolate, subject, pattern,
4030 last_match_info, &builder)) { 4054 last_match_info, &builder)) {
4031 return *builder.ToJSArray(result_array); 4055 return *builder.ToJSArray(result_array);
4032 } 4056 }
4033 return isolate->heap()->null_value(); 4057 return isolate->heap()->null_value();
4034 } 4058 }
4035 4059
4036 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); 4060 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
4037 4061
4038 RegExpImpl::IrregexpResult result; 4062 int result;
4039 if (regexp->CaptureCount() == 0) { 4063 if (regexp->CaptureCount() == 0) {
4040 result = SearchRegExpNoCaptureMultiple(isolate, 4064 result = SearchRegExpNoCaptureMultiple(isolate,
4041 subject, 4065 subject,
4042 regexp, 4066 regexp,
4043 last_match_info, 4067 last_match_info,
4044 &builder); 4068 &builder);
4045 } else { 4069 } else {
4046 result = SearchRegExpMultiple(isolate, 4070 result = SearchRegExpMultiple(isolate,
4047 subject, 4071 subject,
4048 regexp, 4072 regexp,
4049 last_match_info, 4073 last_match_info,
4050 &builder); 4074 &builder);
4051 } 4075 }
4052 if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array); 4076 if (result >= RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array);
4053 if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value(); 4077 if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value();
4054 ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION); 4078 ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
4055 return Failure::Exception(); 4079 return Failure::Exception();
4056 } 4080 }
4057 4081
4058 4082
4059 RUNTIME_FUNCTION(MaybeObject*, Runtime_NumberToRadixString) { 4083 RUNTIME_FUNCTION(MaybeObject*, Runtime_NumberToRadixString) {
4060 NoHandleAllocation ha; 4084 NoHandleAllocation ha;
4061 ASSERT(args.length() == 2); 4085 ASSERT(args.length() == 2);
4062 CONVERT_SMI_ARG_CHECKED(radix, 1); 4086 CONVERT_SMI_ARG_CHECKED(radix, 1);
(...skipping 9442 matching lines...) Expand 10 before | Expand all | Expand 10 after
13505 // Handle last resort GC and make sure to allow future allocations 13529 // Handle last resort GC and make sure to allow future allocations
13506 // to grow the heap without causing GCs (if possible). 13530 // to grow the heap without causing GCs (if possible).
13507 isolate->counters()->gc_last_resort_from_js()->Increment(); 13531 isolate->counters()->gc_last_resort_from_js()->Increment();
13508 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, 13532 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags,
13509 "Runtime::PerformGC"); 13533 "Runtime::PerformGC");
13510 } 13534 }
13511 } 13535 }
13512 13536
13513 13537
13514 } } // namespace v8::internal 13538 } } // namespace v8::internal
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698