Chromium Code Reviews| Index: src/jsregexp.cc |
| diff --git a/src/jsregexp.cc b/src/jsregexp.cc |
| index e730e145a4e41627512b75fb68fefa68cfe5f189..cfe4635bfeb1dc40aa5b528f4ac71a57a1cd2814 100644 |
| --- a/src/jsregexp.cc |
| +++ b/src/jsregexp.cc |
| @@ -278,11 +278,12 @@ static void SetAtomLastCapture(FixedArray* array, |
| } |
| -Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
| - Handle<String> subject, |
| - int index, |
| - Handle<JSArray> last_match_info) { |
| - Isolate* isolate = re->GetIsolate(); |
| +int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp, |
| + Handle<String> subject, |
| + int index, |
| + int32_t* output, |
| + int output_size) { |
| + Isolate* isolate = regexp->GetIsolate(); |
| ASSERT(0 <= index); |
| ASSERT(index <= subject->length()); |
| @@ -290,15 +291,16 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
| if (!subject->IsFlat()) FlattenString(subject); |
| AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
| - String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); |
| + String* needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)); |
| int needle_len = needle->length(); |
| ASSERT(needle->IsFlat()); |
| + ASSERT_LT(0, needle_len); |
| - if (needle_len != 0) { |
| - if (index + needle_len > subject->length()) { |
| - return isolate->factory()->null_value(); |
| - } |
| + if (index + needle_len > subject->length()) { |
| + return RegExpImpl::RE_FAILURE; |
| + } |
| + for (int i = 0; i < output_size; i += 2) { |
| String::FlatContent needle_content = needle->GetFlatContent(); |
| String::FlatContent subject_content = subject->GetFlatContent(); |
| ASSERT(needle_content.IsFlat()); |
| @@ -323,15 +325,36 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
| subject_content.ToUC16Vector(), |
| needle_content.ToUC16Vector(), |
| index))); |
| - if (index == -1) return isolate->factory()->null_value(); |
| + if (index == -1) { |
| + return i / 2; // Return number of matches. |
| + } else { |
| + output[i] = index; |
| + output[i+1] = index + needle_len; |
| + index += needle_len; |
| + } |
| } |
| - ASSERT(last_match_info->HasFastObjectElements()); |
| + return output_size / 2; |
| +} |
| - { |
| - NoHandleAllocation no_handles; |
| - FixedArray* array = FixedArray::cast(last_match_info->elements()); |
| - SetAtomLastCapture(array, *subject, index, index + needle_len); |
| - } |
| + |
| +Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
| + Handle<String> subject, |
| + int index, |
| + Handle<JSArray> last_match_info) { |
| + Isolate* isolate = re->GetIsolate(); |
| + |
| + static const int kNumRegisters = 2; |
| + STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize); |
| + int32_t* output_registers = isolate->jsregexp_static_offsets_vector(); |
| + |
| + int res = AtomExecRaw(re, subject, index, output_registers, kNumRegisters); |
| + |
| + if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value(); |
| + |
| + ASSERT_EQ(res, RegExpImpl::RE_SUCCESS); |
| + NoHandleAllocation no_handles; |
| + FixedArray* array = FixedArray::cast(last_match_info->elements()); |
| + SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]); |
| return last_match_info; |
| } |
| @@ -511,7 +534,11 @@ int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
| #ifdef V8_INTERPRETED_REGEXP |
| // Byte-code regexp needs space allocated for all its registers. |
| - return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); |
| + // The result captures are copied to the start of the registers array |
| + // if the match succeeds. This way those registers are not clobbered |
| + // when we set the last match info from last successful match. |
| + return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) + |
| + (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; |
| #else // V8_INTERPRETED_REGEXP |
| // Native regexp only needs room to output captures. Registers are handled |
| // internally. |
| @@ -520,27 +547,11 @@ int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
| } |
| -int RegExpImpl::GlobalOffsetsVectorSize(Handle<JSRegExp> regexp, |
| - int registers_per_match, |
| - int* max_matches) { |
| -#ifdef V8_INTERPRETED_REGEXP |
| - // Global loop in interpreted regexp is not implemented. Therefore we choose |
| - // the size of the offsets vector so that it can only store one match. |
| - *max_matches = 1; |
| - return registers_per_match; |
| -#else // V8_INTERPRETED_REGEXP |
| - int size = Max(registers_per_match, OffsetsVector::kStaticOffsetsVectorSize); |
| - *max_matches = size / registers_per_match; |
| - return size; |
| -#endif // V8_INTERPRETED_REGEXP |
| -} |
| - |
| - |
| -int RegExpImpl::IrregexpExecRaw( |
| - Handle<JSRegExp> regexp, |
| - Handle<String> subject, |
| - int index, |
| - Vector<int> output) { |
| +int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp, |
| + Handle<String> subject, |
| + int index, |
| + int32_t* output, |
| + int output_size) { |
| Isolate* isolate = regexp->GetIsolate(); |
| Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); |
| @@ -552,15 +563,19 @@ int RegExpImpl::IrregexpExecRaw( |
| bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
| #ifndef V8_INTERPRETED_REGEXP |
| - ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
| + ASSERT(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
| do { |
| EnsureCompiledIrregexp(regexp, subject, is_ascii); |
| Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); |
| + // The stack is used to allocate registers for the compiled regexp code. |
| + // This means that in case of failure, the output registers array is left |
| + // untouched and contains the capture results from the previous successful |
| + // match. We can use that to set the last match info lazily. |
| NativeRegExpMacroAssembler::Result res = |
| NativeRegExpMacroAssembler::Match(code, |
| subject, |
| - output.start(), |
| - output.length(), |
| + output, |
| + output_size, |
| index, |
| isolate); |
| if (res != NativeRegExpMacroAssembler::RETRY) { |
| @@ -587,22 +602,29 @@ int RegExpImpl::IrregexpExecRaw( |
| return RE_EXCEPTION; |
| #else // V8_INTERPRETED_REGEXP |
| - ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); |
| + ASSERT(output_size >= IrregexpNumberOfRegisters(*irregexp)); |
| // We must have done EnsureCompiledIrregexp, so we can get the number of |
| // registers. |
| - int* register_vector = output.start(); |
| int number_of_capture_registers = |
| (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
| + int32_t* raw_output = &output[number_of_capture_registers]; |
| + // We do not touch the actual capture result registers until we know there |
| + // has been a match so that we can use those capture results to set the |
| + // last match info. |
| for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
| - register_vector[i] = -1; |
| + raw_output[i] = -1; |
| } |
| Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); |
| IrregexpResult result = IrregexpInterpreter::Match(isolate, |
| byte_codes, |
| subject, |
| - register_vector, |
| + raw_output, |
| index); |
| + if (result == RE_SUCCESS) { |
| + // Copy capture results to the start of the registers array. |
| + memcpy(output, raw_output, number_of_capture_registers * sizeof(int32_t)); |
| + } |
| if (result == RE_EXCEPTION) { |
| ASSERT(!isolate->has_pending_exception()); |
| isolate->StackOverflow(); |
| @@ -612,50 +634,42 @@ int RegExpImpl::IrregexpExecRaw( |
| } |
| -Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, |
| +Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, |
| Handle<String> subject, |
| int previous_index, |
| Handle<JSArray> last_match_info) { |
| - Isolate* isolate = jsregexp->GetIsolate(); |
| - ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); |
| + Isolate* isolate = regexp->GetIsolate(); |
| + ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| // Prepare space for the return values. |
| -#ifdef V8_INTERPRETED_REGEXP |
| -#ifdef DEBUG |
| +#if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG) |
| if (FLAG_trace_regexp_bytecodes) { |
| - String* pattern = jsregexp->Pattern(); |
| + String* pattern = regexp->Pattern(); |
| PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| } |
| #endif |
| -#endif |
| - int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); |
| + int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); |
| if (required_registers < 0) { |
| // Compiling failed with an exception. |
| ASSERT(isolate->has_pending_exception()); |
| return Handle<Object>::null(); |
| } |
| - OffsetsVector registers(required_registers, isolate); |
| + int32_t* output_registers; |
| + if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) { |
| + output_registers = NewArray<int32_t>(required_registers); |
|
ulan
2012/08/03 11:58:05
Where does this array get released?
|
| + } else { |
| + output_registers = isolate->jsregexp_static_offsets_vector(); |
| + } |
| - int res = RegExpImpl::IrregexpExecRaw(jsregexp, subject, previous_index, |
| - Vector<int>(registers.vector(), |
| - registers.length())); |
| + int res = RegExpImpl::IrregexpExecRaw( |
| + regexp, subject, previous_index, output_registers, required_registers); |
| if (res == RE_SUCCESS) { |
| - int capture_register_count = |
| - (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
| - last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); |
| - AssertNoAllocation no_gc; |
| - int* register_vector = registers.vector(); |
| - FixedArray* array = FixedArray::cast(last_match_info->elements()); |
| - for (int i = 0; i < capture_register_count; i += 2) { |
| - SetCapture(array, i, register_vector[i]); |
| - SetCapture(array, i + 1, register_vector[i + 1]); |
| - } |
| - SetLastCaptureCount(array, capture_register_count); |
| - SetLastSubject(array, *subject); |
| - SetLastInput(array, *subject); |
| - return last_match_info; |
| + int capture_count = |
| + IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())); |
| + return SetLastMatchInfo( |
| + last_match_info, subject, capture_count, output_registers); |
| } |
| if (res == RE_EXCEPTION) { |
| ASSERT(isolate->has_pending_exception()); |
| @@ -666,6 +680,139 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, |
| } |
| +Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info, |
| + Handle<String> subject, |
| + int capture_count, |
| + int32_t* match) { |
| + int capture_register_count = (capture_count + 1) * 2; |
| + last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); |
| + AssertNoAllocation no_gc; |
| + FixedArray* array = FixedArray::cast(last_match_info->elements()); |
| + if (match != NULL) { |
| + for (int i = 0; i < capture_register_count; i += 2) { |
| + SetCapture(array, i, match[i]); |
| + SetCapture(array, i + 1, match[i + 1]); |
| + } |
| + } |
| + SetLastCaptureCount(array, capture_register_count); |
| + SetLastSubject(array, *subject); |
| + SetLastInput(array, *subject); |
| + return last_match_info; |
| +} |
| + |
| + |
| +RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp, |
| + Handle<String> subject, |
| + bool is_global, |
| + Isolate* isolate) { |
| +#ifdef V8_INTERPRETED_REGEXP |
| + bool interpreted = true; |
| +#else |
| + bool interpreted = false; |
| +#endif // V8_INTERPRETED_REGEXP |
| + |
| + regexp_ = regexp; |
| + subject_ = subject; |
| + |
| + if (regexp_->TypeTag() == JSRegExp::ATOM) { |
| + static const int kAtomRegistersPerMatch = 2; |
| + registers_per_match_ = kAtomRegistersPerMatch; |
| + // There is no distinction between interpreted and native for atom regexps. |
| + interpreted = false; |
| + } else { |
| + registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_); |
| + if (registers_per_match_ < 0) { |
| + num_matches_ = -1; // Signal exception. |
| + return; |
| + } |
| + } |
| + |
| + if (is_global && !interpreted) { |
| + register_array_size_ = |
| + Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize); |
| + max_matches_ = register_array_size_ / registers_per_match_; |
| + } else { |
| + // Global loop in interpreted regexp is not implemented. We choose |
| + // the size of the offsets vector so that it can only store one match. |
| + register_array_size_ = registers_per_match_; |
| + max_matches_ = 1; |
| + } |
| + |
| + if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { |
| + register_array_ = NewArray<int32_t>(register_array_size_); |
| + } else { |
| + register_array_ = isolate->jsregexp_static_offsets_vector(); |
| + } |
| + |
| + // Set state so that fetching the results the first time triggers a call |
| + // to the compiled regexp. |
| + current_match_index_ = max_matches_; |
| + num_matches_ = max_matches_; |
| + int32_t* last_match = |
| + &register_array_[register_array_size_ - registers_per_match_]; |
| + last_match[0] = -1; |
|
ulan
2012/08/03 11:58:05
This assumes that registers_per_match_ >= 2. If th
Yang
2012/08/03 12:58:22
Registers generally don't need initializing. I ini
|
| + last_match[1] = 0; |
| +} |
| + |
| + |
| +RegExpImpl::GlobalCache::~GlobalCache() { |
| + // Deallocate the register array if we allocated it in the constructor |
| + // (as opposed to using the existing jsregexp_static_offsets_vector). |
| + if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { |
| + DeleteArray(register_array_); |
| + } |
| +} |
| + |
| + |
| +int32_t* RegExpImpl::GlobalCache::FetchNext() { |
| + current_match_index_++; |
| + if (current_match_index_ >= num_matches_) { |
| + // Current batch of results exhausted. |
| + // Fail if last batch was not even fully filled. |
| + if (num_matches_ < max_matches_) { |
| + num_matches_ = 0; // Signal failed match. |
| + return NULL; |
| + } |
| + |
| + int32_t* last_match = &register_array_[register_array_size_ |
| + - registers_per_match_]; |
| + int last_end_index = last_match[1]; |
| + |
| + if (regexp_->TypeTag() == JSRegExp::ATOM) { |
| + num_matches_ = RegExpImpl::AtomExecRaw(regexp_, subject_, last_end_index, |
| + register_array_, register_array_size_); |
| + } else { |
| + int last_start_index = last_match[0]; |
| + if (last_start_index == last_end_index) |
| + last_end_index++; |
| + if (last_end_index > subject_->length()) { |
| + num_matches_ = 0; // Signal failed match. |
| + return NULL; |
| + } |
| + num_matches_ = RegExpImpl::IrregexpExecRaw(regexp_, subject_, |
| + last_end_index, register_array_, register_array_size_); |
| + } |
| + |
| + if (num_matches_ <= 0) |
| + return NULL; |
| + current_match_index_ = 0; |
| + return register_array_; |
| + } else { |
| + return &register_array_[current_match_index_ * registers_per_match_]; |
| + } |
| +} |
| + |
| + |
| +int32_t* RegExpImpl::GlobalCache::LastSuccessfulMatch() { |
| + int index = current_match_index_ * registers_per_match_; |
| + if (num_matches_ == 0) { |
| + // After a failed match we shift back by one result. |
| + index -= registers_per_match_; |
| + } |
| + return &register_array_[index]; |
| +} |
| + |
| + |
| // ------------------------------------------------------------------- |
| // Implementation of the Irregexp regular expression engine. |
| // |