Chromium Code Reviews| Index: src/jsregexp.cc |
| =================================================================== |
| --- src/jsregexp.cc (revision 1374) |
| +++ src/jsregexp.cc (working copy) |
| @@ -254,14 +254,33 @@ |
| } |
| +static void EnsureSize(Handle<JSArray> lastMatchInfo, |
|
Christian Plesner Hansen
2009/02/26 15:18:53
You might consider making this a method on JSArray
|
| + int num_capture_registers) { |
| + CHECK(lastMatchInfo->HasFastElements()); |
| + if (lastMatchInfo->elements()->length() >= |
| + num_capture_registers + RegExpImpl::kLastMatchOverhead) return; |
| + Handle<FixedArray> old_backing(lastMatchInfo->elements()); |
| + int old_size = old_backing->length(); |
| + int new_size = num_capture_registers + RegExpImpl::kLastMatchOverhead; |
| + // Doubling in size would be overkill, but leave some slack to avoid |
| + // constantly growing. |
| + new_size += new_size >> 3; |
| + Handle<FixedArray> new_backing = Factory::NewFixedArray(new_size); |
| + for (int i = 0; i < old_size; i++) new_backing->set(i, old_backing->get(i)); |
| + lastMatchInfo->SetContent(*new_backing); |
| +} |
| + |
| + |
| Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
| Handle<String> subject, |
| - Handle<Object> index) { |
| + Smi* index, |
|
Christian Plesner Hansen
2009/02/26 15:18:53
Consider converting the index to a C right away --
|
| + Handle<JSArray> lastMatchInfo) { |
| switch (regexp->TypeTag()) { |
| case JSRegExp::ATOM: |
| - return AtomExec(regexp, subject, index); |
| + return AtomExec(regexp, subject, index, lastMatchInfo); |
| case JSRegExp::IRREGEXP: { |
| - Handle<Object> result = IrregexpExec(regexp, subject, index); |
| + Handle<Object> result = |
| + IrregexpExec(regexp, subject, index, lastMatchInfo); |
| ASSERT(!result.is_null() || Top::has_pending_exception()); |
| return result; |
| } |
| @@ -273,12 +292,14 @@ |
| Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, |
| - Handle<String> subject) { |
| + Handle<String> subject, |
| + Handle<JSArray> lastMatchInfo) { |
| switch (regexp->TypeTag()) { |
| case JSRegExp::ATOM: |
| - return AtomExecGlobal(regexp, subject); |
| + return AtomExecGlobal(regexp, subject, lastMatchInfo); |
| case JSRegExp::IRREGEXP: { |
| - Handle<Object> result = IrregexpExecGlobal(regexp, subject); |
| + Handle<Object> result = |
| + IrregexpExecGlobal(regexp, subject, lastMatchInfo); |
| ASSERT(!result.is_null() || Top::has_pending_exception()); |
| return result; |
| } |
| @@ -303,49 +324,70 @@ |
| Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
| Handle<String> subject, |
| - Handle<Object> index) { |
| + Smi* index, |
| + Handle<JSArray> lastMatchInfo) { |
| Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
| - uint32_t start_index; |
| - if (!Array::IndexFromObject(*index, &start_index)) { |
| - return Handle<Smi>(Smi::FromInt(-1)); |
| - } |
| + uint32_t start_index = index->value(); |
| int value = Runtime::StringMatch(subject, needle, start_index); |
| if (value == -1) return Factory::null_value(); |
| + CHECK(lastMatchInfo->HasFastElements()); |
| - Handle<FixedArray> array = Factory::NewFixedArray(2); |
| - array->set(0, Smi::FromInt(value)); |
| - array->set(1, Smi::FromInt(value + needle->length())); |
| - return Factory::NewJSArrayWithElements(array); |
| + Handle<FixedArray> array(lastMatchInfo->elements()); |
| + SetLastCaptureCount(*array, 2); |
|
Christian Plesner Hansen
2009/02/26 15:18:53
Could this sequence of 'Set's be packed into a fun
|
| + SetLastSubject(*array, *subject); |
| + SetLastInput(*array, *subject); |
| + SetCapture(*array, 0, value); |
| + SetCapture(*array, 1, value + needle->length()); |
| + return lastMatchInfo; |
| } |
| Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, |
| - Handle<String> subject) { |
| + Handle<String> subject, |
| + Handle<JSArray> lastMatchInfo) { |
| Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
| + CHECK(lastMatchInfo->HasFastElements()); |
|
Christian Plesner Hansen
2009/02/26 15:18:53
Don't you mean ASSERT?
|
| Handle<JSArray> result = Factory::NewJSArray(1); |
| int index = 0; |
| int match_count = 0; |
| int subject_length = subject->length(); |
| int needle_length = needle->length(); |
| + int last_value = -1; |
| while (true) { |
| int value = -1; |
| if (index + needle_length <= subject_length) { |
| value = Runtime::StringMatch(subject, needle, index); |
| } |
| - if (value == -1) break; |
| + if (value == -1) { |
| + if (last_value != -1) { |
| + Handle<FixedArray> array(lastMatchInfo->elements()); |
|
Christian Plesner Hansen
2009/02/26 15:18:53
You're creating an unbounded number of handles her
|
| + SetLastCaptureCount(*array, 2); |
| + SetLastSubject(*array, *subject); |
| + SetLastInput(*array, *subject); |
| + SetCapture(*array, 0, last_value); |
| + SetCapture(*array, 1, last_value + needle->length()); |
| + } |
| + break; |
| + } |
| + |
| HandleScope scope; |
| int end = value + needle_length; |
| - Handle<FixedArray> array = Factory::NewFixedArray(2); |
| - array->set(0, Smi::FromInt(value)); |
| - array->set(1, Smi::FromInt(end)); |
| + // Create an array that looks like the static lastMatchInfo array |
| + // that is attached to the global RegExp object. We will be returning |
| + // an array of these. |
| + Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2); |
| + SetCapture(*array, 0, value); |
| + SetCapture(*array, 1, end); |
| + SetLastCaptureCount(*array, 2); |
| Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); |
| SetElement(result, match_count, pair); |
| match_count++; |
| index = end; |
| if (needle_length == 0) index++; |
| + last_value = value; |
| } |
| return result; |
| } |
| @@ -445,7 +487,8 @@ |
| Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, |
| Handle<String> subject, |
| - Handle<Object> index) { |
| + Smi* index, |
| + Handle<JSArray> lastMatchInfo) { |
| ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); |
| @@ -457,11 +500,10 @@ |
| } |
| // Prepare space for the return values. |
| - int number_of_registers = IrregexpNumberOfRegisters(irregexp); |
| - OffsetsVector offsets(number_of_registers); |
| + int number_of_capture_registers = |
| + (IrregexpNumberOfCaptures(irregexp) + 1) * 2; |
| + OffsetsVector offsets(number_of_capture_registers); |
| - int num_captures = IrregexpNumberOfCaptures(irregexp); |
| - |
| int previous_index = static_cast<int>(DoubleToInteger(index->Number())); |
| #ifdef DEBUG |
| @@ -476,8 +518,11 @@ |
| FlattenString(subject); |
| } |
| + EnsureSize(lastMatchInfo, kFirstCapture + number_of_capture_registers); |
| + |
| return IrregexpExecOnce(irregexp, |
| - num_captures, |
| + number_of_capture_registers, |
| + lastMatchInfo, |
| subject, |
| previous_index, |
| offsets.vector(), |
| @@ -486,7 +531,8 @@ |
| Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, |
| - Handle<String> subject) { |
| + Handle<String> subject, |
| + Handle<JSArray> lastMatchInfo) { |
| ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); |
| @@ -496,19 +542,22 @@ |
| } |
| // Prepare space for the return values. |
| - int number_of_registers = IrregexpNumberOfRegisters(irregexp); |
| - OffsetsVector offsets(number_of_registers); |
| + int number_of_capture_registers = |
| + (IrregexpNumberOfCaptures(irregexp) + 1) * 2; |
| + OffsetsVector offsets(number_of_capture_registers); |
| int previous_index = 0; |
| Handle<JSArray> result = Factory::NewJSArray(0); |
| - int i = 0; |
| + int result_length = 0; |
| Handle<Object> matches; |
| if (!subject->IsFlat(StringShape(*subject))) { |
| FlattenString(subject); |
| } |
| + EnsureSize(lastMatchInfo, kFirstCapture + number_of_capture_registers); |
| + |
| while (true) { |
| if (previous_index > subject->length() || previous_index < 0) { |
| // Per ECMA-262 15.10.6.2, if the previous index is greater than the |
| @@ -524,7 +573,8 @@ |
| } |
| #endif |
| matches = IrregexpExecOnce(irregexp, |
| - IrregexpNumberOfCaptures(irregexp), |
| + number_of_capture_registers, |
| + lastMatchInfo, |
| subject, |
| previous_index, |
| offsets.vector(), |
| @@ -536,12 +586,25 @@ |
| } |
| if (matches->IsJSArray()) { |
| - SetElement(result, i, matches); |
| - i++; |
| - previous_index = offsets.vector()[1]; |
| - if (offsets.vector()[0] == offsets.vector()[1]) { |
| + // Create an array that looks like the static lastMatchInfo array |
| + // that is attached to the global RegExp object. We will be returning |
| + // an array of these. |
| + Handle<FixedArray> matches_array(JSArray::cast(*matches)->elements()); |
|
Christian Plesner Hansen
2009/02/26 15:18:53
Again with the unbounded number of handles.
|
| + Handle<JSArray> latest_match = |
| + Factory::NewJSArray(kFirstCapture + number_of_capture_registers); |
| + Handle<FixedArray> latest_match_array(latest_match->elements()); |
| + |
| + for (int i = 0; i < number_of_capture_registers; i++) { |
| + SetCapture(*latest_match_array, i, GetCapture(*matches_array, i)); |
| + } |
| + SetLastCaptureCount(*latest_match_array, number_of_capture_registers); |
| + |
| + SetElement(result, result_length, latest_match); |
| + result_length++; |
| + previous_index = GetCapture(*matches_array, 1); |
| + if (GetCapture(*matches_array, 0) == previous_index) |
| previous_index++; |
| - } |
| + |
| } else { |
| ASSERT(matches->IsNull()); |
| return result; |
| @@ -552,7 +615,8 @@ |
| Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, |
| - int num_captures, |
| + int number_of_capture_registers, |
| + Handle<JSArray> lastMatchInfo, |
| Handle<String> subject, |
| int previous_index, |
| int* offsets_vector, |
| @@ -642,7 +706,7 @@ |
| #endif |
| } |
| case RegExpMacroAssembler::kBytecodeImplementation: { |
| - for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { |
| + for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
| offsets_vector[i] = -1; |
| } |
| Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp); |
| @@ -664,13 +728,16 @@ |
| return Factory::null_value(); |
| } |
| - Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); |
| + Handle<FixedArray> array(lastMatchInfo->elements()); |
| // The captures come in (start, end+1) pairs. |
| - for (int i = 0; i < 2 * (num_captures + 1); i += 2) { |
| - array->set(i, Smi::FromInt(offsets_vector[i])); |
| - array->set(i + 1, Smi::FromInt(offsets_vector[i + 1])); |
| + for (int i = 0; i < number_of_capture_registers; i += 2) { |
| + SetCapture(*array, i, offsets_vector[i]); |
| + SetCapture(*array, i + 1, offsets_vector[i + 1]); |
| } |
| - return Factory::NewJSArrayWithElements(array); |
| + SetLastCaptureCount(*array, number_of_capture_registers); |
| + SetLastSubject(*array, *subject); |
| + SetLastInput(*array, *subject); |
| + return lastMatchInfo; |
| } |
| @@ -3723,9 +3790,6 @@ |
| // | |
| // [if r >= f] \----> ... |
| // |
| - // |
| - // TODO(someone): clear captures on repetition and handle empty |
| - // matches. |
| // 15.10.2.5 RepeatMatcher algorithm. |
| // The parser has already eliminated the case where max is 0. In the case |