Index: src/jsregexp.cc |
=================================================================== |
--- src/jsregexp.cc (revision 1374) |
+++ src/jsregexp.cc (working copy) |
@@ -254,14 +254,33 @@ |
} |
+static void EnsureSize(Handle<JSArray> lastMatchInfo, |
Christian Plesner Hansen
2009/02/26 15:18:53
You might consider making this a method on JSArray
|
+ int num_capture_registers) { |
+ CHECK(lastMatchInfo->HasFastElements()); |
+ if (lastMatchInfo->elements()->length() >= |
+ num_capture_registers + RegExpImpl::kLastMatchOverhead) return; |
+ Handle<FixedArray> old_backing(lastMatchInfo->elements()); |
+ int old_size = old_backing->length(); |
+ int new_size = num_capture_registers + RegExpImpl::kLastMatchOverhead; |
+ // Doubling in size would be overkill, but leave some slack to avoid |
+ // constantly growing. |
+ new_size += new_size >> 3; |
+ Handle<FixedArray> new_backing = Factory::NewFixedArray(new_size); |
+ for (int i = 0; i < old_size; i++) new_backing->set(i, old_backing->get(i)); |
+ lastMatchInfo->SetContent(*new_backing); |
+} |
+ |
+ |
Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
Handle<String> subject, |
- Handle<Object> index) { |
+ Smi* index, |
Christian Plesner Hansen
2009/02/26 15:18:53
Consider converting the index to a C right away --
|
+ Handle<JSArray> lastMatchInfo) { |
switch (regexp->TypeTag()) { |
case JSRegExp::ATOM: |
- return AtomExec(regexp, subject, index); |
+ return AtomExec(regexp, subject, index, lastMatchInfo); |
case JSRegExp::IRREGEXP: { |
- Handle<Object> result = IrregexpExec(regexp, subject, index); |
+ Handle<Object> result = |
+ IrregexpExec(regexp, subject, index, lastMatchInfo); |
ASSERT(!result.is_null() || Top::has_pending_exception()); |
return result; |
} |
@@ -273,12 +292,14 @@ |
Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, |
- Handle<String> subject) { |
+ Handle<String> subject, |
+ Handle<JSArray> lastMatchInfo) { |
switch (regexp->TypeTag()) { |
case JSRegExp::ATOM: |
- return AtomExecGlobal(regexp, subject); |
+ return AtomExecGlobal(regexp, subject, lastMatchInfo); |
case JSRegExp::IRREGEXP: { |
- Handle<Object> result = IrregexpExecGlobal(regexp, subject); |
+ Handle<Object> result = |
+ IrregexpExecGlobal(regexp, subject, lastMatchInfo); |
ASSERT(!result.is_null() || Top::has_pending_exception()); |
return result; |
} |
@@ -303,49 +324,70 @@ |
Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
Handle<String> subject, |
- Handle<Object> index) { |
+ Smi* index, |
+ Handle<JSArray> lastMatchInfo) { |
Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
- uint32_t start_index; |
- if (!Array::IndexFromObject(*index, &start_index)) { |
- return Handle<Smi>(Smi::FromInt(-1)); |
- } |
+ uint32_t start_index = index->value(); |
int value = Runtime::StringMatch(subject, needle, start_index); |
if (value == -1) return Factory::null_value(); |
+ CHECK(lastMatchInfo->HasFastElements()); |
- Handle<FixedArray> array = Factory::NewFixedArray(2); |
- array->set(0, Smi::FromInt(value)); |
- array->set(1, Smi::FromInt(value + needle->length())); |
- return Factory::NewJSArrayWithElements(array); |
+ Handle<FixedArray> array(lastMatchInfo->elements()); |
+ SetLastCaptureCount(*array, 2); |
Christian Plesner Hansen
2009/02/26 15:18:53
Could this sequence of 'Set's be packed into a fun
|
+ SetLastSubject(*array, *subject); |
+ SetLastInput(*array, *subject); |
+ SetCapture(*array, 0, value); |
+ SetCapture(*array, 1, value + needle->length()); |
+ return lastMatchInfo; |
} |
Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, |
- Handle<String> subject) { |
+ Handle<String> subject, |
+ Handle<JSArray> lastMatchInfo) { |
Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
+ CHECK(lastMatchInfo->HasFastElements()); |
Christian Plesner Hansen
2009/02/26 15:18:53
Don't you mean ASSERT?
|
Handle<JSArray> result = Factory::NewJSArray(1); |
int index = 0; |
int match_count = 0; |
int subject_length = subject->length(); |
int needle_length = needle->length(); |
+ int last_value = -1; |
while (true) { |
int value = -1; |
if (index + needle_length <= subject_length) { |
value = Runtime::StringMatch(subject, needle, index); |
} |
- if (value == -1) break; |
+ if (value == -1) { |
+ if (last_value != -1) { |
+ Handle<FixedArray> array(lastMatchInfo->elements()); |
Christian Plesner Hansen
2009/02/26 15:18:53
You're creating an unbounded number of handles her
|
+ SetLastCaptureCount(*array, 2); |
+ SetLastSubject(*array, *subject); |
+ SetLastInput(*array, *subject); |
+ SetCapture(*array, 0, last_value); |
+ SetCapture(*array, 1, last_value + needle->length()); |
+ } |
+ break; |
+ } |
+ |
HandleScope scope; |
int end = value + needle_length; |
- Handle<FixedArray> array = Factory::NewFixedArray(2); |
- array->set(0, Smi::FromInt(value)); |
- array->set(1, Smi::FromInt(end)); |
+ // Create an array that looks like the static lastMatchInfo array |
+ // that is attached to the global RegExp object. We will be returning |
+ // an array of these. |
+ Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2); |
+ SetCapture(*array, 0, value); |
+ SetCapture(*array, 1, end); |
+ SetLastCaptureCount(*array, 2); |
Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); |
SetElement(result, match_count, pair); |
match_count++; |
index = end; |
if (needle_length == 0) index++; |
+ last_value = value; |
} |
return result; |
} |
@@ -445,7 +487,8 @@ |
Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, |
Handle<String> subject, |
- Handle<Object> index) { |
+ Smi* index, |
+ Handle<JSArray> lastMatchInfo) { |
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); |
@@ -457,11 +500,10 @@ |
} |
// Prepare space for the return values. |
- int number_of_registers = IrregexpNumberOfRegisters(irregexp); |
- OffsetsVector offsets(number_of_registers); |
+ int number_of_capture_registers = |
+ (IrregexpNumberOfCaptures(irregexp) + 1) * 2; |
+ OffsetsVector offsets(number_of_capture_registers); |
- int num_captures = IrregexpNumberOfCaptures(irregexp); |
- |
int previous_index = static_cast<int>(DoubleToInteger(index->Number())); |
#ifdef DEBUG |
@@ -476,8 +518,11 @@ |
FlattenString(subject); |
} |
+ EnsureSize(lastMatchInfo, kFirstCapture + number_of_capture_registers); |
+ |
return IrregexpExecOnce(irregexp, |
- num_captures, |
+ number_of_capture_registers, |
+ lastMatchInfo, |
subject, |
previous_index, |
offsets.vector(), |
@@ -486,7 +531,8 @@ |
Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, |
- Handle<String> subject) { |
+ Handle<String> subject, |
+ Handle<JSArray> lastMatchInfo) { |
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); |
@@ -496,19 +542,22 @@ |
} |
// Prepare space for the return values. |
- int number_of_registers = IrregexpNumberOfRegisters(irregexp); |
- OffsetsVector offsets(number_of_registers); |
+ int number_of_capture_registers = |
+ (IrregexpNumberOfCaptures(irregexp) + 1) * 2; |
+ OffsetsVector offsets(number_of_capture_registers); |
int previous_index = 0; |
Handle<JSArray> result = Factory::NewJSArray(0); |
- int i = 0; |
+ int result_length = 0; |
Handle<Object> matches; |
if (!subject->IsFlat(StringShape(*subject))) { |
FlattenString(subject); |
} |
+ EnsureSize(lastMatchInfo, kFirstCapture + number_of_capture_registers); |
+ |
while (true) { |
if (previous_index > subject->length() || previous_index < 0) { |
// Per ECMA-262 15.10.6.2, if the previous index is greater than the |
@@ -524,7 +573,8 @@ |
} |
#endif |
matches = IrregexpExecOnce(irregexp, |
- IrregexpNumberOfCaptures(irregexp), |
+ number_of_capture_registers, |
+ lastMatchInfo, |
subject, |
previous_index, |
offsets.vector(), |
@@ -536,12 +586,25 @@ |
} |
if (matches->IsJSArray()) { |
- SetElement(result, i, matches); |
- i++; |
- previous_index = offsets.vector()[1]; |
- if (offsets.vector()[0] == offsets.vector()[1]) { |
+ // Create an array that looks like the static lastMatchInfo array |
+ // that is attached to the global RegExp object. We will be returning |
+ // an array of these. |
+ Handle<FixedArray> matches_array(JSArray::cast(*matches)->elements()); |
Christian Plesner Hansen
2009/02/26 15:18:53
Again with the unbounded number of handles.
|
+ Handle<JSArray> latest_match = |
+ Factory::NewJSArray(kFirstCapture + number_of_capture_registers); |
+ Handle<FixedArray> latest_match_array(latest_match->elements()); |
+ |
+ for (int i = 0; i < number_of_capture_registers; i++) { |
+ SetCapture(*latest_match_array, i, GetCapture(*matches_array, i)); |
+ } |
+ SetLastCaptureCount(*latest_match_array, number_of_capture_registers); |
+ |
+ SetElement(result, result_length, latest_match); |
+ result_length++; |
+ previous_index = GetCapture(*matches_array, 1); |
+ if (GetCapture(*matches_array, 0) == previous_index) |
previous_index++; |
- } |
+ |
} else { |
ASSERT(matches->IsNull()); |
return result; |
@@ -552,7 +615,8 @@ |
Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, |
- int num_captures, |
+ int number_of_capture_registers, |
+ Handle<JSArray> lastMatchInfo, |
Handle<String> subject, |
int previous_index, |
int* offsets_vector, |
@@ -642,7 +706,7 @@ |
#endif |
} |
case RegExpMacroAssembler::kBytecodeImplementation: { |
- for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { |
+ for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
offsets_vector[i] = -1; |
} |
Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp); |
@@ -664,13 +728,16 @@ |
return Factory::null_value(); |
} |
- Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); |
+ Handle<FixedArray> array(lastMatchInfo->elements()); |
// The captures come in (start, end+1) pairs. |
- for (int i = 0; i < 2 * (num_captures + 1); i += 2) { |
- array->set(i, Smi::FromInt(offsets_vector[i])); |
- array->set(i + 1, Smi::FromInt(offsets_vector[i + 1])); |
+ for (int i = 0; i < number_of_capture_registers; i += 2) { |
+ SetCapture(*array, i, offsets_vector[i]); |
+ SetCapture(*array, i + 1, offsets_vector[i + 1]); |
} |
- return Factory::NewJSArrayWithElements(array); |
+ SetLastCaptureCount(*array, number_of_capture_registers); |
+ SetLastSubject(*array, *subject); |
+ SetLastInput(*array, *subject); |
+ return lastMatchInfo; |
} |
@@ -3723,9 +3790,6 @@ |
// | |
// [if r >= f] \----> ... |
// |
- // |
- // TODO(someone): clear captures on repetition and handle empty |
- // matches. |
// 15.10.2.5 RepeatMatcher algorithm. |
// The parser has already eliminated the case where max is 0. In the case |