Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(381)

Unified Diff: src/jsregexp.cc

Issue 28184: Avoids allocating a JSArray of capture information on each non-global... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 11 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/jsregexp.h ('k') | src/macros.py » ('j') | src/macros.py » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.cc
===================================================================
--- src/jsregexp.cc (revision 1374)
+++ src/jsregexp.cc (working copy)
@@ -254,14 +254,33 @@
}
+static void EnsureSize(Handle<JSArray> lastMatchInfo,
Christian Plesner Hansen 2009/02/26 15:18:53 You might consider making this a method on JSArray
+ int num_capture_registers) {
+ CHECK(lastMatchInfo->HasFastElements());
+ if (lastMatchInfo->elements()->length() >=
+ num_capture_registers + RegExpImpl::kLastMatchOverhead) return;
+ Handle<FixedArray> old_backing(lastMatchInfo->elements());
+ int old_size = old_backing->length();
+ int new_size = num_capture_registers + RegExpImpl::kLastMatchOverhead;
+ // Doubling in size would be overkill, but leave some slack to avoid
+ // constantly growing.
+ new_size += new_size >> 3;
+ Handle<FixedArray> new_backing = Factory::NewFixedArray(new_size);
+ for (int i = 0; i < old_size; i++) new_backing->set(i, old_backing->get(i));
+ lastMatchInfo->SetContent(*new_backing);
+}
+
+
Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
Handle<String> subject,
- Handle<Object> index) {
+ Smi* index,
Christian Plesner Hansen 2009/02/26 15:18:53 Consider converting the index to a C right away --
+ Handle<JSArray> lastMatchInfo) {
switch (regexp->TypeTag()) {
case JSRegExp::ATOM:
- return AtomExec(regexp, subject, index);
+ return AtomExec(regexp, subject, index, lastMatchInfo);
case JSRegExp::IRREGEXP: {
- Handle<Object> result = IrregexpExec(regexp, subject, index);
+ Handle<Object> result =
+ IrregexpExec(regexp, subject, index, lastMatchInfo);
ASSERT(!result.is_null() || Top::has_pending_exception());
return result;
}
@@ -273,12 +292,14 @@
Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
- Handle<String> subject) {
+ Handle<String> subject,
+ Handle<JSArray> lastMatchInfo) {
switch (regexp->TypeTag()) {
case JSRegExp::ATOM:
- return AtomExecGlobal(regexp, subject);
+ return AtomExecGlobal(regexp, subject, lastMatchInfo);
case JSRegExp::IRREGEXP: {
- Handle<Object> result = IrregexpExecGlobal(regexp, subject);
+ Handle<Object> result =
+ IrregexpExecGlobal(regexp, subject, lastMatchInfo);
ASSERT(!result.is_null() || Top::has_pending_exception());
return result;
}
@@ -303,49 +324,70 @@
Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
Handle<String> subject,
- Handle<Object> index) {
+ Smi* index,
+ Handle<JSArray> lastMatchInfo) {
Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
- uint32_t start_index;
- if (!Array::IndexFromObject(*index, &start_index)) {
- return Handle<Smi>(Smi::FromInt(-1));
- }
+ uint32_t start_index = index->value();
int value = Runtime::StringMatch(subject, needle, start_index);
if (value == -1) return Factory::null_value();
+ CHECK(lastMatchInfo->HasFastElements());
- Handle<FixedArray> array = Factory::NewFixedArray(2);
- array->set(0, Smi::FromInt(value));
- array->set(1, Smi::FromInt(value + needle->length()));
- return Factory::NewJSArrayWithElements(array);
+ Handle<FixedArray> array(lastMatchInfo->elements());
+ SetLastCaptureCount(*array, 2);
Christian Plesner Hansen 2009/02/26 15:18:53 Could this sequence of 'Set's be packed into a fun
+ SetLastSubject(*array, *subject);
+ SetLastInput(*array, *subject);
+ SetCapture(*array, 0, value);
+ SetCapture(*array, 1, value + needle->length());
+ return lastMatchInfo;
}
Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
- Handle<String> subject) {
+ Handle<String> subject,
+ Handle<JSArray> lastMatchInfo) {
Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
+ CHECK(lastMatchInfo->HasFastElements());
Christian Plesner Hansen 2009/02/26 15:18:53 Don't you mean ASSERT?
Handle<JSArray> result = Factory::NewJSArray(1);
int index = 0;
int match_count = 0;
int subject_length = subject->length();
int needle_length = needle->length();
+ int last_value = -1;
while (true) {
int value = -1;
if (index + needle_length <= subject_length) {
value = Runtime::StringMatch(subject, needle, index);
}
- if (value == -1) break;
+ if (value == -1) {
+ if (last_value != -1) {
+ Handle<FixedArray> array(lastMatchInfo->elements());
Christian Plesner Hansen 2009/02/26 15:18:53 You're creating an unbounded number of handles her
+ SetLastCaptureCount(*array, 2);
+ SetLastSubject(*array, *subject);
+ SetLastInput(*array, *subject);
+ SetCapture(*array, 0, last_value);
+ SetCapture(*array, 1, last_value + needle->length());
+ }
+ break;
+ }
+
HandleScope scope;
int end = value + needle_length;
- Handle<FixedArray> array = Factory::NewFixedArray(2);
- array->set(0, Smi::FromInt(value));
- array->set(1, Smi::FromInt(end));
+ // Create an array that looks like the static lastMatchInfo array
+ // that is attached to the global RegExp object. We will be returning
+ // an array of these.
+ Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2);
+ SetCapture(*array, 0, value);
+ SetCapture(*array, 1, end);
+ SetLastCaptureCount(*array, 2);
Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
SetElement(result, match_count, pair);
match_count++;
index = end;
if (needle_length == 0) index++;
+ last_value = value;
}
return result;
}
@@ -445,7 +487,8 @@
Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
Handle<String> subject,
- Handle<Object> index) {
+ Smi* index,
+ Handle<JSArray> lastMatchInfo) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
@@ -457,11 +500,10 @@
}
// Prepare space for the return values.
- int number_of_registers = IrregexpNumberOfRegisters(irregexp);
- OffsetsVector offsets(number_of_registers);
+ int number_of_capture_registers =
+ (IrregexpNumberOfCaptures(irregexp) + 1) * 2;
+ OffsetsVector offsets(number_of_capture_registers);
- int num_captures = IrregexpNumberOfCaptures(irregexp);
-
int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
#ifdef DEBUG
@@ -476,8 +518,11 @@
FlattenString(subject);
}
+ EnsureSize(lastMatchInfo, kFirstCapture + number_of_capture_registers);
+
return IrregexpExecOnce(irregexp,
- num_captures,
+ number_of_capture_registers,
+ lastMatchInfo,
subject,
previous_index,
offsets.vector(),
@@ -486,7 +531,8 @@
Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
- Handle<String> subject) {
+ Handle<String> subject,
+ Handle<JSArray> lastMatchInfo) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
@@ -496,19 +542,22 @@
}
// Prepare space for the return values.
- int number_of_registers = IrregexpNumberOfRegisters(irregexp);
- OffsetsVector offsets(number_of_registers);
+ int number_of_capture_registers =
+ (IrregexpNumberOfCaptures(irregexp) + 1) * 2;
+ OffsetsVector offsets(number_of_capture_registers);
int previous_index = 0;
Handle<JSArray> result = Factory::NewJSArray(0);
- int i = 0;
+ int result_length = 0;
Handle<Object> matches;
if (!subject->IsFlat(StringShape(*subject))) {
FlattenString(subject);
}
+ EnsureSize(lastMatchInfo, kFirstCapture + number_of_capture_registers);
+
while (true) {
if (previous_index > subject->length() || previous_index < 0) {
// Per ECMA-262 15.10.6.2, if the previous index is greater than the
@@ -524,7 +573,8 @@
}
#endif
matches = IrregexpExecOnce(irregexp,
- IrregexpNumberOfCaptures(irregexp),
+ number_of_capture_registers,
+ lastMatchInfo,
subject,
previous_index,
offsets.vector(),
@@ -536,12 +586,25 @@
}
if (matches->IsJSArray()) {
- SetElement(result, i, matches);
- i++;
- previous_index = offsets.vector()[1];
- if (offsets.vector()[0] == offsets.vector()[1]) {
+ // Create an array that looks like the static lastMatchInfo array
+ // that is attached to the global RegExp object. We will be returning
+ // an array of these.
+ Handle<FixedArray> matches_array(JSArray::cast(*matches)->elements());
Christian Plesner Hansen 2009/02/26 15:18:53 Again with the unbounded number of handles.
+ Handle<JSArray> latest_match =
+ Factory::NewJSArray(kFirstCapture + number_of_capture_registers);
+ Handle<FixedArray> latest_match_array(latest_match->elements());
+
+ for (int i = 0; i < number_of_capture_registers; i++) {
+ SetCapture(*latest_match_array, i, GetCapture(*matches_array, i));
+ }
+ SetLastCaptureCount(*latest_match_array, number_of_capture_registers);
+
+ SetElement(result, result_length, latest_match);
+ result_length++;
+ previous_index = GetCapture(*matches_array, 1);
+ if (GetCapture(*matches_array, 0) == previous_index)
previous_index++;
- }
+
} else {
ASSERT(matches->IsNull());
return result;
@@ -552,7 +615,8 @@
Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp,
- int num_captures,
+ int number_of_capture_registers,
+ Handle<JSArray> lastMatchInfo,
Handle<String> subject,
int previous_index,
int* offsets_vector,
@@ -642,7 +706,7 @@
#endif
}
case RegExpMacroAssembler::kBytecodeImplementation: {
- for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
+ for (int i = number_of_capture_registers - 1; i >= 0; i--) {
offsets_vector[i] = -1;
}
Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp);
@@ -664,13 +728,16 @@
return Factory::null_value();
}
- Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
+ Handle<FixedArray> array(lastMatchInfo->elements());
// The captures come in (start, end+1) pairs.
- for (int i = 0; i < 2 * (num_captures + 1); i += 2) {
- array->set(i, Smi::FromInt(offsets_vector[i]));
- array->set(i + 1, Smi::FromInt(offsets_vector[i + 1]));
+ for (int i = 0; i < number_of_capture_registers; i += 2) {
+ SetCapture(*array, i, offsets_vector[i]);
+ SetCapture(*array, i + 1, offsets_vector[i + 1]);
}
- return Factory::NewJSArrayWithElements(array);
+ SetLastCaptureCount(*array, number_of_capture_registers);
+ SetLastSubject(*array, *subject);
+ SetLastInput(*array, *subject);
+ return lastMatchInfo;
}
@@ -3723,9 +3790,6 @@
// |
// [if r >= f] \----> ...
//
- //
- // TODO(someone): clear captures on repetition and handle empty
- // matches.
// 15.10.2.5 RepeatMatcher algorithm.
// The parser has already eliminated the case where max is 0. In the case
« no previous file with comments | « src/jsregexp.h ('k') | src/macros.py » ('j') | src/macros.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698