Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(90)

Unified Diff: src/jsregexp.cc

Issue 1114001: Refactoring of RegExp interface to better support calling several times in a row. (Closed)
Patch Set: Fix type that snuck into the commit. Created 10 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/jsregexp.cc
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index 505cf03e5ee47ce33fee04095dc7815658423ad1..85a8a59920b63b7ff14148acbc2baa6607dffb27 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -149,7 +149,7 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
AtomCompile(re, pattern, flags, atom_string);
} else {
- IrregexpPrepare(re, pattern, flags, parse_result.capture_count);
+ IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
}
ASSERT(re->data()->IsFixedArray());
// Compilation succeeded so the data is set on the regexp
@@ -341,7 +341,7 @@ Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
}
-void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
+void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
Handle<String> pattern,
Erik Corry 2010/03/19 11:04:11 indent
JSRegExp::Flags flags,
int capture_count) {
@@ -354,120 +354,149 @@ void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
}
-Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
- Handle<String> subject,
- int previous_index,
- Handle<JSArray> last_match_info) {
- ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
-
- // Prepare space for the return values.
- int number_of_capture_registers =
- (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
-
-#ifndef V8_NATIVE_REGEXP
-#ifdef DEBUG
- if (FLAG_trace_regexp_bytecodes) {
- String* pattern = jsregexp->Pattern();
- PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
- PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
- }
-#endif
-#endif
-
+int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
+ Handle<String> subject) {
if (!subject->IsFlat()) {
FlattenString(subject);
}
+ bool is_ascii = subject->IsAsciiRepresentation();
+ if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
+ return -1;
+ }
+#ifdef V8_NATIVE_REGEXP
+ // Native regexp only needs room to output captures. Registers are handled
+ // internally.
+ return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
+#else // !V8_NATIVE_REGEXP
+ // Byte-code regexp needs space allocated for all its registers.
+ return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
+#endif // V8_NATIVE_REGEXP
Erik Corry 2010/03/19 11:04:11 V8 -> !V8 Actually I prefer the comment // ndef
Lasse Reichstein 2010/03/19 11:25:42 Changed to ndef.
+}
- last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
-
- Handle<FixedArray> array;
- // Dispatch to the correct RegExp implementation.
- Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
+RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
+ Handle<String> subject,
+ int index,
+ Vector<int> output) {
+ Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()));
#ifdef V8_NATIVE_REGEXP
+ ASSERT(output.length() >=
Erik Corry 2010/03/19 11:04:11 please move this assert into the ifdef below
Lasse Reichstein 2010/03/19 11:25:42 Done.
+ (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
+#else
+ ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
+#endif
+ ASSERT(index >= 0);
+ ASSERT(index <= subject->length());
+ ASSERT(subject->IsFlat());
- OffsetsVector captures(number_of_capture_registers);
- int* captures_vector = captures.vector();
- NativeRegExpMacroAssembler::Result res;
+#ifdef V8_NATIVE_REGEXP
do {
bool is_ascii = subject->IsAsciiRepresentation();
- if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
- return Handle<Object>::null();
+ Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii));
+ NativeRegExpMacroAssembler::Result res =
+ NativeRegExpMacroAssembler::Match(code,
+ subject,
+ output.start(),
+ output.length(),
+ index);
+ if (res != NativeRegExpMacroAssembler::RETRY) {
+ ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||
+ Top::has_pending_exception());
+ STATIC_ASSERT(
+ static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
+ STATIC_ASSERT(
+ static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
+ STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
+ == RE_EXCEPTION);
+ return static_cast<IrregexpResult>(res);
}
- Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
- res = NativeRegExpMacroAssembler::Match(code,
- subject,
- captures_vector,
- captures.length(),
- previous_index);
// If result is RETRY, the string have changed representation, and we
Erik Corry 2010/03/19 11:04:11 have -> has
// must restart from scratch.
- } while (res == NativeRegExpMacroAssembler::RETRY);
- if (res == NativeRegExpMacroAssembler::EXCEPTION) {
- ASSERT(Top::has_pending_exception());
- return Handle<Object>::null();
- }
- ASSERT(res == NativeRegExpMacroAssembler::SUCCESS
- || res == NativeRegExpMacroAssembler::FAILURE);
-
- if (res != NativeRegExpMacroAssembler::SUCCESS) return Factory::null_value();
-
- array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements()));
- ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
- // The captures come in (start, end+1) pairs.
- for (int i = 0; i < number_of_capture_registers; i += 2) {
- // Capture values are relative to start_offset only.
- // Convert them to be relative to start of string.
- if (captures_vector[i] >= 0) {
- captures_vector[i] += previous_index;
- }
- if (captures_vector[i + 1] >= 0) {
- captures_vector[i + 1] += previous_index;
- }
- SetCapture(*array, i, captures_vector[i]);
- SetCapture(*array, i + 1, captures_vector[i + 1]);
- }
-
+ // In this case, it means we must make sure we are prepared to handle
+ // the potentially different subject (the string can switch between
Erik Corry 2010/03/19 11:04:11 en -> ent
+ // being internal and external, and even between being ASCII and UC16,
+ // but the characters are always the same).
+ IrregexpPrepare(regexp, subject);
+ } while (true);
+ UNREACHABLE();
+ return RE_EXCEPTION;
#else // ! V8_NATIVE_REGEXP
bool is_ascii = subject->IsAsciiRepresentation();
- if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
- return Handle<Object>::null();
- }
- // Now that we have done EnsureCompiledIrregexp we can get the number of
+ // We must have done EnsureCompiledIrregexp, so we can get the number of
// registers.
- int number_of_registers =
- IrregexpNumberOfRegisters(FixedArray::cast(jsregexp->data()));
- OffsetsVector registers(number_of_registers);
- int* register_vector = registers.vector();
+ int* register_vector = output.start();
+ int number_of_capture_registers =
+ (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
register_vector[i] = -1;
}
- Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii));
+ Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii));
- if (!IrregexpInterpreter::Match(byte_codes,
- subject,
- register_vector,
- previous_index)) {
- return Factory::null_value();
+ if (IrregexpInterpreter::Match(byte_codes,
+ subject,
+ register_vector,
+ index)) {
+ return RE_SUCCESS;
}
+ return RE_FAILURE;
+#endif // V8_NATIVE_REGEXP
+}
- array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements()));
- ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
- // The captures come in (start, end+1) pairs.
- for (int i = 0; i < number_of_capture_registers; i += 2) {
- SetCapture(*array, i, register_vector[i]);
- SetCapture(*array, i + 1, register_vector[i + 1]);
- }
-#endif // V8_NATIVE_REGEXP
+Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
+ Handle<String> subject,
+ int previous_index,
+ Handle<JSArray> last_match_info) {
+ ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
+
+ // Prepare space for the return values.
+#ifndef V8_NATIVE_REGEXP
+#ifdef DEBUG
+ if (FLAG_trace_regexp_bytecodes) {
+ String* pattern = jsregexp->Pattern();
+ PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
+ PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
+ }
+#endif
+#endif
+ int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);
+ if (required_registers < 0) {
+ // Compiling failed with an exception.
+ ASSERT(Top::has_pending_exception());
+ return Handle<Object>::null();
+ }
- SetLastCaptureCount(*array, number_of_capture_registers);
- SetLastSubject(*array, *subject);
- SetLastInput(*array, *subject);
+ OffsetsVector registers(required_registers);
Erik Corry 2010/03/19 11:04:11 2 blank lines
Lasse Reichstein 2010/03/19 11:25:42 Ignored, per offline discussion.
- return last_match_info;
+ IrregexpResult res = IrregexpExecOnce(jsregexp,
+ subject,
+ previous_index,
+ Vector<int>(registers.vector(),
+ registers.length()));
+ if (res == RE_SUCCESS) {
+ int capture_register_count =
+ (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
+ last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
+ AssertNoAllocation no_gc;
+ int* register_vector = registers.vector();
+ FixedArray* array = FixedArray::cast(last_match_info->elements());
+ for (int i = 0; i < capture_register_count; i += 2) {
+ SetCapture(array, i, register_vector[i]);
+ SetCapture(array, i + 1, register_vector[i + 1]);
+ }
+ SetLastCaptureCount(array, capture_register_count);
+ SetLastSubject(array, *subject);
+ SetLastInput(array, *subject);
+ return last_match_info;
+ }
+ if (res == RE_EXCEPTION) {
+ ASSERT(Top::has_pending_exception());
+ return Handle<Object>::null();
+ }
+ ASSERT(res == RE_FAILURE);
+ return Factory::null_value();
}

Powered by Google App Engine
This is Rietveld 408576698