Index: src/jsregexp.cc |
diff --git a/src/jsregexp.cc b/src/jsregexp.cc |
index 505cf03e5ee47ce33fee04095dc7815658423ad1..85a8a59920b63b7ff14148acbc2baa6607dffb27 100644 |
--- a/src/jsregexp.cc |
+++ b/src/jsregexp.cc |
@@ -149,7 +149,7 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, |
Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); |
AtomCompile(re, pattern, flags, atom_string); |
} else { |
- IrregexpPrepare(re, pattern, flags, parse_result.capture_count); |
+ IrregexpInitialize(re, pattern, flags, parse_result.capture_count); |
} |
ASSERT(re->data()->IsFixedArray()); |
// Compilation succeeded so the data is set on the regexp |
@@ -341,7 +341,7 @@ Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { |
} |
-void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, |
+void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re, |
Handle<String> pattern, |
Erik Corry
2010/03/19 11:04:11
indent
|
JSRegExp::Flags flags, |
int capture_count) { |
@@ -354,120 +354,149 @@ void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, |
} |
-Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, |
- Handle<String> subject, |
- int previous_index, |
- Handle<JSArray> last_match_info) { |
- ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); |
- |
- // Prepare space for the return values. |
- int number_of_capture_registers = |
- (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
- |
-#ifndef V8_NATIVE_REGEXP |
-#ifdef DEBUG |
- if (FLAG_trace_regexp_bytecodes) { |
- String* pattern = jsregexp->Pattern(); |
- PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
- PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
- } |
-#endif |
-#endif |
- |
+int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
+ Handle<String> subject) { |
if (!subject->IsFlat()) { |
FlattenString(subject); |
} |
+ bool is_ascii = subject->IsAsciiRepresentation(); |
+ if (!EnsureCompiledIrregexp(regexp, is_ascii)) { |
+ return -1; |
+ } |
+#ifdef V8_NATIVE_REGEXP |
+ // Native regexp only needs room to output captures. Registers are handled |
+ // internally. |
+ return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; |
+#else // !V8_NATIVE_REGEXP |
+ // Byte-code regexp needs space allocated for all its registers. |
+ return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); |
+#endif // V8_NATIVE_REGEXP |
Erik Corry
2010/03/19 11:04:11
V8 -> !V8
Actually I prefer the comment // ndef
Lasse Reichstein
2010/03/19 11:25:42
Changed to ndef.
|
+} |
- last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); |
- |
- Handle<FixedArray> array; |
- // Dispatch to the correct RegExp implementation. |
- Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data())); |
+RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp, |
+ Handle<String> subject, |
+ int index, |
+ Vector<int> output) { |
+ Handle<FixedArray> irregexp(FixedArray::cast(regexp->data())); |
#ifdef V8_NATIVE_REGEXP |
+ ASSERT(output.length() >= |
Erik Corry
2010/03/19 11:04:11
please move this assert into the ifdef below
Lasse Reichstein
2010/03/19 11:25:42
Done.
|
+ (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
+#else |
+ ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); |
+#endif |
+ ASSERT(index >= 0); |
+ ASSERT(index <= subject->length()); |
+ ASSERT(subject->IsFlat()); |
- OffsetsVector captures(number_of_capture_registers); |
- int* captures_vector = captures.vector(); |
- NativeRegExpMacroAssembler::Result res; |
+#ifdef V8_NATIVE_REGEXP |
do { |
bool is_ascii = subject->IsAsciiRepresentation(); |
- if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { |
- return Handle<Object>::null(); |
+ Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii)); |
+ NativeRegExpMacroAssembler::Result res = |
+ NativeRegExpMacroAssembler::Match(code, |
+ subject, |
+ output.start(), |
+ output.length(), |
+ index); |
+ if (res != NativeRegExpMacroAssembler::RETRY) { |
+ ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || |
+ Top::has_pending_exception()); |
+ STATIC_ASSERT( |
+ static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); |
+ STATIC_ASSERT( |
+ static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); |
+ STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) |
+ == RE_EXCEPTION); |
+ return static_cast<IrregexpResult>(res); |
} |
- Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii)); |
- res = NativeRegExpMacroAssembler::Match(code, |
- subject, |
- captures_vector, |
- captures.length(), |
- previous_index); |
// If result is RETRY, the string has changed representation, and we |
Erik Corry
2010/03/19 11:04:11
have -> has
|
// must restart from scratch. |
- } while (res == NativeRegExpMacroAssembler::RETRY); |
- if (res == NativeRegExpMacroAssembler::EXCEPTION) { |
- ASSERT(Top::has_pending_exception()); |
- return Handle<Object>::null(); |
- } |
- ASSERT(res == NativeRegExpMacroAssembler::SUCCESS |
- || res == NativeRegExpMacroAssembler::FAILURE); |
- |
- if (res != NativeRegExpMacroAssembler::SUCCESS) return Factory::null_value(); |
- |
- array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements())); |
- ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); |
- // The captures come in (start, end+1) pairs. |
- for (int i = 0; i < number_of_capture_registers; i += 2) { |
- // Capture values are relative to start_offset only. |
- // Convert them to be relative to start of string. |
- if (captures_vector[i] >= 0) { |
- captures_vector[i] += previous_index; |
- } |
- if (captures_vector[i + 1] >= 0) { |
- captures_vector[i + 1] += previous_index; |
- } |
- SetCapture(*array, i, captures_vector[i]); |
- SetCapture(*array, i + 1, captures_vector[i + 1]); |
- } |
- |
+ // In this case, it means we must make sure we are prepared to handle |
+ // the, potentially, different subject (the string can switch between |
Erik Corry
2010/03/19 11:04:11
en -> ent
|
+ // being internal and external, and even between being ASCII and UC16, |
+ // but the characters are always the same). |
+ IrregexpPrepare(regexp, subject); |
+ } while (true); |
+ UNREACHABLE(); |
+ return RE_EXCEPTION; |
#else // ! V8_NATIVE_REGEXP |
bool is_ascii = subject->IsAsciiRepresentation(); |
- if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { |
- return Handle<Object>::null(); |
- } |
- // Now that we have done EnsureCompiledIrregexp we can get the number of |
+ // We must have done EnsureCompiledIrregexp, so we can get the number of |
// registers. |
- int number_of_registers = |
- IrregexpNumberOfRegisters(FixedArray::cast(jsregexp->data())); |
- OffsetsVector registers(number_of_registers); |
- int* register_vector = registers.vector(); |
+ int* register_vector = output.start(); |
+ int number_of_capture_registers = |
+ (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
register_vector[i] = -1; |
} |
- Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); |
+ Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii)); |
- if (!IrregexpInterpreter::Match(byte_codes, |
- subject, |
- register_vector, |
- previous_index)) { |
- return Factory::null_value(); |
+ if (IrregexpInterpreter::Match(byte_codes, |
+ subject, |
+ register_vector, |
+ index)) { |
+ return RE_SUCCESS; |
} |
+ return RE_FAILURE; |
+#endif // V8_NATIVE_REGEXP |
+} |
- array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements())); |
- ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); |
- // The captures come in (start, end+1) pairs. |
- for (int i = 0; i < number_of_capture_registers; i += 2) { |
- SetCapture(*array, i, register_vector[i]); |
- SetCapture(*array, i + 1, register_vector[i + 1]); |
- } |
-#endif // V8_NATIVE_REGEXP |
+Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, |
+ Handle<String> subject, |
+ int previous_index, |
+ Handle<JSArray> last_match_info) { |
+ ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); |
+ |
+ // Prepare space for the return values. |
+#ifndef V8_NATIVE_REGEXP |
+#ifdef DEBUG |
+ if (FLAG_trace_regexp_bytecodes) { |
+ String* pattern = jsregexp->Pattern(); |
+ PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
+ PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
+ } |
+#endif |
+#endif |
+ int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); |
+ if (required_registers < 0) { |
+ // Compiling failed with an exception. |
+ ASSERT(Top::has_pending_exception()); |
+ return Handle<Object>::null(); |
+ } |
- SetLastCaptureCount(*array, number_of_capture_registers); |
- SetLastSubject(*array, *subject); |
- SetLastInput(*array, *subject); |
+ OffsetsVector registers(required_registers); |
Erik Corry
2010/03/19 11:04:11
2 blank lines
Lasse Reichstein
2010/03/19 11:25:42
Ignored, per offline discussion.
|
- return last_match_info; |
+ IrregexpResult res = IrregexpExecOnce(jsregexp, |
+ subject, |
+ previous_index, |
+ Vector<int>(registers.vector(), |
+ registers.length())); |
+ if (res == RE_SUCCESS) { |
+ int capture_register_count = |
+ (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
+ last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); |
+ AssertNoAllocation no_gc; |
+ int* register_vector = registers.vector(); |
+ FixedArray* array = FixedArray::cast(last_match_info->elements()); |
+ for (int i = 0; i < capture_register_count; i += 2) { |
+ SetCapture(array, i, register_vector[i]); |
+ SetCapture(array, i + 1, register_vector[i + 1]); |
+ } |
+ SetLastCaptureCount(array, capture_register_count); |
+ SetLastSubject(array, *subject); |
+ SetLastInput(array, *subject); |
+ return last_match_info; |
+ } |
+ if (res == RE_EXCEPTION) { |
+ ASSERT(Top::has_pending_exception()); |
+ return Handle<Object>::null(); |
+ } |
+ ASSERT(res == RE_FAILURE); |
+ return Factory::null_value(); |
} |