Chromium Code Reviews| Index: src/jsregexp.cc |
| diff --git a/src/jsregexp.cc b/src/jsregexp.cc |
| index 505cf03e5ee47ce33fee04095dc7815658423ad1..85a8a59920b63b7ff14148acbc2baa6607dffb27 100644 |
| --- a/src/jsregexp.cc |
| +++ b/src/jsregexp.cc |
| @@ -149,7 +149,7 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, |
| Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); |
| AtomCompile(re, pattern, flags, atom_string); |
| } else { |
| - IrregexpPrepare(re, pattern, flags, parse_result.capture_count); |
| + IrregexpInitialize(re, pattern, flags, parse_result.capture_count); |
| } |
| ASSERT(re->data()->IsFixedArray()); |
| // Compilation succeeded so the data is set on the regexp |
| @@ -341,7 +341,7 @@ Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { |
| } |
| -void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, |
| +void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re, |
| Handle<String> pattern, |
|
Erik Corry
2010/03/19 11:04:11
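Indentation: please re-align these continuation parameter lines with the opening parenthesis, now that the function name is longer.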
|
| JSRegExp::Flags flags, |
| int capture_count) { |
| @@ -354,120 +354,149 @@ void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, |
| } |
| -Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, |
| - Handle<String> subject, |
| - int previous_index, |
| - Handle<JSArray> last_match_info) { |
| - ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); |
| - |
| - // Prepare space for the return values. |
| - int number_of_capture_registers = |
| - (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
| - |
| -#ifndef V8_NATIVE_REGEXP |
| -#ifdef DEBUG |
| - if (FLAG_trace_regexp_bytecodes) { |
| - String* pattern = jsregexp->Pattern(); |
| - PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| - PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| - } |
| -#endif |
| -#endif |
| - |
| +int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
| + Handle<String> subject) { |
| if (!subject->IsFlat()) { |
| FlattenString(subject); |
| } |
| + bool is_ascii = subject->IsAsciiRepresentation(); |
| + if (!EnsureCompiledIrregexp(regexp, is_ascii)) { |
| + return -1; |
| + } |
| +#ifdef V8_NATIVE_REGEXP |
| + // Native regexp only needs room to output captures. Registers are handled |
| + // internally. |
| + return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; |
| +#else // !V8_NATIVE_REGEXP |
| + // Byte-code regexp needs space allocated for all its registers. |
| + return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); |
| +#endif // V8_NATIVE_REGEXP |
|
Erik Corry
2010/03/19 11:04:11
V8_NATIVE_REGEXP -> !V8_NATIVE_REGEXP in the trailing comment on this #endif (to match the #else above).
Actually, I prefer the comment "// ndef".
Lasse Reichstein
2010/03/19 11:25:42
Changed to ndef.
|
| +} |
| - last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); |
| - |
| - Handle<FixedArray> array; |
| - // Dispatch to the correct RegExp implementation. |
| - Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data())); |
| +RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp, |
| + Handle<String> subject, |
| + int index, |
| + Vector<int> output) { |
| + Handle<FixedArray> irregexp(FixedArray::cast(regexp->data())); |
| #ifdef V8_NATIVE_REGEXP |
| + ASSERT(output.length() >= |
|
Erik Corry
2010/03/19 11:04:11
Please move this assert into the #ifdef V8_NATIVE_REGEXP block below.
Lasse Reichstein
2010/03/19 11:25:42
Done.
|
| + (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
| +#else |
| + ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); |
| +#endif |
| + ASSERT(index >= 0); |
| + ASSERT(index <= subject->length()); |
| + ASSERT(subject->IsFlat()); |
| - OffsetsVector captures(number_of_capture_registers); |
| - int* captures_vector = captures.vector(); |
| - NativeRegExpMacroAssembler::Result res; |
| +#ifdef V8_NATIVE_REGEXP |
| do { |
| bool is_ascii = subject->IsAsciiRepresentation(); |
| - if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { |
| - return Handle<Object>::null(); |
| + Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii)); |
| + NativeRegExpMacroAssembler::Result res = |
| + NativeRegExpMacroAssembler::Match(code, |
| + subject, |
| + output.start(), |
| + output.length(), |
| + index); |
| + if (res != NativeRegExpMacroAssembler::RETRY) { |
| + ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || |
| + Top::has_pending_exception()); |
| + STATIC_ASSERT( |
| + static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); |
| + STATIC_ASSERT( |
| + static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); |
| + STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) |
| + == RE_EXCEPTION); |
| + return static_cast<IrregexpResult>(res); |
| } |
| - Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii)); |
| - res = NativeRegExpMacroAssembler::Match(code, |
| - subject, |
| - captures_vector, |
| - captures.length(), |
| - previous_index); |
| // If result is RETRY, the string have changed representation, and we |
|
Erik Corry
2010/03/19 11:04:11
Typo in the comment: "have" -> "has" ("the string has changed representation").
|
| // must restart from scratch. |
| - } while (res == NativeRegExpMacroAssembler::RETRY); |
| - if (res == NativeRegExpMacroAssembler::EXCEPTION) { |
| - ASSERT(Top::has_pending_exception()); |
| - return Handle<Object>::null(); |
| - } |
| - ASSERT(res == NativeRegExpMacroAssembler::SUCCESS |
| - || res == NativeRegExpMacroAssembler::FAILURE); |
| - |
| - if (res != NativeRegExpMacroAssembler::SUCCESS) return Factory::null_value(); |
| - |
| - array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements())); |
| - ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); |
| - // The captures come in (start, end+1) pairs. |
| - for (int i = 0; i < number_of_capture_registers; i += 2) { |
| - // Capture values are relative to start_offset only. |
| - // Convert them to be relative to start of string. |
| - if (captures_vector[i] >= 0) { |
| - captures_vector[i] += previous_index; |
| - } |
| - if (captures_vector[i + 1] >= 0) { |
| - captures_vector[i + 1] += previous_index; |
| - } |
| - SetCapture(*array, i, captures_vector[i]); |
| - SetCapture(*array, i + 1, captures_vector[i + 1]); |
| - } |
| - |
| + // In this case, it means we must make sure we are prepared to handle |
| + // the, potentially, differen subject (the string can switch between |
|
Erik Corry
2010/03/19 11:04:11
Typo in the comment: "differen" -> "different".
|
| + // being internal and external, and even between being ASCII and UC16, |
| + // but the characters are always the same). |
| + IrregexpPrepare(regexp, subject); |
| + } while (true); |
| + UNREACHABLE(); |
| + return RE_EXCEPTION; |
| #else // ! V8_NATIVE_REGEXP |
| bool is_ascii = subject->IsAsciiRepresentation(); |
| - if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { |
| - return Handle<Object>::null(); |
| - } |
| - // Now that we have done EnsureCompiledIrregexp we can get the number of |
| + // We must have done EnsureCompiledIrregexp, so we can get the number of |
| // registers. |
| - int number_of_registers = |
| - IrregexpNumberOfRegisters(FixedArray::cast(jsregexp->data())); |
| - OffsetsVector registers(number_of_registers); |
| - int* register_vector = registers.vector(); |
| + int* register_vector = output.start(); |
| + int number_of_capture_registers = |
| + (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
| for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
| register_vector[i] = -1; |
| } |
| - Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); |
| + Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii)); |
| - if (!IrregexpInterpreter::Match(byte_codes, |
| - subject, |
| - register_vector, |
| - previous_index)) { |
| - return Factory::null_value(); |
| + if (IrregexpInterpreter::Match(byte_codes, |
| + subject, |
| + register_vector, |
| + index)) { |
| + return RE_SUCCESS; |
| } |
| + return RE_FAILURE; |
| +#endif // V8_NATIVE_REGEXP |
| +} |
| - array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements())); |
| - ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); |
| - // The captures come in (start, end+1) pairs. |
| - for (int i = 0; i < number_of_capture_registers; i += 2) { |
| - SetCapture(*array, i, register_vector[i]); |
| - SetCapture(*array, i + 1, register_vector[i + 1]); |
| - } |
| -#endif // V8_NATIVE_REGEXP |
| +Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, |
| + Handle<String> subject, |
| + int previous_index, |
| + Handle<JSArray> last_match_info) { |
| + ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); |
| + |
| + // Prepare space for the return values. |
| +#ifndef V8_NATIVE_REGEXP |
| +#ifdef DEBUG |
| + if (FLAG_trace_regexp_bytecodes) { |
| + String* pattern = jsregexp->Pattern(); |
| + PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| + PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| + } |
| +#endif |
| +#endif |
| + int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); |
| + if (required_registers < 0) { |
| + // Compiling failed with an exception. |
| + ASSERT(Top::has_pending_exception()); |
| + return Handle<Object>::null(); |
| + } |
| - SetLastCaptureCount(*array, number_of_capture_registers); |
| - SetLastSubject(*array, *subject); |
| - SetLastInput(*array, *subject); |
| + OffsetsVector registers(required_registers); |
|
Erik Corry
2010/03/19 11:04:11
2 blank lines
Lasse Reichstein
2010/03/19 11:25:42
Ignored, per offline discussion.
|
| - return last_match_info; |
| + IrregexpResult res = IrregexpExecOnce(jsregexp, |
| + subject, |
| + previous_index, |
| + Vector<int>(registers.vector(), |
| + registers.length())); |
| + if (res == RE_SUCCESS) { |
| + int capture_register_count = |
| + (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
| + last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); |
| + AssertNoAllocation no_gc; |
| + int* register_vector = registers.vector(); |
| + FixedArray* array = FixedArray::cast(last_match_info->elements()); |
| + for (int i = 0; i < capture_register_count; i += 2) { |
| + SetCapture(array, i, register_vector[i]); |
| + SetCapture(array, i + 1, register_vector[i + 1]); |
| + } |
| + SetLastCaptureCount(array, capture_register_count); |
| + SetLastSubject(array, *subject); |
| + SetLastInput(array, *subject); |
| + return last_match_info; |
| + } |
| + if (res == RE_EXCEPTION) { |
| + ASSERT(Top::has_pending_exception()); |
| + return Handle<Object>::null(); |
| + } |
| + ASSERT(res == RE_FAILURE); |
| + return Factory::null_value(); |
| } |