Chromium Code Reviews| Index: src/jsregexp.cc |
| =================================================================== |
| --- src/jsregexp.cc (revision 1489) |
| +++ src/jsregexp.cc (working copy) |
| @@ -213,55 +213,54 @@ |
| Handle<Object> result; |
| if (in_cache) { |
| re->set_data(*cached); |
| - result = re; |
| + return re; |
| + } |
| + FlattenString(pattern); |
| + ZoneScope zone_scope(DELETE_ON_EXIT); |
| + RegExpCompileData parse_result; |
| + FlatStringReader reader(pattern); |
| + if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { |
| + // Throw an exception if we fail to parse the pattern. |
| + ThrowRegExpException(re, |
| + pattern, |
| + parse_result.error, |
| + "malformed_regexp"); |
| + return Handle<Object>::null(); |
| + } |
| + |
| + if (parse_result.simple && !flags.is_ignore_case()) { |
| + // Parse-tree is a single atom that is equal to the pattern. |
| + AtomCompile(re, pattern, flags, pattern); |
| + } else if (parse_result.tree->IsAtom() && |
| + !flags.is_ignore_case() && |
| + parse_result.capture_count == 0) { |
| + RegExpAtom* atom = parse_result.tree->AsAtom(); |
| + Vector<const uc16> atom_pattern = atom->data(); |
| + Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); |
| + AtomCompile(re, pattern, flags, atom_string); |
| } else { |
| - FlattenString(pattern); |
| - ZoneScope zone_scope(DELETE_ON_EXIT); |
| - RegExpCompileData parse_result; |
| - FlatStringReader reader(pattern); |
| - if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { |
| - // Throw an exception if we fail to parse the pattern. |
| - ThrowRegExpException(re, |
| - pattern, |
| - parse_result.error, |
| - "malformed_regexp"); |
| - return Handle<Object>::null(); |
| - } |
| - |
| - if (parse_result.simple && !flags.is_ignore_case()) { |
| - // Parse-tree is a single atom that is equal to the pattern. |
| - result = AtomCompile(re, pattern, flags, pattern); |
| - } else if (parse_result.tree->IsAtom() && |
| - !flags.is_ignore_case() && |
| - parse_result.capture_count == 0) { |
| - RegExpAtom* atom = parse_result.tree->AsAtom(); |
| - Vector<const uc16> atom_pattern = atom->data(); |
| - Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); |
| - result = AtomCompile(re, pattern, flags, atom_string); |
| - } else { |
| - result = IrregexpPrepare(re, pattern, flags); |
| - } |
| - Object* data = re->data(); |
| - if (data->IsFixedArray()) { |
| - // If compilation succeeded then the data is set on the regexp |
| - // and we can store it in the cache. |
| - Handle<FixedArray> data(FixedArray::cast(re->data())); |
| - CompilationCache::PutRegExp(pattern, flags, data); |
| - } |
| + IrregexpPrepare(re, pattern, flags, parse_result.capture_count); |
| } |
| + ASSERT(re->data()->IsFixedArray()); |
| + // Compilation succeeded so the data is set on the regexp |
| + // and we can store it in the cache. |
| + Handle<FixedArray> data(FixedArray::cast(re->data())); |
| + CompilationCache::PutRegExp(pattern, flags, data); |
| - return result; |
| + return re; |
| } |
| Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
| Handle<String> subject, |
| - Handle<Object> index) { |
| + int index, |
| + Handle<JSArray> last_match_info) { |
| switch (regexp->TypeTag()) { |
| case JSRegExp::ATOM: |
| - return AtomExec(regexp, subject, index); |
| + return AtomExec(regexp, subject, index, last_match_info); |
| case JSRegExp::IRREGEXP: { |
| - Handle<Object> result = IrregexpExec(regexp, subject, index); |
| + Handle<Object> result = |
| + IrregexpExec(regexp, subject, index, last_match_info); |
| ASSERT(!result.is_null() || Top::has_pending_exception()); |
| return result; |
| } |
| @@ -273,12 +272,14 @@ |
| Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, |
| - Handle<String> subject) { |
| + Handle<String> subject, |
| + Handle<JSArray> last_match_info) { |
| switch (regexp->TypeTag()) { |
| case JSRegExp::ATOM: |
| - return AtomExecGlobal(regexp, subject); |
| + return AtomExecGlobal(regexp, subject, last_match_info); |
| case JSRegExp::IRREGEXP: { |
| - Handle<Object> result = IrregexpExecGlobal(regexp, subject); |
| + Handle<Object> result = |
| + IrregexpExecGlobal(regexp, subject, last_match_info); |
| ASSERT(!result.is_null() || Top::has_pending_exception()); |
| return result; |
| } |
| @@ -292,60 +293,95 @@ |
| // RegExp Atom implementation: Simple string search using indexOf. |
| -Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, |
| - Handle<String> pattern, |
| - JSRegExp::Flags flags, |
| - Handle<String> match_pattern) { |
| - Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern); |
| - return re; |
| +void RegExpImpl::AtomCompile(Handle<JSRegExp> re, |
| + Handle<String> pattern, |
| + JSRegExp::Flags flags, |
| + Handle<String> match_pattern) { |
| + Factory::SetRegExpAtomData(re, |
| + JSRegExp::ATOM, |
| + pattern, |
| + flags, |
| + match_pattern); |
| } |
| +static void SetAtomLastCapture(FixedArray* array, |
| + String* subject, |
| + int from, |
| + int to) { |
| + NoHandleAllocation no_handles; |
| + RegExpImpl::SetLastCaptureCount(array, 2); |
| + RegExpImpl::SetLastSubject(array, subject); |
| + RegExpImpl::SetLastInput(array, subject); |
| + RegExpImpl::SetCapture(array, 0, from); |
| + RegExpImpl::SetCapture(array, 1, to); |
| +} |
| + |
| + |
| Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
| Handle<String> subject, |
| - Handle<Object> index) { |
| + int index, |
| + Handle<JSArray> last_match_info) { |
| Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
| - uint32_t start_index; |
| - if (!Array::IndexFromObject(*index, &start_index)) { |
| - return Handle<Smi>(Smi::FromInt(-1)); |
| - } |
| + uint32_t start_index = index; |
| int value = Runtime::StringMatch(subject, needle, start_index); |
| if (value == -1) return Factory::null_value(); |
| + ASSERT(last_match_info->HasFastElements()); |
| - Handle<FixedArray> array = Factory::NewFixedArray(2); |
| - array->set(0, Smi::FromInt(value)); |
| - array->set(1, Smi::FromInt(value + needle->length())); |
| - return Factory::NewJSArrayWithElements(array); |
| + { |
| + NoHandleAllocation no_handles; |
| + FixedArray* array = last_match_info->elements(); |
| + SetAtomLastCapture(array, *subject, value, value + needle->length()); |
| + } |
| + return last_match_info; |
| } |
| Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, |
| - Handle<String> subject) { |
| + Handle<String> subject, |
| + Handle<JSArray> last_match_info) { |
| Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); |
| + ASSERT(last_match_info->HasFastElements()); |
| Handle<JSArray> result = Factory::NewJSArray(1); |
| int index = 0; |
| int match_count = 0; |
| int subject_length = subject->length(); |
| int needle_length = needle->length(); |
| + int last_value = -1; |
| while (true) { |
| + HandleScope scope; |
| int value = -1; |
| if (index + needle_length <= subject_length) { |
| value = Runtime::StringMatch(subject, needle, index); |
| } |
| - if (value == -1) break; |
| - HandleScope scope; |
| + if (value == -1) { |
| + if (last_value != -1) { |
| + Handle<FixedArray> array(last_match_info->elements()); |
| + SetAtomLastCapture(*array, |
| + *subject, |
| + last_value, |
| + last_value + needle->length()); |
| + } |
| + break; |
| + } |
| + |
| int end = value + needle_length; |
| - Handle<FixedArray> array = Factory::NewFixedArray(2); |
| - array->set(0, Smi::FromInt(value)); |
| - array->set(1, Smi::FromInt(end)); |
| + // Create an array that looks like the static last_match_info array |
| + // that is attached to the global RegExp object. We will be returning |
| + // an array of these. |
| + Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2); |
| + SetCapture(*array, 0, value); |
| + SetCapture(*array, 1, end); |
| + SetLastCaptureCount(*array, 2); |
| Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); |
| SetElement(result, match_count, pair); |
| match_count++; |
| index = end; |
| if (needle_length == 0) index++; |
| + last_value = value; |
| } |
| return result; |
| } |
| @@ -354,24 +390,30 @@ |
| // Irregexp implementation. |
| -// Retrieves a compiled version of the regexp for either ASCII or non-ASCII |
| -// strings. If the compiled version doesn't already exist, it is compiled |
| +// Ensures that the regexp object contains a compiled version of the |
| +// source for either ASCII or non-ASCII strings. |
| +// If the compiled version doesn't already exist, it is compiled |
| // from the source pattern. |
| -// Irregexp is not feature complete yet. If there is something in the |
| -// regexp that the compiler cannot currently handle, an empty |
| -// handle is returned, but no exception is thrown. |
| -static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re, |
| - bool is_ascii) { |
| - ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); |
| - Handle<FixedArray> alternatives( |
| - FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex))); |
| - ASSERT_EQ(2, alternatives->length()); |
| - |
| - int index = is_ascii ? 0 : 1; |
| - Object* entry = alternatives->get(index); |
| - if (!entry->IsNull()) { |
| - return Handle<FixedArray>(FixedArray::cast(entry)); |
| +// If compilation fails, an exception is thrown and this function |
| +// returns false. |
| +bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, |
| + bool is_ascii) { |
| + int index; |
| + if (is_ascii) { |
| + index = JSRegExp::kIrregexpASCIICodeIndex; |
| + } else { |
| + index = JSRegExp::kIrregexpUC16CodeIndex; |
| } |
| + Object* entry = re->DataAt(index); |
| + if (!entry->IsTheHole()) { |
| + // A value has already been compiled. |
| + if (entry->IsJSObject()) { |
| + // If it's a JS value, it's an error. |
| + Top::Throw(entry); |
| + return false; |
| + } |
| + return true; |
| + } |
| // Compile the RegExp. |
| ZoneScope zone_scope(DELETE_ON_EXIT); |
| @@ -392,78 +434,116 @@ |
| pattern, |
| compile_data.error, |
| "malformed_regexp"); |
| - return Handle<FixedArray>::null(); |
| + return false; |
| } |
| - Handle<FixedArray> compiled_entry = |
| + RegExpEngine::CompilationResult result = |
| RegExpEngine::Compile(&compile_data, |
| flags.is_ignore_case(), |
| flags.is_multiline(), |
| pattern, |
| is_ascii); |
| - if (!compiled_entry.is_null()) { |
| - alternatives->set(index, *compiled_entry); |
| + if (result.error_message != NULL) { |
| + // Unable to compile regexp. |
| + Handle<JSArray> array = Factory::NewJSArray(2); |
| + SetElement(array, 0, pattern); |
| + SetElement(array, |
| + 1, |
| + Factory::NewStringFromUtf8(CStrVector(result.error_message))); |
| + Handle<Object> regexp_err = |
| + Factory::NewSyntaxError("malformed_regexp", array); |
| + Top::Throw(*regexp_err); |
| + re->SetDataAt(index, *regexp_err); |
| + return false; |
| } |
| - return compiled_entry; |
| + |
| + NoHandleAllocation no_handles; |
| + |
| + FixedArray* data = FixedArray::cast(re->data()); |
| + data->set(index, result.code); |
| + int register_max = IrregexpMaxRegisterCount(data); |
| + if (result.num_registers > register_max) { |
| + SetIrregexpMaxRegisterCount(data, result.num_registers); |
| + } |
| + |
| + return true; |
| } |
| -int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) { |
| - return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value(); |
| +int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) { |
| + return Smi::cast( |
| + re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); |
| } |
| -int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) { |
| - return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value(); |
| +void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) { |
| + re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value)); |
| } |
| -Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) { |
| - ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() |
| - == RegExpMacroAssembler::kBytecodeImplementation); |
| - return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex))); |
| +int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) { |
| + return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value(); |
| } |
| -Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) { |
| - ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() |
| - != RegExpMacroAssembler::kBytecodeImplementation); |
| - return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex))); |
| +int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) { |
| + return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); |
| } |
| -Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, |
| - Handle<String> pattern, |
| - JSRegExp::Flags flags) { |
| - // Make space for ASCII and UC16 versions. |
| - Handle<FixedArray> alternatives = Factory::NewFixedArray(2); |
| - alternatives->set_null(0); |
| - alternatives->set_null(1); |
| - Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives); |
| - return re; |
| +ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { |
| + int index; |
| + if (is_ascii) { |
| + index = JSRegExp::kIrregexpASCIICodeIndex; |
| + } else { |
| + index = JSRegExp::kIrregexpUC16CodeIndex; |
| + } |
| + return ByteArray::cast(re->get(index)); |
| } |
| +Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { |
| + int index; |
| + if (is_ascii) { |
| + index = JSRegExp::kIrregexpASCIICodeIndex; |
| + } else { |
| + index = JSRegExp::kIrregexpUC16CodeIndex; |
| + } |
| + return Code::cast(re->get(index)); |
| +} |
| + |
| + |
| +void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, |
| + Handle<String> pattern, |
| + JSRegExp::Flags flags, |
| + int capture_count) { |
| + // Initialize compiled code entries to the hole (not yet compiled). |
| + Factory::SetRegExpIrregexpData(re, |
| + JSRegExp::IRREGEXP, |
| + pattern, |
| + flags, |
| + capture_count); |
| +} |
| + |
| + |
| Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, |
| Handle<String> subject, |
| - Handle<Object> index) { |
| + int index, |
| + Handle<JSArray> last_match_info) { |
| ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| - ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); |
| bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); |
| - Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); |
| - if (irregexp.is_null()) { |
| - // We can't handle the RegExp with IRRegExp. |
| + if (!EnsureCompiledIrregexp(regexp, is_ascii)) { |
| return Handle<Object>::null(); |
| } |
| // Prepare space for the return values. |
| - int number_of_registers = IrregexpNumberOfRegisters(irregexp); |
| - OffsetsVector offsets(number_of_registers); |
| + Handle<FixedArray> re_data(FixedArray::cast(regexp->data())); |
| + int number_of_capture_registers = |
| + (IrregexpNumberOfCaptures(*re_data) + 1) * 2; |
| + OffsetsVector offsets(number_of_capture_registers); |
| - int num_captures = IrregexpNumberOfCaptures(irregexp); |
| + int previous_index = index; |
| - int previous_index = static_cast<int>(DoubleToInteger(index->Number())); |
| - |
| #ifdef DEBUG |
| if (FLAG_trace_regexp_bytecodes) { |
| String* pattern = regexp->Pattern(); |
| @@ -476,8 +556,11 @@ |
| FlattenString(subject); |
| } |
| - return IrregexpExecOnce(irregexp, |
| - num_captures, |
| + last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); |
| + |
| + return IrregexpExecOnce(re_data, |
| + number_of_capture_registers, |
| + last_match_info, |
| subject, |
| previous_index, |
| offsets.vector(), |
| @@ -486,29 +569,33 @@ |
| Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, |
| - Handle<String> subject) { |
| + Handle<String> subject, |
| + Handle<JSArray> last_match_info) { |
| ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| + Handle<FixedArray> irregexp(FixedArray::cast(regexp->data())); |
| bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); |
| - Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); |
| - if (irregexp.is_null()) { |
| + if (!EnsureCompiledIrregexp(regexp, is_ascii)) { |
| return Handle<Object>::null(); |
| } |
| // Prepare space for the return values. |
| - int number_of_registers = IrregexpNumberOfRegisters(irregexp); |
| - OffsetsVector offsets(number_of_registers); |
| + int number_of_capture_registers = |
| + (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
| + OffsetsVector offsets(number_of_capture_registers); |
| int previous_index = 0; |
| Handle<JSArray> result = Factory::NewJSArray(0); |
| - int i = 0; |
| + int result_length = 0; |
| Handle<Object> matches; |
| if (!subject->IsFlat(StringShape(*subject))) { |
| FlattenString(subject); |
| } |
| + last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); |
| + |
| while (true) { |
| if (previous_index > subject->length() || previous_index < 0) { |
| // Per ECMA-262 15.10.6.2, if the previous index is greater than the |
| @@ -523,8 +610,10 @@ |
| PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| } |
| #endif |
| + HandleScope scope; |
| matches = IrregexpExecOnce(irregexp, |
| - IrregexpNumberOfCaptures(irregexp), |
| + number_of_capture_registers, |
| + last_match_info, |
| subject, |
| previous_index, |
| offsets.vector(), |
| @@ -536,12 +625,25 @@ |
| } |
| if (matches->IsJSArray()) { |
| - SetElement(result, i, matches); |
| - i++; |
| - previous_index = offsets.vector()[1]; |
| - if (offsets.vector()[0] == offsets.vector()[1]) { |
| + // Create an array that looks like the static last_match_info array |
| + // that is attached to the global RegExp object. We will be returning |
| + // an array of these. |
| + Handle<FixedArray> matches_array(JSArray::cast(*matches)->elements()); |
| + Handle<JSArray> latest_match = |
| + Factory::NewJSArray(kFirstCapture + number_of_capture_registers); |
| + Handle<FixedArray> latest_match_array(latest_match->elements()); |
| + |
| + for (int i = 0; i < number_of_capture_registers; i++) { |
| + SetCapture(*latest_match_array, i, GetCapture(*matches_array, i)); |
| + } |
| + SetLastCaptureCount(*latest_match_array, number_of_capture_registers); |
| + |
| + SetElement(result, result_length, latest_match); |
| + result_length++; |
| + previous_index = GetCapture(*matches_array, 1); |
| + if (GetCapture(*matches_array, 0) == previous_index) |
| previous_index++; |
| - } |
| + |
| } else { |
| ASSERT(matches->IsNull()); |
| return result; |
| @@ -551,131 +653,125 @@ |
| } |
| -Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, |
| - int num_captures, |
| +Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp, |
| + int number_of_capture_registers, |
| + Handle<JSArray> last_match_info, |
| Handle<String> subject, |
| int previous_index, |
| int* offsets_vector, |
| int offsets_vector_length) { |
| - ASSERT(subject->IsFlat(StringShape(*subject))); |
| + StringShape shape(*subject); |
| + ASSERT(subject->IsFlat(shape)); |
| + bool is_ascii = shape.IsAsciiRepresentation(); |
| bool rc; |
| - int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); |
| - |
| - switch (tag) { |
| - case RegExpMacroAssembler::kIA32Implementation: { |
| + Handle<String> original_subject = subject; |
| + if (FLAG_regexp_native) { |
| #ifndef ARM |
| - Handle<Code> code = IrregexpNativeCode(irregexp); |
| + Handle<Code> code(IrregexpNativeCode(*regexp, is_ascii)); |
| - StringShape shape(*subject); |
| + // Character offsets into string. |
| + int start_offset = previous_index; |
| + int end_offset = subject->length(shape); |
| - // Character offsets into string. |
| - int start_offset = previous_index; |
| - int end_offset = subject->length(shape); |
| + if (shape.IsCons()) { |
| + subject = Handle<String>(ConsString::cast(*subject)->first()); |
| + } else if (shape.IsSliced()) { |
| + SlicedString* slice = SlicedString::cast(*subject); |
| + start_offset += slice->start(); |
| + end_offset += slice->start(); |
| + subject = Handle<String>(slice->buffer()); |
| + } |
| - if (shape.IsCons()) { |
| - subject = Handle<String>(ConsString::cast(*subject)->first()); |
| - } else if (shape.IsSliced()) { |
| - SlicedString* slice = SlicedString::cast(*subject); |
| - start_offset += slice->start(); |
| - end_offset += slice->start(); |
| - subject = Handle<String>(slice->buffer()); |
| - } |
| + // String is now either Sequential or External |
| + StringShape flatshape(*subject); |
| + bool is_ascii = flatshape.IsAsciiRepresentation(); |
| + int char_size_shift = is_ascii ? 0 : 1; |
| - // String is now either Sequential or External |
| - StringShape flatshape(*subject); |
| - bool is_ascii = flatshape.IsAsciiRepresentation(); |
| - int char_size_shift = is_ascii ? 0 : 1; |
| + RegExpMacroAssemblerIA32::Result res; |
| - RegExpMacroAssemblerIA32::Result res; |
| - |
| - if (flatshape.IsExternal()) { |
| - const byte* address; |
| - if (is_ascii) { |
| - ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); |
| - address = reinterpret_cast<const byte*>(ext->resource()->data()); |
| - } else { |
| - ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); |
| - address = reinterpret_cast<const byte*>(ext->resource()->data()); |
| - } |
| - res = RegExpMacroAssemblerIA32::Execute( |
| - *code, |
| - const_cast<Address*>(&address), |
| - start_offset << char_size_shift, |
| - end_offset << char_size_shift, |
| - offsets_vector, |
| - previous_index == 0); |
| - } else { // Sequential string |
| - ASSERT(StringShape(*subject).IsSequential()); |
| - Address char_address = |
| - is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() |
| - : SeqTwoByteString::cast(*subject)->GetCharsAddress(); |
| - int byte_offset = char_address - reinterpret_cast<Address>(*subject); |
| - res = RegExpMacroAssemblerIA32::Execute( |
| - *code, |
| - reinterpret_cast<Address*>(subject.location()), |
| - byte_offset + (start_offset << char_size_shift), |
| - byte_offset + (end_offset << char_size_shift), |
| - offsets_vector, |
| - previous_index == 0); |
| + if (flatshape.IsExternal()) { |
| + const byte* address; |
| + if (is_ascii) { |
| + ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); |
| + address = reinterpret_cast<const byte*>(ext->resource()->data()); |
| + } else { |
| + ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); |
| + address = reinterpret_cast<const byte*>(ext->resource()->data()); |
| } |
| + res = RegExpMacroAssemblerIA32::Execute( |
| + *code, |
| + const_cast<Address*>(&address), |
| + start_offset << char_size_shift, |
| + end_offset << char_size_shift, |
| + offsets_vector, |
| + previous_index == 0); |
| + } else { // Sequential string |
| + ASSERT(StringShape(*subject).IsSequential()); |
| + Address char_address = |
| + is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() |
| + : SeqTwoByteString::cast(*subject)->GetCharsAddress(); |
| + int byte_offset = char_address - reinterpret_cast<Address>(*subject); |
| + res = RegExpMacroAssemblerIA32::Execute( |
| + *code, |
| + reinterpret_cast<Address*>(subject.location()), |
| + byte_offset + (start_offset << char_size_shift), |
| + byte_offset + (end_offset << char_size_shift), |
| + offsets_vector, |
| + previous_index == 0); |
| + } |
| - if (res == RegExpMacroAssemblerIA32::EXCEPTION) { |
| - ASSERT(Top::has_pending_exception()); |
| - return Handle<Object>::null(); |
| - } |
| - rc = (res == RegExpMacroAssemblerIA32::SUCCESS); |
| + if (res == RegExpMacroAssemblerIA32::EXCEPTION) { |
| + ASSERT(Top::has_pending_exception()); |
| + return Handle<Object>::null(); |
| + } |
| + rc = (res == RegExpMacroAssemblerIA32::SUCCESS); |
| - if (rc) { |
| - // Capture values are relative to start_offset only. |
| - for (int i = 0; i < offsets_vector_length; i++) { |
| - if (offsets_vector[i] >= 0) { |
| - offsets_vector[i] += previous_index; |
| - } |
| + if (rc) { |
| + // Capture values are relative to start_offset only. |
| + for (int i = 0; i < offsets_vector_length; i++) { |
| + if (offsets_vector[i] >= 0) { |
| + offsets_vector[i] += previous_index; |
| } |
| } |
| - break; |
| + } |
| + } else { |
| #else |
| - UNIMPLEMENTED(); |
| - rc = false; |
| - break; |
| + // Unimplemented on ARM, fall through to bytecode. |
|
Mads Ager (chromium)
2009/03/11 13:49:17
Ouch, this is hard to read. Can we factor this di…
Lasse Reichstein
2009/03/11 13:54:03
I'm all for factoring it differently, but I think…
Erik Corry
2009/03/11 14:01:06
I'll leave it alone for now.
|
| + } |
| + { |
| #endif |
| + for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
| + offsets_vector[i] = -1; |
| } |
| - case RegExpMacroAssembler::kBytecodeImplementation: { |
| - for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { |
| - offsets_vector[i] = -1; |
| - } |
| - Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp); |
| + Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); |
| - rc = IrregexpInterpreter::Match(byte_codes, |
| - subject, |
| - offsets_vector, |
| - previous_index); |
| - break; |
| - } |
| - case RegExpMacroAssembler::kARMImplementation: |
| - default: |
| - UNREACHABLE(); |
| - rc = false; |
| - break; |
| + rc = IrregexpInterpreter::Match(byte_codes, |
| + subject, |
| + offsets_vector, |
| + previous_index); |
| } |
| if (!rc) { |
| return Factory::null_value(); |
| } |
| - Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); |
| + FixedArray* array = last_match_info->elements(); |
| + ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); |
| // The captures come in (start, end+1) pairs. |
| - for (int i = 0; i < 2 * (num_captures + 1); i += 2) { |
| - array->set(i, Smi::FromInt(offsets_vector[i])); |
| - array->set(i + 1, Smi::FromInt(offsets_vector[i + 1])); |
| + for (int i = 0; i < number_of_capture_registers; i += 2) { |
| + SetCapture(array, i, offsets_vector[i]); |
| + SetCapture(array, i + 1, offsets_vector[i + 1]); |
| } |
| - return Factory::NewJSArrayWithElements(array); |
| + SetLastCaptureCount(array, number_of_capture_registers); |
| + SetLastSubject(array, *original_subject); |
| + SetLastInput(array, *original_subject); |
| + return last_match_info; |
| } |
| // ------------------------------------------------------------------- |
| -// Implmentation of the Irregexp regular expression engine. |
| +// Implementation of the Irregexp regular expression engine. |
| // |
| // The Irregexp regular expression engine is intended to be a complete |
| // implementation of ECMAScript regular expressions. It generates either |
| @@ -892,10 +988,10 @@ |
| return next_register_++; |
| } |
| - Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, |
| - RegExpNode* start, |
| - int capture_count, |
| - Handle<String> pattern); |
| + RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, |
| + RegExpNode* start, |
| + int capture_count, |
| + Handle<String> pattern); |
| inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
| @@ -940,15 +1036,8 @@ |
| }; |
| -static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) { |
| - Handle<JSArray> array = Factory::NewJSArray(2); |
| - SetElement(array, 0, pattern); |
| - const char* message = "RegExp too big"; |
| - SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message))); |
| - Handle<Object> regexp_err = |
| - Factory::NewSyntaxError("malformed_regexp", array); |
| - Top::Throw(*regexp_err); |
| - return Handle<FixedArray>(); |
| +static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { |
| + return RegExpEngine::CompilationResult("RegExp too big"); |
| } |
| @@ -966,7 +1055,7 @@ |
| } |
| -Handle<FixedArray> RegExpCompiler::Assemble( |
| +RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
| RegExpMacroAssembler* macro_assembler, |
| RegExpNode* start, |
| int capture_count, |
| @@ -988,24 +1077,17 @@ |
| while (!work_list.is_empty()) { |
| work_list.RemoveLast()->Emit(this, &new_trace); |
| } |
| - if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern); |
| - Handle<FixedArray> array = |
| - Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); |
| - array->set(RegExpImpl::kIrregexpImplementationIndex, |
| - Smi::FromInt(macro_assembler_->Implementation())); |
| - array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, |
| - Smi::FromInt(next_register_)); |
| - array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, |
| - Smi::FromInt(capture_count)); |
| + if (reg_exp_too_big_) return IrregexpRegExpTooBig(); |
| + |
| Handle<Object> code = macro_assembler_->GetCode(pattern); |
| - array->set(RegExpImpl::kIrregexpCodeIndex, *code); |
| + |
| work_list_ = NULL; |
| #ifdef DEBUG |
| if (FLAG_trace_regexp_assembler) { |
| delete macro_assembler_; |
| } |
| #endif |
| - return array; |
| + return RegExpEngine::CompilationResult(*code, next_register_); |
| } |
| @@ -3723,9 +3805,6 @@ |
| // | |
| // [if r >= f] \----> ... |
| // |
| - // |
| - // TODO(someone): clear captures on repetition and handle empty |
| - // matches. |
| // 15.10.2.5 RepeatMatcher algorithm. |
| // The parser has already eliminated the case where max is 0. In the case |
| @@ -4592,13 +4671,13 @@ |
| } |
| -Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data, |
| - bool ignore_case, |
| - bool is_multiline, |
| - Handle<String> pattern, |
| - bool is_ascii) { |
| +RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data, |
| + bool ignore_case, |
| + bool is_multiline, |
| + Handle<String> pattern, |
| + bool is_ascii) { |
| if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
| - return IrregexpRegExpTooBig(pattern); |
| + return IrregexpRegExpTooBig(); |
| } |
| RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); |
| // Wrap the body of the regexp in capture #0. |