| Index: src/jsregexp.cc
|
| ===================================================================
|
| --- src/jsregexp.cc (revision 1426)
|
| +++ src/jsregexp.cc (working copy)
|
| @@ -213,54 +213,55 @@
|
| Handle<Object> result;
|
| if (in_cache) {
|
| re->set_data(*cached);
|
| - return re;
|
| - }
|
| - FlattenString(pattern);
|
| - ZoneScope zone_scope(DELETE_ON_EXIT);
|
| - RegExpCompileData parse_result;
|
| - FlatStringReader reader(pattern);
|
| - if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
|
| - // Throw an exception if we fail to parse the pattern.
|
| - ThrowRegExpException(re,
|
| - pattern,
|
| - parse_result.error,
|
| - "malformed_regexp");
|
| - return Handle<Object>::null();
|
| - }
|
| -
|
| - if (parse_result.simple && !flags.is_ignore_case()) {
|
| - // Parse-tree is a single atom that is equal to the pattern.
|
| - AtomCompile(re, pattern, flags, pattern);
|
| - } else if (parse_result.tree->IsAtom() &&
|
| - !flags.is_ignore_case() &&
|
| - parse_result.capture_count == 0) {
|
| - RegExpAtom* atom = parse_result.tree->AsAtom();
|
| - Vector<const uc16> atom_pattern = atom->data();
|
| - Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
|
| - AtomCompile(re, pattern, flags, atom_string);
|
| + result = re;
|
| } else {
|
| - IrregexpPrepare(re, pattern, flags, parse_result.capture_count);
|
| + FlattenString(pattern);
|
| + ZoneScope zone_scope(DELETE_ON_EXIT);
|
| + RegExpCompileData parse_result;
|
| + FlatStringReader reader(pattern);
|
| + if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
|
| + // Throw an exception if we fail to parse the pattern.
|
| + ThrowRegExpException(re,
|
| + pattern,
|
| + parse_result.error,
|
| + "malformed_regexp");
|
| + return Handle<Object>::null();
|
| + }
|
| +
|
| + if (parse_result.simple && !flags.is_ignore_case()) {
|
| + // Parse-tree is a single atom that is equal to the pattern.
|
| + result = AtomCompile(re, pattern, flags, pattern);
|
| + } else if (parse_result.tree->IsAtom() &&
|
| + !flags.is_ignore_case() &&
|
| + parse_result.capture_count == 0) {
|
| + RegExpAtom* atom = parse_result.tree->AsAtom();
|
| + Vector<const uc16> atom_pattern = atom->data();
|
| + Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
|
| + result = AtomCompile(re, pattern, flags, atom_string);
|
| + } else {
|
| + result = IrregexpPrepare(re, pattern, flags);
|
| + }
|
| + Object* data = re->data();
|
| + if (data->IsFixedArray()) {
|
| + // If compilation succeeded then the data is set on the regexp
|
| + // and we can store it in the cache.
|
| + Handle<FixedArray> data(FixedArray::cast(re->data()));
|
| + CompilationCache::PutRegExp(pattern, flags, data);
|
| + }
|
| }
|
| - ASSERT(re->data()->IsFixedArray());
|
| - // Compilation succeeded so the data is set on the regexp
|
| - // and we can store it in the cache.
|
| - Handle<FixedArray> data(FixedArray::cast(re->data()));
|
| - CompilationCache::PutRegExp(pattern, flags, data);
|
|
|
| - return re;
|
| + return result;
|
| }
|
|
|
|
|
| Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
|
| Handle<String> subject,
|
| - int index,
|
| - Handle<JSArray> last_match_info) {
|
| + Handle<Object> index) {
|
| switch (regexp->TypeTag()) {
|
| case JSRegExp::ATOM:
|
| - return AtomExec(regexp, subject, index, last_match_info);
|
| + return AtomExec(regexp, subject, index);
|
| case JSRegExp::IRREGEXP: {
|
| - Handle<Object> result =
|
| - IrregexpExec(regexp, subject, index, last_match_info);
|
| + Handle<Object> result = IrregexpExec(regexp, subject, index);
|
| ASSERT(!result.is_null() || Top::has_pending_exception());
|
| return result;
|
| }
|
| @@ -272,14 +273,12 @@
|
|
|
|
|
| Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
|
| - Handle<String> subject,
|
| - Handle<JSArray> last_match_info) {
|
| + Handle<String> subject) {
|
| switch (regexp->TypeTag()) {
|
| case JSRegExp::ATOM:
|
| - return AtomExecGlobal(regexp, subject, last_match_info);
|
| + return AtomExecGlobal(regexp, subject);
|
| case JSRegExp::IRREGEXP: {
|
| - Handle<Object> result =
|
| - IrregexpExecGlobal(regexp, subject, last_match_info);
|
| + Handle<Object> result = IrregexpExecGlobal(regexp, subject);
|
| ASSERT(!result.is_null() || Top::has_pending_exception());
|
| return result;
|
| }
|
| @@ -293,95 +292,60 @@
|
| // RegExp Atom implementation: Simple string search using indexOf.
|
|
|
|
|
| -void RegExpImpl::AtomCompile(Handle<JSRegExp> re,
|
| - Handle<String> pattern,
|
| - JSRegExp::Flags flags,
|
| - Handle<String> match_pattern) {
|
| - Factory::SetRegExpAtomData(re,
|
| - JSRegExp::ATOM,
|
| - pattern,
|
| - flags,
|
| - match_pattern);
|
| +Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
|
| + Handle<String> pattern,
|
| + JSRegExp::Flags flags,
|
| + Handle<String> match_pattern) {
|
| + Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern);
|
| + return re;
|
| }
|
|
|
|
|
| -static void SetAtomLastCapture(FixedArray* array,
|
| - String* subject,
|
| - int from,
|
| - int to) {
|
| - NoHandleAllocation no_handles;
|
| - RegExpImpl::SetLastCaptureCount(array, 2);
|
| - RegExpImpl::SetLastSubject(array, subject);
|
| - RegExpImpl::SetLastInput(array, subject);
|
| - RegExpImpl::SetCapture(array, 0, from);
|
| - RegExpImpl::SetCapture(array, 1, to);
|
| -}
|
| -
|
| -
|
| Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
|
| Handle<String> subject,
|
| - int index,
|
| - Handle<JSArray> last_match_info) {
|
| + Handle<Object> index) {
|
| Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
|
|
|
| - uint32_t start_index = index;
|
| + uint32_t start_index;
|
| + if (!Array::IndexFromObject(*index, &start_index)) {
|
| + return Handle<Smi>(Smi::FromInt(-1));
|
| + }
|
|
|
| int value = Runtime::StringMatch(subject, needle, start_index);
|
| if (value == -1) return Factory::null_value();
|
| - ASSERT(last_match_info->HasFastElements());
|
|
|
| - {
|
| - NoHandleAllocation no_handles;
|
| - FixedArray* array = last_match_info->elements();
|
| - SetAtomLastCapture(array, *subject, value, value + needle->length());
|
| - }
|
| - return last_match_info;
|
| + Handle<FixedArray> array = Factory::NewFixedArray(2);
|
| + array->set(0, Smi::FromInt(value));
|
| + array->set(1, Smi::FromInt(value + needle->length()));
|
| + return Factory::NewJSArrayWithElements(array);
|
| }
|
|
|
|
|
| Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
|
| - Handle<String> subject,
|
| - Handle<JSArray> last_match_info) {
|
| + Handle<String> subject) {
|
| Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
|
| - ASSERT(last_match_info->HasFastElements());
|
| Handle<JSArray> result = Factory::NewJSArray(1);
|
| int index = 0;
|
| int match_count = 0;
|
| int subject_length = subject->length();
|
| int needle_length = needle->length();
|
| - int last_value = -1;
|
| while (true) {
|
| - HandleScope scope;
|
| int value = -1;
|
| if (index + needle_length <= subject_length) {
|
| value = Runtime::StringMatch(subject, needle, index);
|
| }
|
| - if (value == -1) {
|
| - if (last_value != -1) {
|
| - Handle<FixedArray> array(last_match_info->elements());
|
| - SetAtomLastCapture(*array,
|
| - *subject,
|
| - last_value,
|
| - last_value + needle->length());
|
| - }
|
| - break;
|
| - }
|
| -
|
| + if (value == -1) break;
|
| + HandleScope scope;
|
| int end = value + needle_length;
|
|
|
| - // Create an array that looks like the static last_match_info array
|
| - // that is attached to the global RegExp object. We will be returning
|
| - // an array of these.
|
| - Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2);
|
| - SetCapture(*array, 0, value);
|
| - SetCapture(*array, 1, end);
|
| - SetLastCaptureCount(*array, 2);
|
| + Handle<FixedArray> array = Factory::NewFixedArray(2);
|
| + array->set(0, Smi::FromInt(value));
|
| + array->set(1, Smi::FromInt(end));
|
| Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
|
| SetElement(result, match_count, pair);
|
| match_count++;
|
| index = end;
|
| if (needle_length == 0) index++;
|
| - last_value = value;
|
| }
|
| return result;
|
| }
|
| @@ -390,30 +354,24 @@
|
| // Irregexp implementation.
|
|
|
|
|
| -// Ensures that the regexp object contains a compiled version of the
|
| -// source for either ASCII or non-ASCII strings.
|
| -// If the compiled version doesn't already exist, it is compiled
|
| +// Retrieves a compiled version of the regexp for either ASCII or non-ASCII
|
| +// strings. If the compiled version doesn't already exist, it is compiled
|
| // from the source pattern.
|
| -// If compilation fails, an exception is thrown and this function
|
| -// returns false.
|
| -bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
|
| - bool is_ascii) {
|
| - int index;
|
| - if (is_ascii) {
|
| - index = JSRegExp::kIrregexpASCIICodeIndex;
|
| - } else {
|
| - index = JSRegExp::kIrregexpUC16CodeIndex;
|
| +// Irregexp is not feature complete yet. If the regexp cannot be parsed or
|
| +// compiled, an empty handle is returned. Note that the parse-failure and
|
| +// "RegExp too big" paths throw an exception before returning it.
|
| +static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re,
|
| + bool is_ascii) {
|
| + ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
|
| + Handle<FixedArray> alternatives(
|
| + FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)));
|
| + ASSERT_EQ(2, alternatives->length());
|
| +
|
| + int index = is_ascii ? 0 : 1;
|
| + Object* entry = alternatives->get(index);
|
| + if (!entry->IsNull()) {
|
| + return Handle<FixedArray>(FixedArray::cast(entry));
|
| }
|
| - Object* entry = re->DataAt(index);
|
| - if (!entry->IsTheHole()) {
|
| - // A value has already been compiled.
|
| - if (entry->IsJSObject()) {
|
| - // If it's a JS value, it's an error.
|
| - Top::Throw(entry);
|
| - return false;
|
| - }
|
| - return true;
|
| - }
|
|
|
| // Compile the RegExp.
|
| ZoneScope zone_scope(DELETE_ON_EXIT);
|
| @@ -434,116 +392,78 @@
|
| pattern,
|
| compile_data.error,
|
| "malformed_regexp");
|
| - return false;
|
| + return Handle<FixedArray>::null();
|
| }
|
| - RegExpEngine::CompilationResult result =
|
| + Handle<FixedArray> compiled_entry =
|
| RegExpEngine::Compile(&compile_data,
|
| flags.is_ignore_case(),
|
| flags.is_multiline(),
|
| pattern,
|
| is_ascii);
|
| - if (result.error_message != NULL) {
|
| - // Unable to compile regexp.
|
| - Handle<JSArray> array = Factory::NewJSArray(2);
|
| - SetElement(array, 0, pattern);
|
| - SetElement(array,
|
| - 1,
|
| - Factory::NewStringFromUtf8(CStrVector(result.error_message)));
|
| - Handle<Object> regexp_err =
|
| - Factory::NewSyntaxError("malformed_regexp", array);
|
| - Top::Throw(*regexp_err);
|
| - re->SetDataAt(index, *regexp_err);
|
| - return false;
|
| + if (!compiled_entry.is_null()) {
|
| + alternatives->set(index, *compiled_entry);
|
| }
|
| -
|
| - NoHandleAllocation no_handles;
|
| -
|
| - FixedArray* data = FixedArray::cast(re->data());
|
| - data->set(index, result.code);
|
| - int register_max = IrregexpMaxRegisterCount(data);
|
| - if (result.num_registers > register_max) {
|
| - SetIrregexpMaxRegisterCount(data, result.num_registers);
|
| - }
|
| -
|
| - return true;
|
| + return compiled_entry;
|
| }
|
|
|
|
|
| -int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) {
|
| - return Smi::cast(
|
| - re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
|
| +int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) {
|
| + return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value();
|
| }
|
|
|
|
|
| -void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) {
|
| - re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value));
|
| +int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) {
|
| + return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value();
|
| }
|
|
|
|
|
| -int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) {
|
| - return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
|
| +Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) {
|
| + ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
|
| + == RegExpMacroAssembler::kBytecodeImplementation);
|
| + return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex)));
|
| }
|
|
|
|
|
| -int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {
|
| - return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
|
| +Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) {
|
| + ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
|
| + != RegExpMacroAssembler::kBytecodeImplementation);
|
| + return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex)));
|
| }
|
|
|
|
|
| -ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {
|
| - int index;
|
| - if (is_ascii) {
|
| - index = JSRegExp::kIrregexpASCIICodeIndex;
|
| - } else {
|
| - index = JSRegExp::kIrregexpUC16CodeIndex;
|
| - }
|
| - return ByteArray::cast(re->get(index));
|
| +Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
|
| + Handle<String> pattern,
|
| + JSRegExp::Flags flags) {
|
| + // Make space for ASCII and UC16 versions.
|
| + Handle<FixedArray> alternatives = Factory::NewFixedArray(2);
|
| + alternatives->set_null(0);
|
| + alternatives->set_null(1);
|
| + Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives);
|
| + return re;
|
| }
|
|
|
|
|
| -Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
|
| - int index;
|
| - if (is_ascii) {
|
| - index = JSRegExp::kIrregexpASCIICodeIndex;
|
| - } else {
|
| - index = JSRegExp::kIrregexpUC16CodeIndex;
|
| - }
|
| - return Code::cast(re->get(index));
|
| -}
|
| -
|
| -
|
| -void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
|
| - Handle<String> pattern,
|
| - JSRegExp::Flags flags,
|
| - int capture_count) {
|
| - // Initialize compiled code entries to null.
|
| - Factory::SetRegExpIrregexpData(re,
|
| - JSRegExp::IRREGEXP,
|
| - pattern,
|
| - flags,
|
| - capture_count);
|
| -}
|
| -
|
| -
|
| Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
|
| Handle<String> subject,
|
| - int index,
|
| - Handle<JSArray> last_match_info) {
|
| + Handle<Object> index) {
|
| ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
|
| + ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
|
|
|
| bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
|
| - if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
|
| + Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
|
| + if (irregexp.is_null()) {
|
| + // We can't handle the RegExp with Irregexp.
|
| return Handle<Object>::null();
|
| }
|
|
|
| // Prepare space for the return values.
|
| - Handle<FixedArray> re_data(FixedArray::cast(regexp->data()));
|
| - int number_of_capture_registers =
|
| - (IrregexpNumberOfCaptures(*re_data) + 1) * 2;
|
| - OffsetsVector offsets(number_of_capture_registers);
|
| + int number_of_registers = IrregexpNumberOfRegisters(irregexp);
|
| + OffsetsVector offsets(number_of_registers);
|
|
|
| - int previous_index = index;
|
| + int num_captures = IrregexpNumberOfCaptures(irregexp);
|
|
|
| + int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
|
| +
|
| #ifdef DEBUG
|
| if (FLAG_trace_regexp_bytecodes) {
|
| String* pattern = regexp->Pattern();
|
| @@ -556,11 +476,8 @@
|
| FlattenString(subject);
|
| }
|
|
|
| - last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
|
| -
|
| - return IrregexpExecOnce(re_data,
|
| - number_of_capture_registers,
|
| - last_match_info,
|
| + return IrregexpExecOnce(irregexp,
|
| + num_captures,
|
| subject,
|
| previous_index,
|
| offsets.vector(),
|
| @@ -569,33 +486,29 @@
|
|
|
|
|
| Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
|
| - Handle<String> subject,
|
| - Handle<JSArray> last_match_info) {
|
| + Handle<String> subject) {
|
| ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
|
| - Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()));
|
|
|
| bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
|
| - if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
|
| + Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
|
| + if (irregexp.is_null()) {
|
| return Handle<Object>::null();
|
| }
|
|
|
| // Prepare space for the return values.
|
| - int number_of_capture_registers =
|
| - (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
|
| - OffsetsVector offsets(number_of_capture_registers);
|
| + int number_of_registers = IrregexpNumberOfRegisters(irregexp);
|
| + OffsetsVector offsets(number_of_registers);
|
|
|
| int previous_index = 0;
|
|
|
| Handle<JSArray> result = Factory::NewJSArray(0);
|
| - int result_length = 0;
|
| + int i = 0;
|
| Handle<Object> matches;
|
|
|
| if (!subject->IsFlat(StringShape(*subject))) {
|
| FlattenString(subject);
|
| }
|
|
|
| - last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
|
| -
|
| while (true) {
|
| if (previous_index > subject->length() || previous_index < 0) {
|
| // Per ECMA-262 15.10.6.2, if the previous index is greater than the
|
| @@ -610,10 +523,8 @@
|
| PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
|
| }
|
| #endif
|
| - HandleScope scope;
|
| matches = IrregexpExecOnce(irregexp,
|
| - number_of_capture_registers,
|
| - last_match_info,
|
| + IrregexpNumberOfCaptures(irregexp),
|
| subject,
|
| previous_index,
|
| offsets.vector(),
|
| @@ -625,25 +536,12 @@
|
| }
|
|
|
| if (matches->IsJSArray()) {
|
| - // Create an array that looks like the static last_match_info array
|
| - // that is attached to the global RegExp object. We will be returning
|
| - // an array of these.
|
| - Handle<FixedArray> matches_array(JSArray::cast(*matches)->elements());
|
| - Handle<JSArray> latest_match =
|
| - Factory::NewJSArray(kFirstCapture + number_of_capture_registers);
|
| - Handle<FixedArray> latest_match_array(latest_match->elements());
|
| -
|
| - for (int i = 0; i < number_of_capture_registers; i++) {
|
| - SetCapture(*latest_match_array, i, GetCapture(*matches_array, i));
|
| - }
|
| - SetLastCaptureCount(*latest_match_array, number_of_capture_registers);
|
| -
|
| - SetElement(result, result_length, latest_match);
|
| - result_length++;
|
| - previous_index = GetCapture(*matches_array, 1);
|
| - if (GetCapture(*matches_array, 0) == previous_index)
|
| + SetElement(result, i, matches);
|
| + i++;
|
| + previous_index = offsets.vector()[1];
|
| + if (offsets.vector()[0] == offsets.vector()[1]) {
|
| previous_index++;
|
| -
|
| + }
|
| } else {
|
| ASSERT(matches->IsNull());
|
| return result;
|
| @@ -653,124 +551,131 @@
|
| }
|
|
|
|
|
| -Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp,
|
| - int number_of_capture_registers,
|
| - Handle<JSArray> last_match_info,
|
| +Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp,
|
| + int num_captures,
|
| Handle<String> subject,
|
| int previous_index,
|
| int* offsets_vector,
|
| int offsets_vector_length) {
|
| - StringShape shape(*subject);
|
| - ASSERT(subject->IsFlat(shape));
|
| - bool is_ascii = shape.IsAsciiRepresentation();
|
| + ASSERT(subject->IsFlat(StringShape(*subject)));
|
| bool rc;
|
|
|
| - if (FLAG_regexp_native) {
|
| + int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
|
| +
|
| + switch (tag) {
|
| + case RegExpMacroAssembler::kIA32Implementation: {
|
| #ifndef ARM
|
| - Handle<Code> code(IrregexpNativeCode(*regexp, is_ascii));
|
| + Handle<Code> code = IrregexpNativeCode(irregexp);
|
|
|
| - // Character offsets into string.
|
| - int start_offset = previous_index;
|
| - int end_offset = subject->length(shape);
|
| + StringShape shape(*subject);
|
|
|
| - if (shape.IsCons()) {
|
| - subject = Handle<String>(ConsString::cast(*subject)->first());
|
| - } else if (shape.IsSliced()) {
|
| - SlicedString* slice = SlicedString::cast(*subject);
|
| - start_offset += slice->start();
|
| - end_offset += slice->start();
|
| - subject = Handle<String>(slice->buffer());
|
| - }
|
| + // Character offsets into string.
|
| + int start_offset = previous_index;
|
| + int end_offset = subject->length(shape);
|
|
|
| - // String is now either Sequential or External
|
| - StringShape flatshape(*subject);
|
| - bool is_ascii = flatshape.IsAsciiRepresentation();
|
| - int char_size_shift = is_ascii ? 0 : 1;
|
| + if (shape.IsCons()) {
|
| + subject = Handle<String>(ConsString::cast(*subject)->first());
|
| + } else if (shape.IsSliced()) {
|
| + SlicedString* slice = SlicedString::cast(*subject);
|
| + start_offset += slice->start();
|
| + end_offset += slice->start();
|
| + subject = Handle<String>(slice->buffer());
|
| + }
|
|
|
| - RegExpMacroAssemblerIA32::Result res;
|
| + // String is now either Sequential or External
|
| + StringShape flatshape(*subject);
|
| + bool is_ascii = flatshape.IsAsciiRepresentation();
|
| + int char_size_shift = is_ascii ? 0 : 1;
|
|
|
| - if (flatshape.IsExternal()) {
|
| - const byte* address;
|
| - if (is_ascii) {
|
| - ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
|
| - address = reinterpret_cast<const byte*>(ext->resource()->data());
|
| - } else {
|
| - ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
|
| - address = reinterpret_cast<const byte*>(ext->resource()->data());
|
| + RegExpMacroAssemblerIA32::Result res;
|
| +
|
| + if (flatshape.IsExternal()) {
|
| + const byte* address;
|
| + if (is_ascii) {
|
| + ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
|
| + address = reinterpret_cast<const byte*>(ext->resource()->data());
|
| + } else {
|
| + ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
|
| + address = reinterpret_cast<const byte*>(ext->resource()->data());
|
| + }
|
| + res = RegExpMacroAssemblerIA32::Execute(
|
| + *code,
|
| + const_cast<Address*>(&address),
|
| + start_offset << char_size_shift,
|
| + end_offset << char_size_shift,
|
| + offsets_vector,
|
| + previous_index == 0);
|
| + } else { // Sequential string
|
| + ASSERT(StringShape(*subject).IsSequential());
|
| + Address char_address =
|
| + is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
|
| + : SeqTwoByteString::cast(*subject)->GetCharsAddress();
|
| + int byte_offset = char_address - reinterpret_cast<Address>(*subject);
|
| + res = RegExpMacroAssemblerIA32::Execute(
|
| + *code,
|
| + reinterpret_cast<Address*>(subject.location()),
|
| + byte_offset + (start_offset << char_size_shift),
|
| + byte_offset + (end_offset << char_size_shift),
|
| + offsets_vector,
|
| + previous_index == 0);
|
| }
|
| - res = RegExpMacroAssemblerIA32::Execute(
|
| - *code,
|
| - const_cast<Address*>(&address),
|
| - start_offset << char_size_shift,
|
| - end_offset << char_size_shift,
|
| - offsets_vector,
|
| - previous_index == 0);
|
| - } else { // Sequential string
|
| - ASSERT(StringShape(*subject).IsSequential());
|
| - Address char_address =
|
| - is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
|
| - : SeqTwoByteString::cast(*subject)->GetCharsAddress();
|
| - int byte_offset = char_address - reinterpret_cast<Address>(*subject);
|
| - res = RegExpMacroAssemblerIA32::Execute(
|
| - *code,
|
| - reinterpret_cast<Address*>(subject.location()),
|
| - byte_offset + (start_offset << char_size_shift),
|
| - byte_offset + (end_offset << char_size_shift),
|
| - offsets_vector,
|
| - previous_index == 0);
|
| - }
|
|
|
| - if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
|
| - ASSERT(Top::has_pending_exception());
|
| - return Handle<Object>::null();
|
| - }
|
| - rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
|
| + if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
|
| + ASSERT(Top::has_pending_exception());
|
| + return Handle<Object>::null();
|
| + }
|
| + rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
|
|
|
| - if (rc) {
|
| - // Capture values are relative to start_offset only.
|
| - for (int i = 0; i < offsets_vector_length; i++) {
|
| - if (offsets_vector[i] >= 0) {
|
| - offsets_vector[i] += previous_index;
|
| + if (rc) {
|
| + // Capture values are relative to start_offset only.
|
| + for (int i = 0; i < offsets_vector_length; i++) {
|
| + if (offsets_vector[i] >= 0) {
|
| + offsets_vector[i] += previous_index;
|
| + }
|
| }
|
| }
|
| - }
|
| - } else {
|
| + break;
|
| #else
|
| - // Unimplemented on ARM, fall through to bytecode.
|
| - }
|
| - {
|
| + UNIMPLEMENTED();
|
| + rc = false;
|
| + break;
|
| #endif
|
| - for (int i = number_of_capture_registers - 1; i >= 0; i--) {
|
| - offsets_vector[i] = -1;
|
| }
|
| - Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii));
|
| + case RegExpMacroAssembler::kBytecodeImplementation: {
|
| + for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
|
| + offsets_vector[i] = -1;
|
| + }
|
| + Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp);
|
|
|
| - rc = IrregexpInterpreter::Match(byte_codes,
|
| - subject,
|
| - offsets_vector,
|
| - previous_index);
|
| + rc = IrregexpInterpreter::Match(byte_codes,
|
| + subject,
|
| + offsets_vector,
|
| + previous_index);
|
| + break;
|
| + }
|
| + case RegExpMacroAssembler::kARMImplementation:
|
| + default:
|
| + UNREACHABLE();
|
| + rc = false;
|
| + break;
|
| }
|
|
|
| if (!rc) {
|
| return Factory::null_value();
|
| }
|
|
|
| - FixedArray* array = last_match_info->elements();
|
| - ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
|
| + Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
|
| // The captures come in (start, end+1) pairs.
|
| - for (int i = 0; i < number_of_capture_registers; i += 2) {
|
| - SetCapture(array, i, offsets_vector[i]);
|
| - SetCapture(array, i + 1, offsets_vector[i + 1]);
|
| + for (int i = 0; i < 2 * (num_captures + 1); i += 2) {
|
| + array->set(i, Smi::FromInt(offsets_vector[i]));
|
| + array->set(i + 1, Smi::FromInt(offsets_vector[i + 1]));
|
| }
|
| - SetLastCaptureCount(array, number_of_capture_registers);
|
| - SetLastSubject(array, *subject);
|
| - SetLastInput(array, *subject);
|
| - return last_match_info;
|
| + return Factory::NewJSArrayWithElements(array);
|
| }
|
|
|
|
|
| // -------------------------------------------------------------------
|
| -// Implementation of the Irregexp regular expression engine.
|
| +// Implementation of the Irregexp regular expression engine.
|
| //
|
| // The Irregexp regular expression engine is intended to be a complete
|
| // implementation of ECMAScript regular expressions. It generates either
|
| @@ -987,10 +892,10 @@
|
| return next_register_++;
|
| }
|
|
|
| - RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,
|
| - RegExpNode* start,
|
| - int capture_count,
|
| - Handle<String> pattern);
|
| + Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
|
| + RegExpNode* start,
|
| + int capture_count,
|
| + Handle<String> pattern);
|
|
|
| inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
|
|
|
| @@ -1035,8 +940,15 @@
|
| };
|
|
|
|
|
| -static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
|
| - return RegExpEngine::CompilationResult("RegExp too big");
|
| +static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) {
|
| + Handle<JSArray> array = Factory::NewJSArray(2);
|
| + SetElement(array, 0, pattern);
|
| + const char* message = "RegExp too big";
|
| + SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));
|
| + Handle<Object> regexp_err =
|
| + Factory::NewSyntaxError("malformed_regexp", array);
|
| + Top::Throw(*regexp_err);
|
| + return Handle<FixedArray>();
|
| }
|
|
|
|
|
| @@ -1054,7 +966,7 @@
|
| }
|
|
|
|
|
| -RegExpEngine::CompilationResult RegExpCompiler::Assemble(
|
| +Handle<FixedArray> RegExpCompiler::Assemble(
|
| RegExpMacroAssembler* macro_assembler,
|
| RegExpNode* start,
|
| int capture_count,
|
| @@ -1076,17 +988,24 @@
|
| while (!work_list.is_empty()) {
|
| work_list.RemoveLast()->Emit(this, &new_trace);
|
| }
|
| - if (reg_exp_too_big_) return IrregexpRegExpTooBig();
|
| -
|
| + if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern);
|
| + Handle<FixedArray> array =
|
| + Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);
|
| + array->set(RegExpImpl::kIrregexpImplementationIndex,
|
| + Smi::FromInt(macro_assembler_->Implementation()));
|
| + array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex,
|
| + Smi::FromInt(next_register_));
|
| + array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex,
|
| + Smi::FromInt(capture_count));
|
| Handle<Object> code = macro_assembler_->GetCode(pattern);
|
| -
|
| + array->set(RegExpImpl::kIrregexpCodeIndex, *code);
|
| work_list_ = NULL;
|
| #ifdef DEBUG
|
| if (FLAG_trace_regexp_assembler) {
|
| delete macro_assembler_;
|
| }
|
| #endif
|
| - return RegExpEngine::CompilationResult(*code, next_register_);
|
| + return array;
|
| }
|
|
|
|
|
| @@ -3804,6 +3723,9 @@
|
| // |
|
| // [if r >= f] \----> ...
|
| //
|
| + //
|
| + // TODO(someone): clear captures on repetition and handle empty
|
| + // matches.
|
|
|
| // 15.10.2.5 RepeatMatcher algorithm.
|
| // The parser has already eliminated the case where max is 0. In the case
|
| @@ -4670,13 +4592,13 @@
|
| }
|
|
|
|
|
| -RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
|
| - bool ignore_case,
|
| - bool is_multiline,
|
| - Handle<String> pattern,
|
| - bool is_ascii) {
|
| +Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data,
|
| + bool ignore_case,
|
| + bool is_multiline,
|
| + Handle<String> pattern,
|
| + bool is_ascii) {
|
| if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
|
| - return IrregexpRegExpTooBig();
|
| + return IrregexpRegExpTooBig(pattern);
|
| }
|
| RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);
|
| // Wrap the body of the regexp in capture #0.
|
|
|