Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1685)

Unified Diff: src/jsregexp.cc

Issue 43075: * Reapply revisions 1383, 1384, 1391, 1398, 1401, 1402,... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 11 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/jsregexp.cc
===================================================================
--- src/jsregexp.cc (revision 1489)
+++ src/jsregexp.cc (working copy)
@@ -213,55 +213,54 @@
Handle<Object> result;
if (in_cache) {
re->set_data(*cached);
- result = re;
+ return re;
+ }
+ FlattenString(pattern);
+ ZoneScope zone_scope(DELETE_ON_EXIT);
+ RegExpCompileData parse_result;
+ FlatStringReader reader(pattern);
+ if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
+ // Throw an exception if we fail to parse the pattern.
+ ThrowRegExpException(re,
+ pattern,
+ parse_result.error,
+ "malformed_regexp");
+ return Handle<Object>::null();
+ }
+
+ if (parse_result.simple && !flags.is_ignore_case()) {
+ // Parse-tree is a single atom that is equal to the pattern.
+ AtomCompile(re, pattern, flags, pattern);
+ } else if (parse_result.tree->IsAtom() &&
+ !flags.is_ignore_case() &&
+ parse_result.capture_count == 0) {
+ RegExpAtom* atom = parse_result.tree->AsAtom();
+ Vector<const uc16> atom_pattern = atom->data();
+ Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
+ AtomCompile(re, pattern, flags, atom_string);
} else {
- FlattenString(pattern);
- ZoneScope zone_scope(DELETE_ON_EXIT);
- RegExpCompileData parse_result;
- FlatStringReader reader(pattern);
- if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
- // Throw an exception if we fail to parse the pattern.
- ThrowRegExpException(re,
- pattern,
- parse_result.error,
- "malformed_regexp");
- return Handle<Object>::null();
- }
-
- if (parse_result.simple && !flags.is_ignore_case()) {
- // Parse-tree is a single atom that is equal to the pattern.
- result = AtomCompile(re, pattern, flags, pattern);
- } else if (parse_result.tree->IsAtom() &&
- !flags.is_ignore_case() &&
- parse_result.capture_count == 0) {
- RegExpAtom* atom = parse_result.tree->AsAtom();
- Vector<const uc16> atom_pattern = atom->data();
- Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
- result = AtomCompile(re, pattern, flags, atom_string);
- } else {
- result = IrregexpPrepare(re, pattern, flags);
- }
- Object* data = re->data();
- if (data->IsFixedArray()) {
- // If compilation succeeded then the data is set on the regexp
- // and we can store it in the cache.
- Handle<FixedArray> data(FixedArray::cast(re->data()));
- CompilationCache::PutRegExp(pattern, flags, data);
- }
+ IrregexpPrepare(re, pattern, flags, parse_result.capture_count);
}
+ ASSERT(re->data()->IsFixedArray());
+ // Compilation succeeded so the data is set on the regexp
+ // and we can store it in the cache.
+ Handle<FixedArray> data(FixedArray::cast(re->data()));
+ CompilationCache::PutRegExp(pattern, flags, data);
- return result;
+ return re;
}
Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
Handle<String> subject,
- Handle<Object> index) {
+ int index,
+ Handle<JSArray> last_match_info) {
switch (regexp->TypeTag()) {
case JSRegExp::ATOM:
- return AtomExec(regexp, subject, index);
+ return AtomExec(regexp, subject, index, last_match_info);
case JSRegExp::IRREGEXP: {
- Handle<Object> result = IrregexpExec(regexp, subject, index);
+ Handle<Object> result =
+ IrregexpExec(regexp, subject, index, last_match_info);
ASSERT(!result.is_null() || Top::has_pending_exception());
return result;
}
@@ -273,12 +272,14 @@
Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
- Handle<String> subject) {
+ Handle<String> subject,
+ Handle<JSArray> last_match_info) {
switch (regexp->TypeTag()) {
case JSRegExp::ATOM:
- return AtomExecGlobal(regexp, subject);
+ return AtomExecGlobal(regexp, subject, last_match_info);
case JSRegExp::IRREGEXP: {
- Handle<Object> result = IrregexpExecGlobal(regexp, subject);
+ Handle<Object> result =
+ IrregexpExecGlobal(regexp, subject, last_match_info);
ASSERT(!result.is_null() || Top::has_pending_exception());
return result;
}
@@ -292,60 +293,95 @@
// RegExp Atom implementation: Simple string search using indexOf.
-Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags,
- Handle<String> match_pattern) {
- Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern);
- return re;
+void RegExpImpl::AtomCompile(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags,
+ Handle<String> match_pattern) {
+ Factory::SetRegExpAtomData(re,
+ JSRegExp::ATOM,
+ pattern,
+ flags,
+ match_pattern);
}
+static void SetAtomLastCapture(FixedArray* array,
+ String* subject,
+ int from,
+ int to) {
+ NoHandleAllocation no_handles;
+ RegExpImpl::SetLastCaptureCount(array, 2);
+ RegExpImpl::SetLastSubject(array, subject);
+ RegExpImpl::SetLastInput(array, subject);
+ RegExpImpl::SetCapture(array, 0, from);
+ RegExpImpl::SetCapture(array, 1, to);
+}
+
+
Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
Handle<String> subject,
- Handle<Object> index) {
+ int index,
+ Handle<JSArray> last_match_info) {
Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
- uint32_t start_index;
- if (!Array::IndexFromObject(*index, &start_index)) {
- return Handle<Smi>(Smi::FromInt(-1));
- }
+ uint32_t start_index = index;
int value = Runtime::StringMatch(subject, needle, start_index);
if (value == -1) return Factory::null_value();
+ ASSERT(last_match_info->HasFastElements());
- Handle<FixedArray> array = Factory::NewFixedArray(2);
- array->set(0, Smi::FromInt(value));
- array->set(1, Smi::FromInt(value + needle->length()));
- return Factory::NewJSArrayWithElements(array);
+ {
+ NoHandleAllocation no_handles;
+ FixedArray* array = last_match_info->elements();
+ SetAtomLastCapture(array, *subject, value, value + needle->length());
+ }
+ return last_match_info;
}
Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
- Handle<String> subject) {
+ Handle<String> subject,
+ Handle<JSArray> last_match_info) {
Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
+ ASSERT(last_match_info->HasFastElements());
Handle<JSArray> result = Factory::NewJSArray(1);
int index = 0;
int match_count = 0;
int subject_length = subject->length();
int needle_length = needle->length();
+ int last_value = -1;
while (true) {
+ HandleScope scope;
int value = -1;
if (index + needle_length <= subject_length) {
value = Runtime::StringMatch(subject, needle, index);
}
- if (value == -1) break;
- HandleScope scope;
+ if (value == -1) {
+ if (last_value != -1) {
+ Handle<FixedArray> array(last_match_info->elements());
+ SetAtomLastCapture(*array,
+ *subject,
+ last_value,
+ last_value + needle->length());
+ }
+ break;
+ }
+
int end = value + needle_length;
- Handle<FixedArray> array = Factory::NewFixedArray(2);
- array->set(0, Smi::FromInt(value));
- array->set(1, Smi::FromInt(end));
+ // Create an array that looks like the static last_match_info array
+ // that is attached to the global RegExp object. We will be returning
+ // an array of these.
+ Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2);
+ SetCapture(*array, 0, value);
+ SetCapture(*array, 1, end);
+ SetLastCaptureCount(*array, 2);
Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
SetElement(result, match_count, pair);
match_count++;
index = end;
if (needle_length == 0) index++;
+ last_value = value;
}
return result;
}
@@ -354,24 +390,30 @@
// Irregexp implementation.
-// Retrieves a compiled version of the regexp for either ASCII or non-ASCII
-// strings. If the compiled version doesn't already exist, it is compiled
+// Ensures that the regexp object contains a compiled version of the
+// source for either ASCII or non-ASCII strings.
+// If the compiled version doesn't already exist, it is compiled
// from the source pattern.
-// Irregexp is not feature complete yet. If there is something in the
-// regexp that the compiler cannot currently handle, an empty
-// handle is returned, but no exception is thrown.
-static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re,
- bool is_ascii) {
- ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
- Handle<FixedArray> alternatives(
- FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)));
- ASSERT_EQ(2, alternatives->length());
-
- int index = is_ascii ? 0 : 1;
- Object* entry = alternatives->get(index);
- if (!entry->IsNull()) {
- return Handle<FixedArray>(FixedArray::cast(entry));
+// If compilation fails, an exception is thrown and this function
+// returns false.
+bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
+ bool is_ascii) {
+ int index;
+ if (is_ascii) {
+ index = JSRegExp::kIrregexpASCIICodeIndex;
+ } else {
+ index = JSRegExp::kIrregexpUC16CodeIndex;
}
+ Object* entry = re->DataAt(index);
+ if (!entry->IsTheHole()) {
+ // A value has already been compiled.
+ if (entry->IsJSObject()) {
+ // If it's a JS value, it's an error.
+ Top::Throw(entry);
+ return false;
+ }
+ return true;
+ }
// Compile the RegExp.
ZoneScope zone_scope(DELETE_ON_EXIT);
@@ -392,78 +434,116 @@
pattern,
compile_data.error,
"malformed_regexp");
- return Handle<FixedArray>::null();
+ return false;
}
- Handle<FixedArray> compiled_entry =
+ RegExpEngine::CompilationResult result =
RegExpEngine::Compile(&compile_data,
flags.is_ignore_case(),
flags.is_multiline(),
pattern,
is_ascii);
- if (!compiled_entry.is_null()) {
- alternatives->set(index, *compiled_entry);
+ if (result.error_message != NULL) {
+ // Unable to compile regexp.
+ Handle<JSArray> array = Factory::NewJSArray(2);
+ SetElement(array, 0, pattern);
+ SetElement(array,
+ 1,
+ Factory::NewStringFromUtf8(CStrVector(result.error_message)));
+ Handle<Object> regexp_err =
+ Factory::NewSyntaxError("malformed_regexp", array);
+ Top::Throw(*regexp_err);
+ re->SetDataAt(index, *regexp_err);
+ return false;
}
- return compiled_entry;
+
+ NoHandleAllocation no_handles;
+
+ FixedArray* data = FixedArray::cast(re->data());
+ data->set(index, result.code);
+ int register_max = IrregexpMaxRegisterCount(data);
+ if (result.num_registers > register_max) {
+ SetIrregexpMaxRegisterCount(data, result.num_registers);
+ }
+
+ return true;
}
-int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) {
- return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value();
+int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) {
+ return Smi::cast(
+ re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
}
-int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) {
- return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value();
+void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) {
+ re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value));
}
-Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) {
- ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
- == RegExpMacroAssembler::kBytecodeImplementation);
- return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex)));
+int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) {
+ return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
}
-Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) {
- ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
- != RegExpMacroAssembler::kBytecodeImplementation);
- return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex)));
+int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {
+ return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
}
-Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags) {
- // Make space for ASCII and UC16 versions.
- Handle<FixedArray> alternatives = Factory::NewFixedArray(2);
- alternatives->set_null(0);
- alternatives->set_null(1);
- Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives);
- return re;
+ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {
+ int index;
+ if (is_ascii) {
+ index = JSRegExp::kIrregexpASCIICodeIndex;
+ } else {
+ index = JSRegExp::kIrregexpUC16CodeIndex;
+ }
+ return ByteArray::cast(re->get(index));
}
+Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
+ int index;
+ if (is_ascii) {
+ index = JSRegExp::kIrregexpASCIICodeIndex;
+ } else {
+ index = JSRegExp::kIrregexpUC16CodeIndex;
+ }
+ return Code::cast(re->get(index));
+}
+
+
+void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags,
+ int capture_count) {
+ // Initialize compiled code entries to the hole (i.e. not yet compiled).
+ Factory::SetRegExpIrregexpData(re,
+ JSRegExp::IRREGEXP,
+ pattern,
+ flags,
+ capture_count);
+}
+
+
Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
Handle<String> subject,
- Handle<Object> index) {
+ int index,
+ Handle<JSArray> last_match_info) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
- ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
- Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
- if (irregexp.is_null()) {
- // We can't handle the RegExp with IRRegExp.
+ if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
return Handle<Object>::null();
}
// Prepare space for the return values.
- int number_of_registers = IrregexpNumberOfRegisters(irregexp);
- OffsetsVector offsets(number_of_registers);
+ Handle<FixedArray> re_data(FixedArray::cast(regexp->data()));
+ int number_of_capture_registers =
+ (IrregexpNumberOfCaptures(*re_data) + 1) * 2;
+ OffsetsVector offsets(number_of_capture_registers);
- int num_captures = IrregexpNumberOfCaptures(irregexp);
+ int previous_index = index;
- int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
-
#ifdef DEBUG
if (FLAG_trace_regexp_bytecodes) {
String* pattern = regexp->Pattern();
@@ -476,8 +556,11 @@
FlattenString(subject);
}
- return IrregexpExecOnce(irregexp,
- num_captures,
+ last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
+
+ return IrregexpExecOnce(re_data,
+ number_of_capture_registers,
+ last_match_info,
subject,
previous_index,
offsets.vector(),
@@ -486,29 +569,33 @@
Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
- Handle<String> subject) {
+ Handle<String> subject,
+ Handle<JSArray> last_match_info) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+ Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()));
bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
- Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
- if (irregexp.is_null()) {
+ if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
return Handle<Object>::null();
}
// Prepare space for the return values.
- int number_of_registers = IrregexpNumberOfRegisters(irregexp);
- OffsetsVector offsets(number_of_registers);
+ int number_of_capture_registers =
+ (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
+ OffsetsVector offsets(number_of_capture_registers);
int previous_index = 0;
Handle<JSArray> result = Factory::NewJSArray(0);
- int i = 0;
+ int result_length = 0;
Handle<Object> matches;
if (!subject->IsFlat(StringShape(*subject))) {
FlattenString(subject);
}
+ last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
+
while (true) {
if (previous_index > subject->length() || previous_index < 0) {
// Per ECMA-262 15.10.6.2, if the previous index is greater than the
@@ -523,8 +610,10 @@
PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
}
#endif
+ HandleScope scope;
matches = IrregexpExecOnce(irregexp,
- IrregexpNumberOfCaptures(irregexp),
+ number_of_capture_registers,
+ last_match_info,
subject,
previous_index,
offsets.vector(),
@@ -536,12 +625,25 @@
}
if (matches->IsJSArray()) {
- SetElement(result, i, matches);
- i++;
- previous_index = offsets.vector()[1];
- if (offsets.vector()[0] == offsets.vector()[1]) {
+ // Create an array that looks like the static last_match_info array
+ // that is attached to the global RegExp object. We will be returning
+ // an array of these.
+ Handle<FixedArray> matches_array(JSArray::cast(*matches)->elements());
+ Handle<JSArray> latest_match =
+ Factory::NewJSArray(kFirstCapture + number_of_capture_registers);
+ Handle<FixedArray> latest_match_array(latest_match->elements());
+
+ for (int i = 0; i < number_of_capture_registers; i++) {
+ SetCapture(*latest_match_array, i, GetCapture(*matches_array, i));
+ }
+ SetLastCaptureCount(*latest_match_array, number_of_capture_registers);
+
+ SetElement(result, result_length, latest_match);
+ result_length++;
+ previous_index = GetCapture(*matches_array, 1);
+ if (GetCapture(*matches_array, 0) == previous_index)
previous_index++;
- }
+
} else {
ASSERT(matches->IsNull());
return result;
@@ -551,131 +653,125 @@
}
-Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp,
- int num_captures,
+Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp,
+ int number_of_capture_registers,
+ Handle<JSArray> last_match_info,
Handle<String> subject,
int previous_index,
int* offsets_vector,
int offsets_vector_length) {
- ASSERT(subject->IsFlat(StringShape(*subject)));
+ StringShape shape(*subject);
+ ASSERT(subject->IsFlat(shape));
+ bool is_ascii = shape.IsAsciiRepresentation();
bool rc;
- int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
-
- switch (tag) {
- case RegExpMacroAssembler::kIA32Implementation: {
+ Handle<String> original_subject = subject;
+ if (FLAG_regexp_native) {
#ifndef ARM
- Handle<Code> code = IrregexpNativeCode(irregexp);
+ Handle<Code> code(IrregexpNativeCode(*regexp, is_ascii));
- StringShape shape(*subject);
+ // Character offsets into string.
+ int start_offset = previous_index;
+ int end_offset = subject->length(shape);
- // Character offsets into string.
- int start_offset = previous_index;
- int end_offset = subject->length(shape);
+ if (shape.IsCons()) {
+ subject = Handle<String>(ConsString::cast(*subject)->first());
+ } else if (shape.IsSliced()) {
+ SlicedString* slice = SlicedString::cast(*subject);
+ start_offset += slice->start();
+ end_offset += slice->start();
+ subject = Handle<String>(slice->buffer());
+ }
- if (shape.IsCons()) {
- subject = Handle<String>(ConsString::cast(*subject)->first());
- } else if (shape.IsSliced()) {
- SlicedString* slice = SlicedString::cast(*subject);
- start_offset += slice->start();
- end_offset += slice->start();
- subject = Handle<String>(slice->buffer());
- }
+ // String is now either Sequential or External
+ StringShape flatshape(*subject);
+ bool is_ascii = flatshape.IsAsciiRepresentation();
+ int char_size_shift = is_ascii ? 0 : 1;
- // String is now either Sequential or External
- StringShape flatshape(*subject);
- bool is_ascii = flatshape.IsAsciiRepresentation();
- int char_size_shift = is_ascii ? 0 : 1;
+ RegExpMacroAssemblerIA32::Result res;
- RegExpMacroAssemblerIA32::Result res;
-
- if (flatshape.IsExternal()) {
- const byte* address;
- if (is_ascii) {
- ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
- address = reinterpret_cast<const byte*>(ext->resource()->data());
- } else {
- ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
- address = reinterpret_cast<const byte*>(ext->resource()->data());
- }
- res = RegExpMacroAssemblerIA32::Execute(
- *code,
- const_cast<Address*>(&address),
- start_offset << char_size_shift,
- end_offset << char_size_shift,
- offsets_vector,
- previous_index == 0);
- } else { // Sequential string
- ASSERT(StringShape(*subject).IsSequential());
- Address char_address =
- is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
- : SeqTwoByteString::cast(*subject)->GetCharsAddress();
- int byte_offset = char_address - reinterpret_cast<Address>(*subject);
- res = RegExpMacroAssemblerIA32::Execute(
- *code,
- reinterpret_cast<Address*>(subject.location()),
- byte_offset + (start_offset << char_size_shift),
- byte_offset + (end_offset << char_size_shift),
- offsets_vector,
- previous_index == 0);
+ if (flatshape.IsExternal()) {
+ const byte* address;
+ if (is_ascii) {
+ ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
+ address = reinterpret_cast<const byte*>(ext->resource()->data());
+ } else {
+ ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
+ address = reinterpret_cast<const byte*>(ext->resource()->data());
}
+ res = RegExpMacroAssemblerIA32::Execute(
+ *code,
+ const_cast<Address*>(&address),
+ start_offset << char_size_shift,
+ end_offset << char_size_shift,
+ offsets_vector,
+ previous_index == 0);
+ } else { // Sequential string
+ ASSERT(StringShape(*subject).IsSequential());
+ Address char_address =
+ is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
+ : SeqTwoByteString::cast(*subject)->GetCharsAddress();
+ int byte_offset = char_address - reinterpret_cast<Address>(*subject);
+ res = RegExpMacroAssemblerIA32::Execute(
+ *code,
+ reinterpret_cast<Address*>(subject.location()),
+ byte_offset + (start_offset << char_size_shift),
+ byte_offset + (end_offset << char_size_shift),
+ offsets_vector,
+ previous_index == 0);
+ }
- if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
- ASSERT(Top::has_pending_exception());
- return Handle<Object>::null();
- }
- rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
+ if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
+ ASSERT(Top::has_pending_exception());
+ return Handle<Object>::null();
+ }
+ rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
- if (rc) {
- // Capture values are relative to start_offset only.
- for (int i = 0; i < offsets_vector_length; i++) {
- if (offsets_vector[i] >= 0) {
- offsets_vector[i] += previous_index;
- }
+ if (rc) {
+ // Capture values are relative to start_offset only.
+ for (int i = 0; i < offsets_vector_length; i++) {
+ if (offsets_vector[i] >= 0) {
+ offsets_vector[i] += previous_index;
}
}
- break;
+ }
+ } else {
#else
- UNIMPLEMENTED();
- rc = false;
- break;
+ // Unimplemented on ARM, fall through to bytecode.
Mads Ager (chromium) 2009/03/11 13:49:17 Ouch, this is hard to read. Can we factor this di…
Lasse Reichstein 2009/03/11 13:54:03 I'm all for factoring it differently, but I think…
Erik Corry 2009/03/11 14:01:06 I'll leave it alone for now.
+ }
+ {
#endif
+ for (int i = number_of_capture_registers - 1; i >= 0; i--) {
+ offsets_vector[i] = -1;
}
- case RegExpMacroAssembler::kBytecodeImplementation: {
- for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
- offsets_vector[i] = -1;
- }
- Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp);
+ Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii));
- rc = IrregexpInterpreter::Match(byte_codes,
- subject,
- offsets_vector,
- previous_index);
- break;
- }
- case RegExpMacroAssembler::kARMImplementation:
- default:
- UNREACHABLE();
- rc = false;
- break;
+ rc = IrregexpInterpreter::Match(byte_codes,
+ subject,
+ offsets_vector,
+ previous_index);
}
if (!rc) {
return Factory::null_value();
}
- Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
+ FixedArray* array = last_match_info->elements();
+ ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
// The captures come in (start, end+1) pairs.
- for (int i = 0; i < 2 * (num_captures + 1); i += 2) {
- array->set(i, Smi::FromInt(offsets_vector[i]));
- array->set(i + 1, Smi::FromInt(offsets_vector[i + 1]));
+ for (int i = 0; i < number_of_capture_registers; i += 2) {
+ SetCapture(array, i, offsets_vector[i]);
+ SetCapture(array, i + 1, offsets_vector[i + 1]);
}
- return Factory::NewJSArrayWithElements(array);
+ SetLastCaptureCount(array, number_of_capture_registers);
+ SetLastSubject(array, *original_subject);
+ SetLastInput(array, *original_subject);
+ return last_match_info;
}
// -------------------------------------------------------------------
-// Implmentation of the Irregexp regular expression engine.
+// Implementation of the Irregexp regular expression engine.
//
// The Irregexp regular expression engine is intended to be a complete
// implementation of ECMAScript regular expressions. It generates either
@@ -892,10 +988,10 @@
return next_register_++;
}
- Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
- RegExpNode* start,
- int capture_count,
- Handle<String> pattern);
+ RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,
+ RegExpNode* start,
+ int capture_count,
+ Handle<String> pattern);
inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
@@ -940,15 +1036,8 @@
};
-static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) {
- Handle<JSArray> array = Factory::NewJSArray(2);
- SetElement(array, 0, pattern);
- const char* message = "RegExp too big";
- SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));
- Handle<Object> regexp_err =
- Factory::NewSyntaxError("malformed_regexp", array);
- Top::Throw(*regexp_err);
- return Handle<FixedArray>();
+static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
+ return RegExpEngine::CompilationResult("RegExp too big");
}
@@ -966,7 +1055,7 @@
}
-Handle<FixedArray> RegExpCompiler::Assemble(
+RegExpEngine::CompilationResult RegExpCompiler::Assemble(
RegExpMacroAssembler* macro_assembler,
RegExpNode* start,
int capture_count,
@@ -988,24 +1077,17 @@
while (!work_list.is_empty()) {
work_list.RemoveLast()->Emit(this, &new_trace);
}
- if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern);
- Handle<FixedArray> array =
- Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);
- array->set(RegExpImpl::kIrregexpImplementationIndex,
- Smi::FromInt(macro_assembler_->Implementation()));
- array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex,
- Smi::FromInt(next_register_));
- array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex,
- Smi::FromInt(capture_count));
+ if (reg_exp_too_big_) return IrregexpRegExpTooBig();
+
Handle<Object> code = macro_assembler_->GetCode(pattern);
- array->set(RegExpImpl::kIrregexpCodeIndex, *code);
+
work_list_ = NULL;
#ifdef DEBUG
if (FLAG_trace_regexp_assembler) {
delete macro_assembler_;
}
#endif
- return array;
+ return RegExpEngine::CompilationResult(*code, next_register_);
}
@@ -3723,9 +3805,6 @@
// |
// [if r >= f] \----> ...
//
- //
- // TODO(someone): clear captures on repetition and handle empty
- // matches.
// 15.10.2.5 RepeatMatcher algorithm.
// The parser has already eliminated the case where max is 0. In the case
@@ -4592,13 +4671,13 @@
}
-Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data,
- bool ignore_case,
- bool is_multiline,
- Handle<String> pattern,
- bool is_ascii) {
+RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
+ bool ignore_case,
+ bool is_multiline,
+ Handle<String> pattern,
+ bool is_ascii) {
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
- return IrregexpRegExpTooBig(pattern);
+ return IrregexpRegExpTooBig();
}
RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);
// Wrap the body of the regexp in capture #0.

Powered by Google App Engine
This is Rietveld 408576698