Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(428)

Unified Diff: src/jsregexp.cc

Issue 10943: Wire Regexp2000 up to the normal JS RegExp object. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/regexp2000/
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.cc
===================================================================
--- src/jsregexp.cc (revision 750)
+++ src/jsregexp.cc (working copy)
@@ -40,7 +40,10 @@
#include "compilation-cache.h"
#include "string-stream.h"
#include "parser.h"
+#include "assembler-re2k.h"
#include "regexp-macro-assembler.h"
+#include "regexp-macro-assembler-re2k.h"
+#include "interpreter-re2k.h"
// Including pcre.h undefines DEBUG to avoid getting debug output from
// the JSCRE implementation. Make sure to redefine it in debug mode
@@ -56,9 +59,6 @@
namespace v8 { namespace internal {
-#define CAPTURE_INDEX 0
-#define INTERNAL_INDEX 1
-
static Failure* malloc_failure;
static void* JSREMalloc(size_t size) {
@@ -229,7 +229,14 @@
result = AtomCompile(re, pattern, flags, pattern);
}
} else {
- result = JsrePrepare(re, pattern, flags);
+ RegExpNode* node = NULL;
+ Handle<FixedArray> re2k_data =
+ RegExpEngine::Compile(&parse_result, &node);
+ if (re2k_data.is_null()) {
+ result = JscrePrepare(re, pattern, flags);
+ } else {
+ result = Re2kPrepare(re, pattern, flags, re2k_data);
+ }
}
Object* data = re->data();
if (data->IsFixedArray()) {
@@ -250,9 +257,11 @@
Handle<Object> index) {
switch (regexp->TypeTag()) {
case JSRegExp::JSCRE:
- return JsreExec(regexp, subject, index);
+ return JscreExec(regexp, subject, index);
case JSRegExp::ATOM:
return AtomExec(regexp, subject, index);
+ case JSRegExp::RE2K:
+ return Re2kExec(regexp, subject, index);
default:
UNREACHABLE();
return Handle<Object>();
@@ -264,9 +273,11 @@
Handle<String> subject) {
switch (regexp->TypeTag()) {
case JSRegExp::JSCRE:
- return JsreExecGlobal(regexp, subject);
+ return JscreExecGlobal(regexp, subject);
case JSRegExp::ATOM:
return AtomExecGlobal(regexp, subject);
+ case JSRegExp::RE2K:
+ return Re2kExecGlobal(regexp, subject);
default:
UNREACHABLE();
return Handle<Object>();
@@ -298,12 +309,8 @@
if (value == -1) return Factory::null_value();
Handle<FixedArray> array = Factory::NewFixedArray(2);
- array->set(0,
- Smi::FromInt(value),
- SKIP_WRITE_BARRIER);
- array->set(1,
- Smi::FromInt(value + needle->length()),
- SKIP_WRITE_BARRIER);
+ array->set(0, Smi::FromInt(value));
+ array->set(1, Smi::FromInt(value + needle->length()));
return Factory::NewJSArrayWithElements(array);
}
@@ -327,12 +334,8 @@
int end = value + needle_length;
Handle<FixedArray> array = Factory::NewFixedArray(2);
- array->set(0,
- Smi::FromInt(value),
- SKIP_WRITE_BARRIER);
- array->set(1,
- Smi::FromInt(end),
- SKIP_WRITE_BARRIER);
+ array->set(0, Smi::FromInt(value));
+ array->set(1, Smi::FromInt(end));
Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
SetElement(result, match_count, pair);
match_count++;
@@ -343,15 +346,24 @@
}
-Handle<Object>RegExpImpl::JsrePrepare(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags) {
+Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags) {
Handle<Object> value(Heap::undefined_value());
Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
return re;
}
+Handle<Object>RegExpImpl::Re2kPrepare(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags,
+ Handle<FixedArray> re2k_data) {
+ Factory::SetRegExpData(re, JSRegExp::RE2K, pattern, flags, re2k_data);
+ return re;
+}
+
+
static inline Object* DoCompile(String* pattern,
JSRegExp::Flags flags,
unsigned* number_of_captures,
@@ -398,7 +410,7 @@
}
-Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re) {
+Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) {
ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE);
ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined());
@@ -435,26 +447,65 @@
Handle<ByteArray> internal(
ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));
- Handle<FixedArray> value = Factory::NewFixedArray(2);
- value->set(CAPTURE_INDEX, Smi::FromInt(number_of_captures));
- value->set(INTERNAL_INDEX, *internal);
+ Handle<FixedArray> value = Factory::NewFixedArray(kJscreDataLength);
+ value->set(kJscreNumberOfCapturesIndex, Smi::FromInt(number_of_captures));
+ value->set(kJscreInternalIndex, *internal);
Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
return re;
}
-Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp,
+Handle<Object> RegExpImpl::Re2kExecOnce(Handle<JSRegExp> regexp,
int num_captures,
Handle<String> subject,
int previous_index,
const uc16* two_byte_subject,
int* offsets_vector,
int offsets_vector_length) {
+ bool rc;
+ {
+ for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
+ offsets_vector[i] = -1;
+ }
+
+ AssertNoAllocation a;
+
+ LOG(RegExpExecEvent(regexp, previous_index, subject));
+
+ Handle<ByteArray> byte_codes = Re2kCode(regexp);
+
+ rc = Re2kInterpreter::Match(byte_codes,
+ subject,
+ offsets_vector,
+ previous_index);
+ }
+
+ if (!rc) {
+ return Factory::null_value();
+ }
+
+ Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
+ // The captures come in (start, end+1) pairs.
+ for (int i = 0; i < 2 * (num_captures+1); i += 2) {
+ array->set(i, Smi::FromInt(offsets_vector[i]));
+ array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
+ }
+ return Factory::NewJSArrayWithElements(array);
+}
+
+
+Handle<Object> RegExpImpl::JscreExecOnce(Handle<JSRegExp> regexp,
+ int num_captures,
+ Handle<String> subject,
+ int previous_index,
+ const uc16* two_byte_subject,
+ int* offsets_vector,
+ int offsets_vector_length) {
int rc;
{
AssertNoAllocation a;
- ByteArray* internal = JsreInternal(regexp);
+ ByteArray* internal = JscreInternal(regexp);
const JscreRegExp* js_regexp =
reinterpret_cast<JscreRegExp*>(internal->GetDataStartAddress());
@@ -488,12 +539,8 @@
Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
// The captures come in (start, end+1) pairs.
for (int i = 0; i < 2 * (num_captures+1); i += 2) {
- array->set(i,
- Smi::FromInt(offsets_vector[i]),
- SKIP_WRITE_BARRIER);
- array->set(i+1,
- Smi::FromInt(offsets_vector[i+1]),
- SKIP_WRITE_BARRIER);
+ array->set(i, Smi::FromInt(offsets_vector[i]));
+ array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
}
return Factory::NewJSArrayWithElements(array);
}
@@ -501,8 +548,8 @@
class OffsetsVector {
public:
- inline OffsetsVector(int num_captures) {
- offsets_vector_length_ = (num_captures + 1) * 3;
+ inline OffsetsVector(int num_registers) :
+ offsets_vector_length_(num_registers) {
if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
vector_ = NewArray<int>(offsets_vector_length_);
} else {
@@ -531,7 +578,7 @@
private:
int* vector_;
int offsets_vector_length_;
- static const int kStaticOffsetsVectorSize = 30;
+ static const int kStaticOffsetsVectorSize = 50;
static int static_offsets_vector_[kStaticOffsetsVectorSize];
};
@@ -540,47 +587,127 @@
OffsetsVector::kStaticOffsetsVectorSize];
-Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp,
+Handle<Object> RegExpImpl::Re2kExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index) {
+ ASSERT_EQ(regexp->TypeTag(), JSRegExp::RE2K);
+ ASSERT(!regexp->DataAt(JSRegExp::kRe2kDataIndex)->IsUndefined());
+
+ // Prepare space for the return values.
+ int number_of_registers = Re2kNumberOfRegisters(regexp);
+ OffsetsVector offsets(number_of_registers);
+
+ int num_captures = Re2kNumberOfCaptures(regexp);
+
+ int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
+
+ Handle<String> subject16 = CachedStringToTwoByte(subject);
+
+ Handle<Object> result(Re2kExecOnce(regexp,
+ num_captures,
+ subject,
+ previous_index,
+ subject16->GetTwoByteData(),
+ offsets.vector(),
+ offsets.length()));
+ return result;
+}
+
+
+Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
+ Handle<String> subject,
+ Handle<Object> index) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
- Handle<Object> compile_result = JsreCompile(regexp);
+ Handle<Object> compile_result = JscreCompile(regexp);
if (compile_result->IsException()) return compile_result;
}
ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
- // Prepare space for the return values.
- int num_captures = JsreCapture(regexp);
+ int num_captures = JscreNumberOfCaptures(regexp);
- OffsetsVector offsets(num_captures);
+ OffsetsVector offsets((num_captures + 1) * 3);
int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
Handle<String> subject16 = CachedStringToTwoByte(subject);
- Handle<Object> result(JsreExecOnce(regexp, num_captures, subject,
- previous_index,
- subject16->GetTwoByteData(),
- offsets.vector(), offsets.length()));
+ Handle<Object> result(JscreExecOnce(regexp,
+ num_captures,
+ subject,
+ previous_index,
+ subject16->GetTwoByteData(),
+ offsets.vector(),
+ offsets.length()));
return result;
}
-Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp,
+Handle<Object> RegExpImpl::Re2kExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject) {
+ ASSERT_EQ(regexp->TypeTag(), JSRegExp::RE2K);
Lasse Reichstein 2008/11/14 10:03:40 This method looks like an almost, but not entirely
+ ASSERT(!regexp->DataAt(JSRegExp::kRe2kDataIndex)->IsUndefined());
+
+ // Prepare space for the return values.
+ int number_of_registers = Re2kNumberOfRegisters(regexp);
+ OffsetsVector offsets(number_of_registers);
+
+ int previous_index = 0;
+
+ Handle<JSArray> result = Factory::NewJSArray(0);
+ int i = 0;
+ Handle<Object> matches;
+
+ Handle<String> subject16 = CachedStringToTwoByte(subject);
+
+ do {
+ if (previous_index > subject->length() || previous_index < 0) {
+ // Per ECMA-262 15.10.6.2, if the previous index is greater than the
+ // string length, there is no match.
+ matches = Factory::null_value();
+ } else {
+ matches = Re2kExecOnce(regexp,
+ Re2kNumberOfCaptures(regexp),
+ subject,
+ previous_index,
+ subject16->GetTwoByteData(),
+ offsets.vector(),
+ offsets.length());
+
+ if (matches->IsJSArray()) {
+ SetElement(result, i, matches);
+ i++;
+ previous_index = offsets.vector()[1];
+ if (offsets.vector()[0] == offsets.vector()[1]) {
+ previous_index++;
+ }
+ }
+ }
+ } while (matches->IsJSArray());
+
+ // If we exited the loop with an exception, throw it.
+ if (matches->IsNull()) { // Exited loop normally.
+ return result;
+ } else { // Exited loop with the exception in matches.
+ return matches;
+ }
+}
+
+
+Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
+ Handle<String> subject) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
- Handle<Object> compile_result = JsreCompile(regexp);
+ Handle<Object> compile_result = JscreCompile(regexp);
if (compile_result->IsException()) return compile_result;
}
ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
// Prepare space for the return values.
- int num_captures = JsreCapture(regexp);
+ int num_captures = JscreNumberOfCaptures(regexp);
- OffsetsVector offsets(num_captures);
+ OffsetsVector offsets((num_captures + 1) * 3);
int previous_index = 0;
@@ -596,9 +723,13 @@
// string length, there is no match.
matches = Factory::null_value();
} else {
- matches = JsreExecOnce(regexp, num_captures, subject, previous_index,
- subject16->GetTwoByteData(),
- offsets.vector(), offsets.length());
+ matches = JscreExecOnce(regexp,
+ num_captures,
+ subject,
+ previous_index,
+ subject16->GetTwoByteData(),
+ offsets.vector(),
+ offsets.length());
if (matches->IsJSArray()) {
SetElement(result, i, matches);
@@ -620,18 +751,37 @@
}
-int RegExpImpl::JsreCapture(Handle<JSRegExp> re) {
+int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
- return Smi::cast(value->get(CAPTURE_INDEX))->value();
+ return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->
+ value();
}
-ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) {
+ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
- return ByteArray::cast(value->get(INTERNAL_INDEX));
+ return ByteArray::cast(value->get(kJscreInternalIndex));
}
+int RegExpImpl::Re2kNumberOfCaptures(Handle<JSRegExp> re) {
+ FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex));
+ return Smi::cast(value->get(kRe2kNumberOfCapturesIndex))->value();
+}
+
+
+int RegExpImpl::Re2kNumberOfRegisters(Handle<JSRegExp> re) {
+ FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex));
+ return Smi::cast(value->get(kRe2kNumberOfRegistersIndex))->value();
+}
+
+
+Handle<ByteArray> RegExpImpl::Re2kCode(Handle<JSRegExp> re) {
+ FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex));
+ return Handle<ByteArray>(ByteArray::cast(value->get(kRe2kCodeIndex)));
+}
+
+
// -------------------------------------------------------------------
// New regular expression engine
@@ -648,14 +798,11 @@
int AllocateRegister() { return next_register_++; }
Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
- RegExpNode* start);
+ RegExpNode* start,
+ int capture_count);
inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
- static const int kImplementationOffset = 0;
- static const int kNumberOfRegistersOffset = 0;
- static const int kCodeOffset = 1;
-
RegExpMacroAssembler* macro_assembler() {
return macro_assembler_;
}
@@ -666,34 +813,50 @@
};
+// Attempts to compile the regexp using a Regexp2000 code generator. Returns
+// a fixed array or a null handle depending on whether it succeeded.
Handle<FixedArray> RegExpCompiler::Assemble(
RegExpMacroAssembler* macro_assembler,
- RegExpNode* start) {
+ RegExpNode* start,
+ int capture_count) {
Lasse Reichstein 2008/11/14 10:03:40 Also need to know if we ignore case.
macro_assembler_ = macro_assembler;
List <RegExpNode*> work_list(0);
work_list_ = &work_list;
- start->GoTo(this);
+ Label fail;
+ macro_assembler->PushBacktrack(&fail);
+ if (!start->GoTo(this)) {
+ fail.Unuse();
+ return Handle<FixedArray>::null();
+ }
while (!work_list.is_empty()) {
- work_list.RemoveLast()->Emit(this);
+ if (!work_list.RemoveLast()->Emit(this)) {
+ fail.Unuse();
+ return Handle<FixedArray>::null();
+ }
}
- Handle<FixedArray> array = Factory::NewFixedArray(3);
- array->set(kImplementationOffset,
- Smi::FromInt(macro_assembler->Implementation()),
- SKIP_WRITE_BARRIER);
- array->set(kNumberOfRegistersOffset,
- Smi::FromInt(next_register_),
- SKIP_WRITE_BARRIER);
+ macro_assembler->Bind(&fail);
+ macro_assembler->Fail();
+ Handle<FixedArray> array =
+ Factory::NewFixedArray(RegExpImpl::kRe2kDataLength);
+ array->set(RegExpImpl::kRe2kImplementationIndex,
+ Smi::FromInt(macro_assembler->Implementation()));
+ array->set(RegExpImpl::kRe2kNumberOfRegistersIndex,
+ Smi::FromInt(next_register_));
+ array->set(RegExpImpl::kRe2kNumberOfCapturesIndex,
+ Smi::FromInt(capture_count));
Handle<Object> code = macro_assembler->GetCode();
+ array->set(RegExpImpl::kRe2kCodeIndex, *code);
work_list_ = NULL;
return array;
}
-void RegExpNode::GoTo(RegExpCompiler* compiler) {
+bool RegExpNode::GoTo(RegExpCompiler* compiler) {
if (label.is_bound()) {
compiler->macro_assembler()->GoTo(&label);
+ return true;
} else {
- Emit(compiler);
+ return Emit(compiler);
}
}
@@ -707,6 +870,19 @@
EndNode EndNode::kBacktrack(BACKTRACK);
+bool EndNode::Emit(RegExpCompiler* compiler) {
+ switch (action_) {
+ case ACCEPT:
+ compiler->macro_assembler()->Succeed();
+ return true;
+ case BACKTRACK:
+ compiler->macro_assembler()->Backtrack();
+ return true;
+ }
+ return false;
+}
+
+
void GuardedAlternative::AddGuard(Guard* guard) {
if (guards_ == NULL)
guards_ = new ZoneList<Guard*>(1);
@@ -782,13 +958,13 @@
// Emit code.
-void ChoiceNode::Emit(RegExpCompiler* compiler) {
+bool ChoiceNode::Emit(RegExpCompiler* compiler) {
// TODO(erikcorry): Implement this.
- UNREACHABLE();
+ return false;
}
-void ActionNode::Emit(RegExpCompiler* compiler) {
+bool ActionNode::Emit(RegExpCompiler* compiler) {
RegExpMacroAssembler* macro = compiler->macro_assembler();
switch (type_) {
case STORE_REGISTER:
@@ -806,17 +982,19 @@
break;
case BEGIN_SUBMATCH:
// TODO(erikcorry): Implement this.
- UNREACHABLE();
- break;
+ return false;
case ESCAPE_SUBMATCH:
// TODO(erikcorry): Implement this.
- UNREACHABLE();
- break;
+ return false;
case END_SUBMATCH:
// TODO(erikcorry): Implement this.
+ return false;
+ default:
UNREACHABLE();
- break;
+ return false;
}
+ compiler->AddWork(on_success());
+ return true;
}
@@ -1610,7 +1788,8 @@
}
-RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) {
+Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
+ RegExpNode** node_return) {
RegExpCompiler compiler(input->capture_count);
// Wrap the body of the regexp in capture #0.
RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
@@ -1630,9 +1809,13 @@
&compiler,
captured_body,
EndNode::GetBacktrack());
+ if (node_return != NULL) *node_return = node;
Analysis analysis(&compiler);
analysis.Analyze(node);
- return node;
+ byte codes[10240];
+ Re2kAssembler assembler(Vector<byte>(codes, 1024));
+ RegExpMacroAssemblerRe2k macro_assembler(&assembler);
+ return compiler.Assemble(&macro_assembler, node, input->capture_count);
}
« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698