Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(442)

Unified Diff: src/regexp-macro-assembler.cc

Issue 165443: X64: Implement RegExp natively. (Closed)
Patch Set: Addressed review comments. Created 11 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/regexp-macro-assembler.h ('k') | src/regexp-macro-assembler-irregexp.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/regexp-macro-assembler.cc
diff --git a/src/regexp-macro-assembler.cc b/src/regexp-macro-assembler.cc
index 8dede304ebb3b1f94fd8885270bb154ba6dc1380..4ee80a35f939027045ceae0678289dda6f35a571 100644
--- a/src/regexp-macro-assembler.cc
+++ b/src/regexp-macro-assembler.cc
@@ -25,10 +25,10 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include <string.h>
#include "v8.h"
#include "ast.h"
#include "assembler.h"
+#include "regexp-stack.h"
#include "regexp-macro-assembler.h"
namespace v8 {
@@ -42,38 +42,175 @@ RegExpMacroAssembler::~RegExpMacroAssembler() {
}
-ByteArrayProvider::ByteArrayProvider(unsigned int initial_size)
- : byte_array_size_(initial_size),
- current_byte_array_(),
- current_byte_array_free_offset_(initial_size) {}
+// Default constructor. The body is empty: no state is set up here.
+NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
Erik Corry 2009/08/14 08:20:55 Can we ensure this doesn't get compiled on ARM.
+}
+
+
+// Destructor. The body is empty: nothing is released here.
+NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
+}
+
+
+// Returns the address of the character at start_index inside subject.
+// subject has to be a sequential or an external string (asserted below);
+// start_index is a character index in the range [0, subject->length()].
+// The result is a byte pointer for both ASCII and two-byte strings; for
+// two-byte strings the index is scaled by the uc16 element size.
+const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
+    String* subject,
+    int start_index) {
+  // Only strings whose characters can be addressed directly are accepted.
+  ASSERT(subject->IsExternalString() || subject->IsSeqString());
+  ASSERT(start_index >= 0);
+  ASSERT(start_index <= subject->length());
+
+  const bool two_byte = !subject->IsAsciiRepresentation();
+  const bool external = StringShape(subject).IsExternal();
+
+  if (two_byte) {
+    const uc16* wide_chars;
+    if (external) {
+      wide_chars = ExternalTwoByteString::cast(subject)->resource()->data();
+    } else {
+      ASSERT(subject->IsSeqTwoByteString());
+      wide_chars = SeqTwoByteString::cast(subject)->GetChars();
+    }
+    // Index in uc16 units first, then view the result as raw bytes.
+    return reinterpret_cast<const byte*>(wide_chars + start_index);
+  }
+
+  const char* narrow_chars;
+  if (external) {
+    narrow_chars = ExternalAsciiString::cast(subject)->resource()->data();
+  } else {
+    ASSERT(subject->IsSeqAsciiString());
+    narrow_chars = SeqAsciiString::cast(subject)->GetChars();
+  }
+  return reinterpret_cast<const byte*>(narrow_chars) + start_index;
+}
+
+
+// Runs the compiled regexp code in regexp_code against subject, starting at
+// character index previous_index.
+// Cons and sliced subjects are first replaced by their underlying string, the
+// input range is converted to byte pointers, and the actual call is delegated
+// to Execute(). When Execute() returns SUCCESS, previous_index is added to
+// every non-negative entry among the first offsets_vector_length entries of
+// offsets_vector. Returns the Result produced by Execute().
+NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
+ Handle<Code> regexp_code,
+ Handle<String> subject,
+ int* offsets_vector,
+ int offsets_vector_length,
+ int previous_index) {
+
+ ASSERT(subject->IsFlat());
+ ASSERT(previous_index >= 0);
+ ASSERT(previous_index <= subject->length());
+
+ // No allocations before calling the regexp, but we can't use
+ // AssertNoAllocation, since regexps might be preempted, and another thread
+ // might do allocation anyway.
+
+ String* subject_ptr = *subject;
+ // Character offsets into string.
+ int start_offset = previous_index;
+ int end_offset = subject_ptr->length();
+
+ bool is_ascii = subject->IsAsciiRepresentation();
+ // Replace a wrapper string by the string that actually stores the characters.
+ // NOTE(review): presumably a flat cons string keeps all of its characters in
+ // first() - confirm against ConsString.
+ if (StringShape(subject_ptr).IsCons()) {
+ subject_ptr = ConsString::cast(subject_ptr)->first();
+ } else if (StringShape(subject_ptr).IsSliced()) {
+ // A slice is a window into buffer(); shift both offsets by the slice start.
+ SlicedString* slice = SlicedString::cast(subject_ptr);
+ start_offset += slice->start();
+ end_offset += slice->start();
+ subject_ptr = slice->buffer();
+ }
+ // Ensure that an underlying string has the same ascii-ness.
+ ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
+ ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
+ // String is now either Sequential or External
+ // Shift that turns a character count into a byte count (see byte_length).
+ int char_size_shift = is_ascii ? 0 : 1;
+ int char_length = end_offset - start_offset;
-ArraySlice ByteArrayProvider::GetBuffer(unsigned int size,
- unsigned int elem_size) {
- ASSERT(size > 0);
- size_t byte_size = size * elem_size;
- int free_offset = current_byte_array_free_offset_;
- // align elements
- free_offset += elem_size - 1;
- free_offset = free_offset - (free_offset % elem_size);
+ const byte* input_start =
+ StringCharacterPosition(subject_ptr, start_offset);
+ int byte_length = char_length << char_size_shift;
+ const byte* input_end = input_start + byte_length;
+ // The last argument is Execute()'s at_start flag: true only when matching
+ // begins at index 0 of the subject.
+ Result res = Execute(*regexp_code,
+ subject_ptr,
+ start_offset,
+ input_start,
+ input_end,
+ offsets_vector,
+ previous_index == 0);
- if (free_offset + byte_size > byte_array_size_) {
- if (byte_size > (byte_array_size_ / 2)) {
- Handle<ByteArray> solo_buffer(Factory::NewByteArray(byte_size, TENURED));
- return ArraySlice(solo_buffer, 0);
+ if (res == SUCCESS) {
+ // Capture values are relative to start_offset only.
+ // Convert them to be relative to start of string.
+ // Negative entries are left untouched.
+ for (int i = 0; i < offsets_vector_length; i++) {
+ if (offsets_vector[i] >= 0) {
+ offsets_vector[i] += previous_index;
+ }
}
- current_byte_array_ = Factory::NewByteArray(byte_array_size_, TENURED);
- free_offset = 0;
}
- current_byte_array_free_offset_ = free_offset + byte_size;
- return ArraySlice(current_byte_array_, free_offset);
+
+ return res;
+}
+
+
+// Calls the generated matcher found at code->entry() and returns its integer
+// status converted to a Result.
+//   input        - the string being matched.
+//   start_offset - character offset handed through to the generated code.
+//   input_start, input_end - byte range of the characters to match.
+//   output       - array handed through to the generated code for results.
+//   at_start     - passed to the generated code as 1 (true) or 0 (false).
+// If the matcher reports EXCEPTION while no exception is pending, the failure
+// is treated as a backtrack-stack overflow and Top::StackOverflow() is called.
+NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
+    Code* code,
+    String* input,
+    int start_offset,
+    const byte* input_start,
+    const byte* input_end,
+    int* output,
+    bool at_start) {
+  typedef int (*NativeMatcher)(String*, int, const byte*,
+                               const byte*, int*, int, Address);
+  NativeMatcher entry_point = FUNCTION_CAST<NativeMatcher>(code->entry());
+
+  // Creating a RegExpStack makes sure the minimum stack has been allocated
+  // before its base address is read.
+  RegExpStack regexp_stack;
+  Address backtrack_stack_base = RegExpStack::stack_base();
+
+  int status = entry_point(input,
+                           start_offset,
+                           input_start,
+                           input_end,
+                           output,
+                           at_start ? 1 : 0,
+                           backtrack_stack_base);
+  ASSERT(status <= SUCCESS);
+  ASSERT(status >= RETRY);
+
+  if (status == EXCEPTION) {
+    if (!Top::has_pending_exception()) {
+      // The RegExp code ran out of backtrack stack but did not create the
+      // exception itself, so raise it now.
+      Top::StackOverflow();
+    }
+  }
+  return static_cast<Result>(status);
}
-template <typename T>
-ArraySlice ByteArrayProvider::GetBuffer(Vector<T> values) {
- ArraySlice slice = GetBuffer(values.length(), sizeof(T));
- memcpy(slice.location(), values.start(), values.length() * sizeof(T));
- return slice;
+// Shared mapping used to canonicalize characters with Ecma262Canonicalize.
+static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
+
+// Compares two runs of uc16 characters, ignoring case differences.
+// byte_offset1 and byte_offset2 are the addresses of the two runs and
+// byte_length is their common length in bytes (must be even).
+// Returns 1 if every character pair is equal, either directly or after
+// canonicalization, and 0 at the first pair that differs.
+int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
+    Address byte_offset1,
+    Address byte_offset2,
+    size_t byte_length) {
+  // Nothing in here may trigger a garbage collection: a GC could relocate the
+  // generated code that called us, leaving a stale return address on the
+  // stack.
+  ASSERT(byte_length % 2 == 0);
+  uc16* lhs = reinterpret_cast<uc16*>(byte_offset1);
+  uc16* rhs = reinterpret_cast<uc16*>(byte_offset2);
+  size_t char_count = byte_length / 2;
+
+  for (size_t pos = 0; pos < char_count; pos++) {
+    unibrow::uchar left = lhs[pos];
+    unibrow::uchar right = rhs[pos];
+    if (left == right) continue;
+
+    // Canonicalize the left character first; that alone may already match.
+    // The buffer is pre-filled with the character itself.
+    unibrow::uchar left_canon[1] = { left };
+    canonicalize.get(left, '\0', left_canon);
+    if (left_canon[0] == right) continue;
+
+    // Otherwise canonicalize the right character too and compare both.
+    unibrow::uchar right_canon[1] = { right };
+    canonicalize.get(right, '\0', right_canon);
+    if (left_canon[0] != right_canon[0]) return 0;
+  }
+  return 1;
}
+
+
} } // namespace v8::internal
« no previous file with comments | « src/regexp-macro-assembler.h ('k') | src/regexp-macro-assembler-irregexp.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698