Chromium Code Reviews| Index: src/regexp-macro-assembler.cc |
| diff --git a/src/regexp-macro-assembler.cc b/src/regexp-macro-assembler.cc |
| index 8dede304ebb3b1f94fd8885270bb154ba6dc1380..4ee80a35f939027045ceae0678289dda6f35a571 100644 |
| --- a/src/regexp-macro-assembler.cc |
| +++ b/src/regexp-macro-assembler.cc |
| @@ -25,10 +25,10 @@ |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| -#include <string.h> |
| #include "v8.h" |
| #include "ast.h" |
| #include "assembler.h" |
| +#include "regexp-stack.h" |
| #include "regexp-macro-assembler.h" |
| namespace v8 { |
| @@ -42,38 +42,175 @@ RegExpMacroAssembler::~RegExpMacroAssembler() { |
| } |
| -ByteArrayProvider::ByteArrayProvider(unsigned int initial_size) |
| - : byte_array_size_(initial_size), |
| - current_byte_array_(), |
| - current_byte_array_free_offset_(initial_size) {} |
| +NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() { |
|
Erik Corry
2009/08/14 08:20:55
Can we ensure this doesn't get compiled on ARM?
|
| +} |
| + |
| + |
| +// Destructor. The body is empty in this revision. |
| +NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { |
| +} |
| + |
| + |
| +// Returns the address of the character at |start_index| inside |subject|. |
| +// |
| +// |subject| must be an external or a sequential string, and |start_index| |
| +// must lie in [0, subject->length()]; both are checked with ASSERT only. |
| +// For ASCII strings the index is added to a byte pointer; for two-byte |
| +// strings it is added to a uc16 pointer, so it is scaled by the character |
| +// size before the result is cast to a byte pointer. |
| +const byte* NativeRegExpMacroAssembler::StringCharacterPosition( |
| + String* subject, |
| + int start_index) { |
| + // Not just flat, but ultra flat. |
| + ASSERT(subject->IsExternalString() || subject->IsSeqString()); |
| + ASSERT(start_index >= 0); |
| + ASSERT(start_index <= subject->length()); |
| + if (subject->IsAsciiRepresentation()) { |
| + const byte* address; |
| + if (StringShape(subject).IsExternal()) { |
| + const char* data = ExternalAsciiString::cast(subject)->resource()->data(); |
| + address = reinterpret_cast<const byte*>(data); |
| + } else { |
| + ASSERT(subject->IsSeqAsciiString()); |
| + char* data = SeqAsciiString::cast(subject)->GetChars(); |
| + address = reinterpret_cast<const byte*>(data); |
| + } |
| + return address + start_index; |
| + } |
| + // Two-byte representation: same external/sequential split as above. |
| + const uc16* data; |
| + if (StringShape(subject).IsExternal()) { |
| + data = ExternalTwoByteString::cast(subject)->resource()->data(); |
| + } else { |
| + ASSERT(subject->IsSeqTwoByteString()); |
| + data = SeqTwoByteString::cast(subject)->GetChars(); |
| + } |
| + return reinterpret_cast<const byte*>(data + start_index); |
| +} |
| + |
| + |
| +// Runs the compiled regexp |regexp_code| on |subject|, starting at character |
| +// index |previous_index|, and returns the Result produced by Execute(). |
| +// |
| +// The subject must be flat (asserted). A cons or sliced subject is unwrapped |
| +// to its underlying string, the start and end byte addresses of the input |
| +// are computed, and Execute() is called with them. On SUCCESS, every |
| +// non-negative entry among the first |offsets_vector_length| entries of |
| +// |offsets_vector| is incremented by |previous_index|. |
| +NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match( |
| + Handle<Code> regexp_code, |
| + Handle<String> subject, |
| + int* offsets_vector, |
| + int offsets_vector_length, |
| + int previous_index) { |
| + |
| + ASSERT(subject->IsFlat()); |
| + ASSERT(previous_index >= 0); |
| + ASSERT(previous_index <= subject->length()); |
| + |
| + // No allocations before calling the regexp, but we can't use |
| + // AssertNoAllocation, since regexps might be preempted, and another thread |
| + // might do allocation anyway. |
| + |
| + String* subject_ptr = *subject; |
| + // Character offsets into string. |
| + int start_offset = previous_index; |
| + int end_offset = subject_ptr->length(); |
| + |
| + bool is_ascii = subject->IsAsciiRepresentation(); |
| + // NOTE(review): for a cons string only first() is used; presumably a flat |
| + // cons string keeps all of its characters in the first part - confirm. |
| + // For a slice, both offsets are shifted by the slice's start so that they |
| + // index into the underlying buffer. |
| + if (StringShape(subject_ptr).IsCons()) { |
| + subject_ptr = ConsString::cast(subject_ptr)->first(); |
| + } else if (StringShape(subject_ptr).IsSliced()) { |
| + SlicedString* slice = SlicedString::cast(subject_ptr); |
| + start_offset += slice->start(); |
| + end_offset += slice->start(); |
| + subject_ptr = slice->buffer(); |
| + } |
| + // Ensure that an underlying string has the same ascii-ness. |
| + ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii); |
| + ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString()); |
| + // String is now either Sequential or External |
| + // Shift that converts a character count to a byte count: 0 for ASCII, |
| + // 1 for two-byte characters. |
| + int char_size_shift = is_ascii ? 0 : 1; |
| + int char_length = end_offset - start_offset; |
| -ArraySlice ByteArrayProvider::GetBuffer(unsigned int size, |
| - unsigned int elem_size) { |
| - ASSERT(size > 0); |
| - size_t byte_size = size * elem_size; |
| - int free_offset = current_byte_array_free_offset_; |
| - // align elements |
| - free_offset += elem_size - 1; |
| - free_offset = free_offset - (free_offset % elem_size); |
| + const byte* input_start = |
| + StringCharacterPosition(subject_ptr, start_offset); |
| + int byte_length = char_length << char_size_shift; |
| + const byte* input_end = input_start + byte_length; |
| + // The last argument tells the generated code whether matching begins at |
| + // index 0 of the subject. |
| + Result res = Execute(*regexp_code, |
| + subject_ptr, |
| + start_offset, |
| + input_start, |
| + input_end, |
| + offsets_vector, |
| + previous_index == 0); |
| - if (free_offset + byte_size > byte_array_size_) { |
| - if (byte_size > (byte_array_size_ / 2)) { |
| - Handle<ByteArray> solo_buffer(Factory::NewByteArray(byte_size, TENURED)); |
| - return ArraySlice(solo_buffer, 0); |
| + if (res == SUCCESS) { |
| + // Capture values are relative to start_offset only. |
| + // Convert them to be relative to start of string. |
| + for (int i = 0; i < offsets_vector_length; i++) { |
| + if (offsets_vector[i] >= 0) { |
| + offsets_vector[i] += previous_index; |
| + } |
| } |
| - current_byte_array_ = Factory::NewByteArray(byte_array_size_, TENURED); |
| - free_offset = 0; |
| } |
| - current_byte_array_free_offset_ = free_offset + byte_size; |
| - return ArraySlice(current_byte_array_, free_offset); |
| + |
| + return res; |
| +} |
| + |
| + |
| +// Calls the generated matcher code in |code| and returns its result. |
| +// |
| +// The entry point of |code| is cast to a function pointer and invoked with |
| +// the input string, the start offset, the input start/end addresses, the |
| +// |output| array, |at_start| as 0 or 1, and the RegExpStack base address. |
| +// The returned value is asserted to lie between RETRY and SUCCESS. If it is |
| +// EXCEPTION while no exception is pending, Top::StackOverflow() is called |
| +// before returning. |
| +NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute( |
| + Code* code, |
| + String* input, |
| + int start_offset, |
| + const byte* input_start, |
| + const byte* input_end, |
| + int* output, |
| + bool at_start) { |
| + // Signature of the generated code, in the argument order used below. |
| + typedef int (*matcher)(String*, int, const byte*, |
| + const byte*, int*, int, Address); |
| + matcher matcher_func = FUNCTION_CAST<matcher>(code->entry()); |
| + |
| + int at_start_val = at_start ? 1 : 0; |
| + |
| + // Ensure that the minimum stack has been allocated. |
| + RegExpStack stack; |
| + Address stack_base = RegExpStack::stack_base(); |
| + |
| + int result = matcher_func(input, |
| + start_offset, |
| + input_start, |
| + input_end, |
| + output, |
| + at_start_val, |
| + stack_base); |
| + ASSERT(result <= SUCCESS); |
| + ASSERT(result >= RETRY); |
| + |
| + if (result == EXCEPTION && !Top::has_pending_exception()) { |
| + // We detected a stack overflow (on the backtrack stack) in RegExp code, |
| + // but haven't created the exception yet. |
| + Top::StackOverflow(); |
| + } |
| + return static_cast<Result>(result); |
| } |
| -template <typename T> |
| -ArraySlice ByteArrayProvider::GetBuffer(Vector<T> values) { |
| - ArraySlice slice = GetBuffer(values.length(), sizeof(T)); |
| - memcpy(slice.location(), values.start(), values.length() * sizeof(T)); |
| - return slice; |
| +// File-local mapping used by CaseInsensitiveCompareUC16 below to |
| +// canonicalize characters before comparing them. |
| +static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize; |
| + |
| +// Compares two ranges of uc16 characters, each |byte_length| bytes long, |
| +// starting at |byte_offset1| and |byte_offset2|. |
| +// |
| +// Two characters match when they are identical, when the canonicalized |
| +// first character equals the second, or when both canonicalize to the same |
| +// value. Returns 1 if every position matches, 0 at the first mismatch. |
| +// |byte_length| must be even (asserted). |
| +int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16( |
| + Address byte_offset1, |
| + Address byte_offset2, |
| + size_t byte_length) { |
| + // This function is not allowed to cause a garbage collection. |
| + // A GC might move the calling generated code and invalidate the |
| + // return address on the stack. |
| + ASSERT(byte_length % 2 == 0); |
| + uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); |
| + uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); |
| + size_t length = byte_length >> 1; |
| + |
| + for (size_t i = 0; i < length; i++) { |
| + unibrow::uchar c1 = substring1[i]; |
| + unibrow::uchar c2 = substring2[i]; |
| + if (c1 != c2) { |
| + // s1/s2 are pre-filled with the raw character. |
| + // NOTE(review): presumably get() leaves the buffer untouched when no |
| + // mapping exists, so the raw character is then compared - confirm. |
| + unibrow::uchar s1[1] = { c1 }; |
| + canonicalize.get(c1, '\0', s1); |
| + if (s1[0] != c2) { |
| + unibrow::uchar s2[1] = { c2 }; |
| + canonicalize.get(c2, '\0', s2); |
| + if (s1[0] != s2[0]) { |
| + return 0; |
| + } |
| + } |
| + } |
| + } |
| + return 1; |
| } |
| + |
| + |
| } } // namespace v8::internal |