Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(51)

Side by Side Diff: src/regexp-macro-assembler.cc

Issue 165443: X64: Implement RegExp natively. (Closed)
Patch Set: Addressed review comments. Created 11 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp-macro-assembler.h ('k') | src/regexp-macro-assembler-irregexp.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2008 the V8 project authors. All rights reserved. 1 // Copyright 2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution. 11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its 12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived 13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission. 14 // from this software without specific prior written permission.
15 // 15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 #include <string.h>
29 #include "v8.h" 28 #include "v8.h"
30 #include "ast.h" 29 #include "ast.h"
31 #include "assembler.h" 30 #include "assembler.h"
31 #include "regexp-stack.h"
32 #include "regexp-macro-assembler.h" 32 #include "regexp-macro-assembler.h"
33 33
34 namespace v8 { 34 namespace v8 {
35 namespace internal { 35 namespace internal {
36 36
37 RegExpMacroAssembler::RegExpMacroAssembler() { 37 RegExpMacroAssembler::RegExpMacroAssembler() {
38 } 38 }
39 39
40 40
41 RegExpMacroAssembler::~RegExpMacroAssembler() { 41 RegExpMacroAssembler::~RegExpMacroAssembler() {
42 } 42 }
43 43
44 44
45 ByteArrayProvider::ByteArrayProvider(unsigned int initial_size) 45 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
Erik Corry 2009/08/14 08:20:55 Can we ensure this doesn't get compiled on ARM?
46 : byte_array_size_(initial_size),
47 current_byte_array_(),
48 current_byte_array_free_offset_(initial_size) {}
49
50
51 ArraySlice ByteArrayProvider::GetBuffer(unsigned int size,
52 unsigned int elem_size) {
53 ASSERT(size > 0);
54 size_t byte_size = size * elem_size;
55 int free_offset = current_byte_array_free_offset_;
56 // align elements
57 free_offset += elem_size - 1;
58 free_offset = free_offset - (free_offset % elem_size);
59
60 if (free_offset + byte_size > byte_array_size_) {
61 if (byte_size > (byte_array_size_ / 2)) {
62 Handle<ByteArray> solo_buffer(Factory::NewByteArray(byte_size, TENURED));
63 return ArraySlice(solo_buffer, 0);
64 }
65 current_byte_array_ = Factory::NewByteArray(byte_array_size_, TENURED);
66 free_offset = 0;
67 }
68 current_byte_array_free_offset_ = free_offset + byte_size;
69 return ArraySlice(current_byte_array_, free_offset);
70 } 46 }
71 47
72 48
73 template <typename T> 49 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
74 ArraySlice ByteArrayProvider::GetBuffer(Vector<T> values) {
75 ArraySlice slice = GetBuffer(values.length(), sizeof(T));
76 memcpy(slice.location(), values.start(), values.length() * sizeof(T));
77 return slice;
78 } 50 }
51
52
// Returns the address of the character at |start_index| inside the character
// data of |subject|.
//
// |subject| must be a sequential or external string (asserted below); cons
// and sliced strings are not unwrapped here. |start_index| is counted in
// characters and must lie in [0, subject->length()].
//
// For ASCII strings the result is |start_index| bytes past the first
// character; for two-byte strings it is |start_index| uc16 units past it.
53 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
54 String* subject,
55 int start_index) {
56 // Not just flat, but ultra flat: only sequential or external strings.
57 ASSERT(subject->IsExternalString() || subject->IsSeqString());
58 ASSERT(start_index >= 0);
// start_index == length is accepted, so the result may be the address just
// past the last character.
59 ASSERT(start_index <= subject->length());
60 if (subject->IsAsciiRepresentation()) {
61 const byte* address;
62 if (StringShape(subject).IsExternal()) {
// External string: the characters come from the string's resource.
63 const char* data = ExternalAsciiString::cast(subject)->resource()->data();
64 address = reinterpret_cast<const byte*>(data);
65 } else {
66 ASSERT(subject->IsSeqAsciiString());
67 char* data = SeqAsciiString::cast(subject)->GetChars();
68 address = reinterpret_cast<const byte*>(data);
69 }
// One byte per character, so the character index is also the byte offset.
70 return address + start_index;
71 }
// Two-byte representation.
72 const uc16* data;
73 if (StringShape(subject).IsExternal()) {
74 data = ExternalTwoByteString::cast(subject)->resource()->data();
75 } else {
76 ASSERT(subject->IsSeqTwoByteString());
77 data = SeqTwoByteString::cast(subject)->GetChars();
78 }
// Index in uc16 units first, then convert to a byte pointer.
79 return reinterpret_cast<const byte*>(data + start_index);
80 }
81
82
// Runs |regexp_code| against |subject|, starting at character index
// |previous_index|.
//
// The subject is first reduced to its underlying sequential or external
// string: a cons string is replaced by its first() part, and a sliced string
// by its buffer() with the start and end offsets shifted by slice->start().
// The byte range [input_start, input_end) of that string is then handed to
// Execute().
//
// On SUCCESS, every non-negative entry among the first
// |offsets_vector_length| entries of |offsets_vector| has |previous_index|
// added to it. Returns the Result produced by Execute().
83 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
84 Handle<Code> regexp_code,
85 Handle<String> subject,
86 int* offsets_vector,
87 int offsets_vector_length,
88 int previous_index) {
89
90 ASSERT(subject->IsFlat());
91 ASSERT(previous_index >= 0);
92 ASSERT(previous_index <= subject->length());
93
94 // No allocations before calling the regexp, but we can't use
95 // AssertNoAllocation, since regexps might be preempted, and another thread
96 // might do allocation anyway.
97
98 String* subject_ptr = *subject;
99 // Character offsets into string.
100 int start_offset = previous_index;
101 int end_offset = subject_ptr->length();
102
// Read from the original subject, before it is unwrapped below.
103 bool is_ascii = subject->IsAsciiRepresentation();
104
105 if (StringShape(subject_ptr).IsCons()) {
// NOTE(review): presumably a flat cons string keeps all of its characters
// in first() -- confirm against the definition of IsFlat().
106 subject_ptr = ConsString::cast(subject_ptr)->first();
107 } else if (StringShape(subject_ptr).IsSliced()) {
108 SlicedString* slice = SlicedString::cast(subject_ptr);
// Rebase both offsets from the slice onto its backing buffer.
109 start_offset += slice->start();
110 end_offset += slice->start();
111 subject_ptr = slice->buffer();
112 }
113 // Ensure that an underlying string has the same ascii-ness.
114 ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
115 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
116 // String is now either Sequential or External.
// Shift that converts a character count to a byte count: ASCII characters
// are one byte wide, two-byte characters are two bytes wide.
117 int char_size_shift = is_ascii ? 0 : 1;
118 int char_length = end_offset - start_offset;
119
120 const byte* input_start =
121 StringCharacterPosition(subject_ptr, start_offset);
122 int byte_length = char_length << char_size_shift;
123 const byte* input_end = input_start + byte_length;
// The last argument (at_start) is true only when matching begins at
// index 0 of the subject.
124 Result res = Execute(*regexp_code,
125 subject_ptr,
126 start_offset,
127 input_start,
128 input_end,
129 offsets_vector,
130 previous_index == 0);
131
132 if (res == SUCCESS) {
133 // Capture values are relative to start_offset only.
134 // Convert them to be relative to start of string.
// Negative entries are left untouched (presumably they mark captures that
// did not participate in the match -- confirm).
135 for (int i = 0; i < offsets_vector_length; i++) {
136 if (offsets_vector[i] >= 0) {
137 offsets_vector[i] += previous_index;
138 }
139 }
140 }
141
142 return res;
143 }
144
145
// Calls the generated matcher in |code| directly as a native function.
//
// The entry point of |code| is cast to a seven-argument function pointer and
// invoked with the input string, the start offset, the byte range
// [input_start, input_end), the |output| capture array, |at_start| as an int
// (1 or 0) and the base address of the regexp backtrack stack.
//
// The raw int result is asserted to lie between RETRY and SUCCESS and is
// returned as a Result. If the matcher reports EXCEPTION while no exception
// is pending, a stack-overflow exception is created via Top::StackOverflow().
146 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
147 Code* code,
148 String* input,
149 int start_offset,
150 const byte* input_start,
151 const byte* input_end,
152 int* output,
153 bool at_start) {
// Signature of the generated code's entry point; the argument order below
// must match it.
154 typedef int (*matcher)(String*, int, const byte*,
155 const byte*, int*, int, Address);
156 matcher matcher_func = FUNCTION_CAST<matcher>(code->entry());
157
158 int at_start_val = at_start ? 1 : 0;
159
160 // Ensure that the minimum stack has been allocated.
// NOTE(review): |stack| is never read; it is presumably kept only for the
// side effects of its constructor/destructor -- do not remove it as unused.
161 RegExpStack stack;
162 Address stack_base = RegExpStack::stack_base();
163
164 int result = matcher_func(input,
165 start_offset,
166 input_start,
167 input_end,
168 output,
169 at_start_val,
170 stack_base);
// The raw result must map onto a valid Result enumerator.
171 ASSERT(result <= SUCCESS);
172 ASSERT(result >= RETRY);
173
174 if (result == EXCEPTION && !Top::has_pending_exception()) {
175 // We detected a stack overflow (on the backtrack stack) in RegExp code,
176 // but haven't created the exception yet.
177 Top::StackOverflow();
178 }
179 return static_cast<Result>(result);
180 }
181
182
// File-local mapping used by CaseInsensitiveCompareUC16 to canonicalize
// characters before comparing them.
183 static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
184
// Compares two runs of two-byte (uc16) characters case-insensitively.
//
// |byte_offset1| and |byte_offset2| are the addresses of the two runs and
// |byte_length| is their common length in bytes (must be even).
// Returns 1 if every pair of characters is either identical or equal after
// canonicalization, and 0 at the first pair that differs.
185 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
186 Address byte_offset1,
187 Address byte_offset2,
188 size_t byte_length) {
189 // This function is not allowed to cause a garbage collection.
190 // A GC might move the calling generated code and invalidate the
191 // return address on the stack.
192 ASSERT(byte_length % 2 == 0);
193 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
194 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
// Number of uc16 characters to compare.
195 size_t length = byte_length >> 1;
196
197 for (size_t i = 0; i < length; i++) {
198 unibrow::uchar c1 = substring1[i];
199 unibrow::uchar c2 = substring2[i];
// Canonicalize only when the raw characters differ.
200 if (c1 != c2) {
// Pre-filled with c1, so s1[0] still holds c1 if get() writes nothing
// (presumably the case when the character has no mapping -- confirm).
201 unibrow::uchar s1[1] = { c1 };
202 canonicalize.get(c1, '\0', s1);
// If canonical c1 already equals raw c2, c2 need not be canonicalized.
203 if (s1[0] != c2) {
204 unibrow::uchar s2[1] = { c2 };
205 canonicalize.get(c2, '\0', s2);
206 if (s1[0] != s2[0]) {
207 return 0;
208 }
209 }
210 }
211 }
212 return 1;
213 }
214
215
79 } } // namespace v8::internal 216 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/regexp-macro-assembler.h ('k') | src/regexp-macro-assembler-irregexp.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698