OLD | NEW |
---|---|
1 // Copyright 2008 the V8 project authors. All rights reserved. | 1 // Copyright 2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
11 // with the distribution. | 11 // with the distribution. |
12 // * Neither the name of Google Inc. nor the names of its | 12 // * Neither the name of Google Inc. nor the names of its |
13 // contributors may be used to endorse or promote products derived | 13 // contributors may be used to endorse or promote products derived |
14 // from this software without specific prior written permission. | 14 // from this software without specific prior written permission. |
15 // | 15 // |
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | 27 |
28 #include <string.h> | |
29 #include "v8.h" | 28 #include "v8.h" |
30 #include "ast.h" | 29 #include "ast.h" |
31 #include "assembler.h" | 30 #include "assembler.h" |
31 #include "regexp-stack.h" | |
32 #include "regexp-macro-assembler.h" | 32 #include "regexp-macro-assembler.h" |
33 | 33 |
34 namespace v8 { | 34 namespace v8 { |
35 namespace internal { | 35 namespace internal { |
36 | 36 |
// Constructor with an empty body; the row is the same in the OLD and NEW
// columns.
37 RegExpMacroAssembler::RegExpMacroAssembler() { | 37 RegExpMacroAssembler::RegExpMacroAssembler() { |
38 } | 38 } |
39 | 39 |
40 | 40 |
// Destructor with an empty body; the row is the same in the OLD and NEW
// columns.
41 RegExpMacroAssembler::~RegExpMacroAssembler() { | 41 RegExpMacroAssembler::~RegExpMacroAssembler() { |
42 } | 42 } |
43 | 43 |
44 | 44 |
// The rows in this span pair two different definitions.
// OLD column: the ByteArrayProvider constructor, which stores initial_size
// both as the array size and as the initial free offset, followed by
// ByteArrayProvider::GetBuffer(size, elem_size). GetBuffer rounds the current
// free offset up to a multiple of elem_size. When the request does not fit in
// the rest of the current array, a request larger than half the array size
// gets a dedicated TENURED byte array (returned at offset 0, leaving the
// shared array and its free offset untouched); any other request starts a
// fresh shared array at offset 0. The free offset is then advanced past the
// returned slice.
// NEW column: the NativeRegExpMacroAssembler constructor, whose body is empty
// (opened on the first row of this span, closed on the last row).
45 ByteArrayProvider::ByteArrayProvider(unsigned int initial_size) | 45 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() { |
Erik Corry
2009/08/14 08:20:55
Can we ensure this doesn't get compiled on ARM.
| |
46 : byte_array_size_(initial_size), | |
47 current_byte_array_(), | |
48 current_byte_array_free_offset_(initial_size) {} | |
49 | |
50 | |
51 ArraySlice ByteArrayProvider::GetBuffer(unsigned int size, | |
52 unsigned int elem_size) { | |
53 ASSERT(size > 0); | |
54 size_t byte_size = size * elem_size; | |
55 int free_offset = current_byte_array_free_offset_; | |
56 // align elements | |
57 free_offset += elem_size - 1; | |
58 free_offset = free_offset - (free_offset % elem_size); | |
59 | |
60 if (free_offset + byte_size > byte_array_size_) { | |
61 if (byte_size > (byte_array_size_ / 2)) { | |
62 Handle<ByteArray> solo_buffer(Factory::NewByteArray(byte_size, TENURED)); | |
63 return ArraySlice(solo_buffer, 0); | |
64 } | |
65 current_byte_array_ = Factory::NewByteArray(byte_array_size_, TENURED); | |
66 free_offset = 0; | |
67 } | |
68 current_byte_array_free_offset_ = free_offset + byte_size; | |
69 return ArraySlice(current_byte_array_, free_offset); | |
70 } | 46 } |
71 | 47 |
72 | 48 |
// The rows in this span pair two different definitions.
// OLD column: the template overload of ByteArrayProvider::GetBuffer taking a
// Vector<T>. It requests a slice for values.length() elements of sizeof(T)
// bytes each and copies the vector contents into it with memcpy.
// NEW column: the NativeRegExpMacroAssembler destructor, whose body is empty.
73 template <typename T> | 49 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { |
74 ArraySlice ByteArrayProvider::GetBuffer(Vector<T> values) { | |
75 ArraySlice slice = GetBuffer(values.length(), sizeof(T)); | |
76 memcpy(slice.location(), values.start(), values.length() * sizeof(T)); | |
77 return slice; | |
78 } | 50 } |
51 | |
52 | |
// Returns the address of the character at start_index inside subject.
// subject must be an external or sequential string, and start_index must lie
// in [0, subject->length()]; both conditions are asserted below.
// For ASCII strings start_index is added as a byte offset. For two-byte
// strings it is added to a uc16 pointer before the cast to a byte pointer.
53 const byte* NativeRegExpMacroAssembler::StringCharacterPosition( | |
54 String* subject, | |
55 int start_index) { | |
56 // Not just flat, but ultra flat. | |
57 ASSERT(subject->IsExternalString() || subject->IsSeqString()); | |
58 ASSERT(start_index >= 0); | |
59 ASSERT(start_index <= subject->length()); | |
// ASCII case: find the character data (external resource or sequential
// storage), then step forward start_index bytes.
60 if (subject->IsAsciiRepresentation()) { | |
61 const byte* address; | |
62 if (StringShape(subject).IsExternal()) { | |
63 const char* data = ExternalAsciiString::cast(subject)->resource()->data(); | |
64 address = reinterpret_cast<const byte*>(data); | |
65 } else { | |
66 ASSERT(subject->IsSeqAsciiString()); | |
67 char* data = SeqAsciiString::cast(subject)->GetChars(); | |
68 address = reinterpret_cast<const byte*>(data); | |
69 } | |
70 return address + start_index; | |
71 } | |
// Two-byte case: the pointer arithmetic below is done in uc16 units.
72 const uc16* data; | |
73 if (StringShape(subject).IsExternal()) { | |
74 data = ExternalTwoByteString::cast(subject)->resource()->data(); | |
75 } else { | |
76 ASSERT(subject->IsSeqTwoByteString()); | |
77 data = SeqTwoByteString::cast(subject)->GetChars(); | |
78 } | |
79 return reinterpret_cast<const byte*>(data + start_index); | |
80 } | |
81 | |
82 | |
// Runs regexp_code against subject, starting at character index
// previous_index, and returns the Result produced by Execute.
// subject must be flat and previous_index must lie in
// [0, subject->length()]; both conditions are asserted below.
// On SUCCESS, every non-negative entry among the first offsets_vector_length
// entries of offsets_vector has previous_index added to it.
83 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match( | |
84 Handle<Code> regexp_code, | |
85 Handle<String> subject, | |
86 int* offsets_vector, | |
87 int offsets_vector_length, | |
88 int previous_index) { | |
89 | |
90 ASSERT(subject->IsFlat()); | |
91 ASSERT(previous_index >= 0); | |
92 ASSERT(previous_index <= subject->length()); | |
93 | |
94 // No allocations before calling the regexp, but we can't use | |
95 // AssertNoAllocation, since regexps might be preempted, and another thread | |
96 // might do allocation anyway. | |
97 | |
98 String* subject_ptr = *subject; | |
99 // Character offsets into string. | |
100 int start_offset = previous_index; | |
101 int end_offset = subject_ptr->length(); | |
102 | |
103 bool is_ascii = subject->IsAsciiRepresentation(); | |
104 | |
// Unwrap one level of cons or sliced string to reach the underlying string.
// For a slice, both offsets are shifted by the start of the slice.
105 if (StringShape(subject_ptr).IsCons()) { | |
106 subject_ptr = ConsString::cast(subject_ptr)->first(); | |
107 } else if (StringShape(subject_ptr).IsSliced()) { | |
108 SlicedString* slice = SlicedString::cast(subject_ptr); | |
109 start_offset += slice->start(); | |
110 end_offset += slice->start(); | |
111 subject_ptr = slice->buffer(); | |
112 } | |
113 // Ensure that an underlying string has the same ascii-ness. | |
114 ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii); | |
115 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString()); | |
116 // String is now either Sequential or External | |
// Shift amount that turns a character count into a byte count
// (0 for ASCII, 1 otherwise).
117 int char_size_shift = is_ascii ? 0 : 1; | |
118 int char_length = end_offset - start_offset; | |
119 | |
120 const byte* input_start = | |
121 StringCharacterPosition(subject_ptr, start_offset); | |
122 int byte_length = char_length << char_size_shift; | |
123 const byte* input_end = input_start + byte_length; | |
// The final argument tells Execute whether matching begins at index 0 of
// the subject.
124 Result res = Execute(*regexp_code, | |
125 subject_ptr, | |
126 start_offset, | |
127 input_start, | |
128 input_end, | |
129 offsets_vector, | |
130 previous_index == 0); | |
131 | |
132 if (res == SUCCESS) { | |
133 // Capture values are relative to start_offset only. | |
134 // Convert them to be relative to start of string. | |
135 for (int i = 0; i < offsets_vector_length; i++) { | |
136 if (offsets_vector[i] >= 0) { | |
137 offsets_vector[i] += previous_index; | |
138 } | |
139 } | |
140 } | |
141 | |
142 return res; | |
143 } | |
144 | |
145 | |
// Calls the generated matcher found at code->entry() with the given input
// range and returns its integer result converted to Result.
// at_start is handed to the matcher as 1 or 0.
// If the matcher reports EXCEPTION while Top has no pending exception,
// Top::StackOverflow() is called before returning.
146 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute( | |
147 Code* code, | |
148 String* input, | |
149 int start_offset, | |
150 const byte* input_start, | |
151 const byte* input_end, | |
152 int* output, | |
153 bool at_start) { | |
// Signature of the generated matcher entry point.
154 typedef int (*matcher)(String*, int, const byte*, | |
155 const byte*, int*, int, Address); | |
156 matcher matcher_func = FUNCTION_CAST<matcher>(code->entry()); | |
157 | |
158 int at_start_val = at_start ? 1 : 0; | |
159 | |
160 // Ensure that the minimum stack has been allocated. | |
// NOTE(review): stack is never referenced again in this function; presumably
// constructing it has the side effect described above. Confirm against
// regexp-stack.h.
161 RegExpStack stack; | |
162 Address stack_base = RegExpStack::stack_base(); | |
163 | |
164 int result = matcher_func(input, | |
165 start_offset, | |
166 input_start, | |
167 input_end, | |
168 output, | |
169 at_start_val, | |
170 stack_base); | |
// Together these asserts bound result to the range [RETRY, SUCCESS].
171 ASSERT(result <= SUCCESS); | |
172 ASSERT(result >= RETRY); | |
173 | |
174 if (result == EXCEPTION && !Top::has_pending_exception()) { | |
175 // We detected a stack overflow (on the backtrack stack) in RegExp code, | |
176 // but haven't created the exception yet. | |
177 Top::StackOverflow(); | |
178 } | |
179 return static_cast<Result>(result); | |
180 } | |
181 | |
182 | |
// File-local mapping instance; CaseInsensitiveCompareUC16 below calls its
// get() method on characters that differ.
183 static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize; | |
184 | |
// Compares two ranges of uc16 characters, each byte_length bytes long.
// byte_length must be even (asserted below).
// At each position the characters are accepted when they are equal, when the
// value left in s1 after canonicalize.get(c1, ...) equals c2, or when the
// values left in s1 and s2 after their respective get() calls agree.
// Returns 1 when every position is accepted and 0 at the first rejection.
185 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16( | |
186 Address byte_offset1, | |
187 Address byte_offset2, | |
188 size_t byte_length) { | |
189 // This function is not allowed to cause a garbage collection. | |
190 // A GC might move the calling generated code and invalidate the | |
191 // return address on the stack. | |
192 ASSERT(byte_length % 2 == 0); | |
193 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); | |
194 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); | |
// Number of uc16 characters to compare.
195 size_t length = byte_length >> 1; | |
196 | |
197 for (size_t i = 0; i < length; i++) { | |
198 unibrow::uchar c1 = substring1[i]; | |
199 unibrow::uchar c2 = substring2[i]; | |
200 if (c1 != c2) { | |
// s1 starts out holding c1 and s2 (below) starts out holding c2.
// NOTE(review): whether get() leaves the buffer untouched when it has no
// mapping for the character is not visible here. Confirm in unibrow.
201 unibrow::uchar s1[1] = { c1 }; | |
202 canonicalize.get(c1, '\0', s1); | |
203 if (s1[0] != c2) { | |
204 unibrow::uchar s2[1] = { c2 }; | |
205 canonicalize.get(c2, '\0', s2); | |
206 if (s1[0] != s2[0]) { | |
207 return 0; | |
208 } | |
209 } | |
210 } | |
211 } | |
212 return 1; | |
213 } | |
214 | |
215 | |
79 } } // namespace v8::internal | 216 } } // namespace v8::internal |
OLD | NEW |