Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(310)

Side by Side Diff: src/regexp-macro-assembler.cc

Issue 1285163003: Move regexp implementation into its own folder. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: addressed comment Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp-macro-assembler.h ('k') | src/regexp-macro-assembler-irregexp.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/v8.h"
6
7 #include "src/assembler.h"
8 #include "src/ast.h"
9 #include "src/regexp-macro-assembler.h"
10 #include "src/regexp-stack.h"
11 #include "src/simulator.h"
12
13 namespace v8 {
14 namespace internal {
15
16 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone)
17 : slow_safe_compiler_(false),
18 global_mode_(NOT_GLOBAL),
19 isolate_(isolate),
20 zone_(zone) {}
21
22
23 RegExpMacroAssembler::~RegExpMacroAssembler() {
24 }
25
26
27 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
28
29 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate,
30 Zone* zone)
31 : RegExpMacroAssembler(isolate, zone) {}
32
33
34 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
35 }
36
37
38 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
39 return FLAG_enable_unaligned_accesses && !slow_safe();
40 }
41
42 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
43 String* subject,
44 int start_index) {
45 if (subject->IsConsString()) {
46 subject = ConsString::cast(subject)->first();
47 } else if (subject->IsSlicedString()) {
48 start_index += SlicedString::cast(subject)->offset();
49 subject = SlicedString::cast(subject)->parent();
50 }
51 DCHECK(start_index >= 0);
52 DCHECK(start_index <= subject->length());
53 if (subject->IsSeqOneByteString()) {
54 return reinterpret_cast<const byte*>(
55 SeqOneByteString::cast(subject)->GetChars() + start_index);
56 } else if (subject->IsSeqTwoByteString()) {
57 return reinterpret_cast<const byte*>(
58 SeqTwoByteString::cast(subject)->GetChars() + start_index);
59 } else if (subject->IsExternalOneByteString()) {
60 return reinterpret_cast<const byte*>(
61 ExternalOneByteString::cast(subject)->GetChars() + start_index);
62 } else {
63 return reinterpret_cast<const byte*>(
64 ExternalTwoByteString::cast(subject)->GetChars() + start_index);
65 }
66 }
67
68
69 int NativeRegExpMacroAssembler::CheckStackGuardState(
70 Isolate* isolate, int start_index, bool is_direct_call,
71 Address* return_address, Code* re_code, String** subject,
72 const byte** input_start, const byte** input_end) {
73 DCHECK(re_code->instruction_start() <= *return_address);
74 DCHECK(*return_address <= re_code->instruction_end());
75 int return_value = 0;
76 // Prepare for possible GC.
77 HandleScope handles(isolate);
78 Handle<Code> code_handle(re_code);
79 Handle<String> subject_handle(*subject);
80 bool is_one_byte = subject_handle->IsOneByteRepresentationUnderneath();
81
82 StackLimitCheck check(isolate);
83 if (check.JsHasOverflowed()) {
84 isolate->StackOverflow();
85 return_value = EXCEPTION;
86 } else if (is_direct_call) {
87 // If not real stack overflow the stack guard was used to interrupt
88 // execution for another purpose. If this is a direct call from JavaScript
89 // retry the RegExp forcing the call through the runtime system.
90 // Currently the direct call cannot handle a GC.
91 return_value = RETRY;
92 } else {
93 Object* result = isolate->stack_guard()->HandleInterrupts();
94 if (result->IsException()) return_value = EXCEPTION;
95 }
96
97 DisallowHeapAllocation no_gc;
98
99 if (*code_handle != re_code) { // Return address no longer valid
100 intptr_t delta = code_handle->address() - re_code->address();
101 // Overwrite the return address on the stack.
102 *return_address += delta;
103 }
104
105 // If we continue, we need to update the subject string addresses.
106 if (return_value == 0) {
107 // String encoding might have changed.
108 if (subject_handle->IsOneByteRepresentationUnderneath() != is_one_byte) {
109 // If we changed between an LATIN1 and an UC16 string, the specialized
110 // code cannot be used, and we need to restart regexp matching from
111 // scratch (including, potentially, compiling a new version of the code).
112 return_value = RETRY;
113 } else {
114 *subject = *subject_handle;
115 intptr_t byte_length = *input_end - *input_start;
116 *input_start = StringCharacterPosition(*subject, start_index);
117 *input_end = *input_start + byte_length;
118 }
119 }
120 return return_value;
121 }
122
123
124 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
125 Handle<Code> regexp_code,
126 Handle<String> subject,
127 int* offsets_vector,
128 int offsets_vector_length,
129 int previous_index,
130 Isolate* isolate) {
131
132 DCHECK(subject->IsFlat());
133 DCHECK(previous_index >= 0);
134 DCHECK(previous_index <= subject->length());
135
136 // No allocations before calling the regexp, but we can't use
137 // DisallowHeapAllocation, since regexps might be preempted, and another
138 // thread might do allocation anyway.
139
140 String* subject_ptr = *subject;
141 // Character offsets into string.
142 int start_offset = previous_index;
143 int char_length = subject_ptr->length() - start_offset;
144 int slice_offset = 0;
145
146 // The string has been flattened, so if it is a cons string it contains the
147 // full string in the first part.
148 if (StringShape(subject_ptr).IsCons()) {
149 DCHECK_EQ(0, ConsString::cast(subject_ptr)->second()->length());
150 subject_ptr = ConsString::cast(subject_ptr)->first();
151 } else if (StringShape(subject_ptr).IsSliced()) {
152 SlicedString* slice = SlicedString::cast(subject_ptr);
153 subject_ptr = slice->parent();
154 slice_offset = slice->offset();
155 }
156 // Ensure that an underlying string has the same representation.
157 bool is_one_byte = subject_ptr->IsOneByteRepresentation();
158 DCHECK(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
159 // String is now either Sequential or External
160 int char_size_shift = is_one_byte ? 0 : 1;
161
162 const byte* input_start =
163 StringCharacterPosition(subject_ptr, start_offset + slice_offset);
164 int byte_length = char_length << char_size_shift;
165 const byte* input_end = input_start + byte_length;
166 Result res = Execute(*regexp_code,
167 *subject,
168 start_offset,
169 input_start,
170 input_end,
171 offsets_vector,
172 offsets_vector_length,
173 isolate);
174 return res;
175 }
176
177
178 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
179 Code* code,
180 String* input, // This needs to be the unpacked (sliced, cons) string.
181 int start_offset,
182 const byte* input_start,
183 const byte* input_end,
184 int* output,
185 int output_size,
186 Isolate* isolate) {
187 // Ensure that the minimum stack has been allocated.
188 RegExpStackScope stack_scope(isolate);
189 Address stack_base = stack_scope.stack()->stack_base();
190
191 int direct_call = 0;
192 int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
193 input,
194 start_offset,
195 input_start,
196 input_end,
197 output,
198 output_size,
199 stack_base,
200 direct_call,
201 isolate);
202 DCHECK(result >= RETRY);
203
204 if (result == EXCEPTION && !isolate->has_pending_exception()) {
205 // We detected a stack overflow (on the backtrack stack) in RegExp code,
206 // but haven't created the exception yet.
207 isolate->StackOverflow();
208 }
209 return static_cast<Result>(result);
210 }
211
212
213 const byte NativeRegExpMacroAssembler::word_character_map[] = {
214 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
215 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
216 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
217 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
218
219 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
220 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
221 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
222 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
223
224 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
225 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
226 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
227 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
228
229 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
230 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
231 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
232 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
233 // Latin-1 range
234 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
235 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
236 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
237 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
238
239 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
240 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
241 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
242 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
243
244 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
245 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
246 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
247 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
248
249 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
250 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
251 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
252 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
253 };
254
255
256 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
257 Address byte_offset1,
258 Address byte_offset2,
259 size_t byte_length,
260 Isolate* isolate) {
261 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
262 isolate->regexp_macro_assembler_canonicalize();
263 // This function is not allowed to cause a garbage collection.
264 // A GC might move the calling generated code and invalidate the
265 // return address on the stack.
266 DCHECK(byte_length % 2 == 0);
267 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
268 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
269 size_t length = byte_length >> 1;
270
271 for (size_t i = 0; i < length; i++) {
272 unibrow::uchar c1 = substring1[i];
273 unibrow::uchar c2 = substring2[i];
274 if (c1 != c2) {
275 unibrow::uchar s1[1] = { c1 };
276 canonicalize->get(c1, '\0', s1);
277 if (s1[0] != c2) {
278 unibrow::uchar s2[1] = { c2 };
279 canonicalize->get(c2, '\0', s2);
280 if (s1[0] != s2[0]) {
281 return 0;
282 }
283 }
284 }
285 }
286 return 1;
287 }
288
289
290 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
291 Address* stack_base,
292 Isolate* isolate) {
293 RegExpStack* regexp_stack = isolate->regexp_stack();
294 size_t size = regexp_stack->stack_capacity();
295 Address old_stack_base = regexp_stack->stack_base();
296 DCHECK(old_stack_base == *stack_base);
297 DCHECK(stack_pointer <= old_stack_base);
298 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
299 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
300 if (new_stack_base == NULL) {
301 return NULL;
302 }
303 *stack_base = new_stack_base;
304 intptr_t stack_content_size = old_stack_base - stack_pointer;
305 return new_stack_base - stack_content_size;
306 }
307
308 #endif // V8_INTERPRETED_REGEXP
309
310 } // namespace internal
311 } // namespace v8
OLDNEW
« no previous file with comments | « src/regexp-macro-assembler.h ('k') | src/regexp-macro-assembler-irregexp.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698