OLD | NEW |
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 17 matching lines...) Expand all Loading... |
28 #include "v8.h" | 28 #include "v8.h" |
29 | 29 |
30 #include "ast.h" | 30 #include "ast.h" |
31 #include "compiler.h" | 31 #include "compiler.h" |
32 #include "execution.h" | 32 #include "execution.h" |
33 #include "factory.h" | 33 #include "factory.h" |
34 #include "jsregexp.h" | 34 #include "jsregexp.h" |
35 #include "platform.h" | 35 #include "platform.h" |
36 #include "string-search.h" | 36 #include "string-search.h" |
37 #include "runtime.h" | 37 #include "runtime.h" |
38 #include "top.h" | |
39 #include "compilation-cache.h" | 38 #include "compilation-cache.h" |
40 #include "string-stream.h" | 39 #include "string-stream.h" |
41 #include "parser.h" | 40 #include "parser.h" |
42 #include "regexp-macro-assembler.h" | 41 #include "regexp-macro-assembler.h" |
43 #include "regexp-macro-assembler-tracer.h" | 42 #include "regexp-macro-assembler-tracer.h" |
44 #include "regexp-macro-assembler-irregexp.h" | 43 #include "regexp-macro-assembler-irregexp.h" |
45 #include "regexp-stack.h" | 44 #include "regexp-stack.h" |
46 | 45 |
47 #ifndef V8_INTERPRETED_REGEXP | 46 #ifndef V8_INTERPRETED_REGEXP |
48 #if V8_TARGET_ARCH_IA32 | 47 #if V8_TARGET_ARCH_IA32 |
49 #include "ia32/regexp-macro-assembler-ia32.h" | 48 #include "ia32/regexp-macro-assembler-ia32.h" |
50 #elif V8_TARGET_ARCH_X64 | 49 #elif V8_TARGET_ARCH_X64 |
51 #include "x64/regexp-macro-assembler-x64.h" | 50 #include "x64/regexp-macro-assembler-x64.h" |
52 #elif V8_TARGET_ARCH_ARM | 51 #elif V8_TARGET_ARCH_ARM |
53 #include "arm/regexp-macro-assembler-arm.h" | 52 #include "arm/regexp-macro-assembler-arm.h" |
54 #else | 53 #else |
55 #error Unsupported target architecture. | 54 #error Unsupported target architecture. |
56 #endif | 55 #endif |
57 #endif | 56 #endif |
58 | 57 |
59 #include "interpreter-irregexp.h" | 58 #include "interpreter-irregexp.h" |
60 | 59 |
61 | 60 |
62 namespace v8 { | 61 namespace v8 { |
63 namespace internal { | 62 namespace internal { |
64 | 63 |
65 | |
66 Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor, | 64 Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor, |
67 Handle<String> pattern, | 65 Handle<String> pattern, |
68 Handle<String> flags, | 66 Handle<String> flags, |
69 bool* has_pending_exception) { | 67 bool* has_pending_exception) { |
70 // Call the construct code with 2 arguments. | 68 // Call the construct code with 2 arguments. |
71 Object** argv[2] = { Handle<Object>::cast(pattern).location(), | 69 Object** argv[2] = { Handle<Object>::cast(pattern).location(), |
72 Handle<Object>::cast(flags).location() }; | 70 Handle<Object>::cast(flags).location() }; |
73 return Execution::New(constructor, 2, argv, has_pending_exception); | 71 return Execution::New(constructor, 2, argv, has_pending_exception); |
74 } | 72 } |
75 | 73 |
(...skipping 14 matching lines...) Expand all Loading... |
90 } | 88 } |
91 } | 89 } |
92 return JSRegExp::Flags(flags); | 90 return JSRegExp::Flags(flags); |
93 } | 91 } |
94 | 92 |
95 | 93 |
96 static inline void ThrowRegExpException(Handle<JSRegExp> re, | 94 static inline void ThrowRegExpException(Handle<JSRegExp> re, |
97 Handle<String> pattern, | 95 Handle<String> pattern, |
98 Handle<String> error_text, | 96 Handle<String> error_text, |
99 const char* message) { | 97 const char* message) { |
100 Handle<FixedArray> elements = Factory::NewFixedArray(2); | 98 Isolate* isolate = re->GetIsolate(); |
| 99 Factory* factory = isolate->factory(); |
| 100 Handle<FixedArray> elements = factory->NewFixedArray(2); |
101 elements->set(0, *pattern); | 101 elements->set(0, *pattern); |
102 elements->set(1, *error_text); | 102 elements->set(1, *error_text); |
103 Handle<JSArray> array = Factory::NewJSArrayWithElements(elements); | 103 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); |
104 Handle<Object> regexp_err = Factory::NewSyntaxError(message, array); | 104 Handle<Object> regexp_err = factory->NewSyntaxError(message, array); |
105 Top::Throw(*regexp_err); | 105 isolate->Throw(*regexp_err); |
106 } | 106 } |
107 | 107 |
108 | 108 |
109 // Generic RegExp methods. Dispatches to implementation specific methods. | 109 // Generic RegExp methods. Dispatches to implementation specific methods. |
110 | 110 |
111 | 111 |
112 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, | 112 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, |
113 Handle<String> pattern, | 113 Handle<String> pattern, |
114 Handle<String> flag_str) { | 114 Handle<String> flag_str) { |
| 115 Isolate* isolate = re->GetIsolate(); |
115 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); | 116 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); |
116 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); | 117 CompilationCache* compilation_cache = isolate->compilation_cache(); |
| 118 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags); |
117 bool in_cache = !cached.is_null(); | 119 bool in_cache = !cached.is_null(); |
118 LOG(RegExpCompileEvent(re, in_cache)); | 120 LOG(isolate, RegExpCompileEvent(re, in_cache)); |
119 | 121 |
120 Handle<Object> result; | 122 Handle<Object> result; |
121 if (in_cache) { | 123 if (in_cache) { |
122 re->set_data(*cached); | 124 re->set_data(*cached); |
123 return re; | 125 return re; |
124 } | 126 } |
125 pattern = FlattenGetString(pattern); | 127 pattern = FlattenGetString(pattern); |
126 CompilationZoneScope zone_scope(DELETE_ON_EXIT); | 128 CompilationZoneScope zone_scope(DELETE_ON_EXIT); |
127 PostponeInterruptsScope postpone; | 129 PostponeInterruptsScope postpone(isolate); |
128 RegExpCompileData parse_result; | 130 RegExpCompileData parse_result; |
129 FlatStringReader reader(pattern); | 131 FlatStringReader reader(isolate, pattern); |
130 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), | 132 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), |
131 &parse_result)) { | 133 &parse_result)) { |
132 // Throw an exception if we fail to parse the pattern. | 134 // Throw an exception if we fail to parse the pattern. |
133 ThrowRegExpException(re, | 135 ThrowRegExpException(re, |
134 pattern, | 136 pattern, |
135 parse_result.error, | 137 parse_result.error, |
136 "malformed_regexp"); | 138 "malformed_regexp"); |
137 return Handle<Object>::null(); | 139 return Handle<Object>::null(); |
138 } | 140 } |
139 | 141 |
140 if (parse_result.simple && !flags.is_ignore_case()) { | 142 if (parse_result.simple && !flags.is_ignore_case()) { |
141 // Parse-tree is a single atom that is equal to the pattern. | 143 // Parse-tree is a single atom that is equal to the pattern. |
142 AtomCompile(re, pattern, flags, pattern); | 144 AtomCompile(re, pattern, flags, pattern); |
143 } else if (parse_result.tree->IsAtom() && | 145 } else if (parse_result.tree->IsAtom() && |
144 !flags.is_ignore_case() && | 146 !flags.is_ignore_case() && |
145 parse_result.capture_count == 0) { | 147 parse_result.capture_count == 0) { |
146 RegExpAtom* atom = parse_result.tree->AsAtom(); | 148 RegExpAtom* atom = parse_result.tree->AsAtom(); |
147 Vector<const uc16> atom_pattern = atom->data(); | 149 Vector<const uc16> atom_pattern = atom->data(); |
148 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); | 150 Handle<String> atom_string = |
| 151 isolate->factory()->NewStringFromTwoByte(atom_pattern); |
149 AtomCompile(re, pattern, flags, atom_string); | 152 AtomCompile(re, pattern, flags, atom_string); |
150 } else { | 153 } else { |
151 IrregexpInitialize(re, pattern, flags, parse_result.capture_count); | 154 IrregexpInitialize(re, pattern, flags, parse_result.capture_count); |
152 } | 155 } |
153 ASSERT(re->data()->IsFixedArray()); | 156 ASSERT(re->data()->IsFixedArray()); |
154 // Compilation succeeded so the data is set on the regexp | 157 // Compilation succeeded so the data is set on the regexp |
155 // and we can store it in the cache. | 158 // and we can store it in the cache. |
156 Handle<FixedArray> data(FixedArray::cast(re->data())); | 159 Handle<FixedArray> data(FixedArray::cast(re->data())); |
157 CompilationCache::PutRegExp(pattern, flags, data); | 160 compilation_cache->PutRegExp(pattern, flags, data); |
158 | 161 |
159 return re; | 162 return re; |
160 } | 163 } |
161 | 164 |
162 | 165 |
163 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, | 166 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
164 Handle<String> subject, | 167 Handle<String> subject, |
165 int index, | 168 int index, |
166 Handle<JSArray> last_match_info) { | 169 Handle<JSArray> last_match_info) { |
167 switch (regexp->TypeTag()) { | 170 switch (regexp->TypeTag()) { |
168 case JSRegExp::ATOM: | 171 case JSRegExp::ATOM: |
169 return AtomExec(regexp, subject, index, last_match_info); | 172 return AtomExec(regexp, subject, index, last_match_info); |
170 case JSRegExp::IRREGEXP: { | 173 case JSRegExp::IRREGEXP: { |
171 Handle<Object> result = | 174 Handle<Object> result = |
172 IrregexpExec(regexp, subject, index, last_match_info); | 175 IrregexpExec(regexp, subject, index, last_match_info); |
173 ASSERT(!result.is_null() || Top::has_pending_exception()); | 176 ASSERT(!result.is_null() || Isolate::Current()->has_pending_exception()); |
174 return result; | 177 return result; |
175 } | 178 } |
176 default: | 179 default: |
177 UNREACHABLE(); | 180 UNREACHABLE(); |
178 return Handle<Object>::null(); | 181 return Handle<Object>::null(); |
179 } | 182 } |
180 } | 183 } |
181 | 184 |
182 | 185 |
183 // RegExp Atom implementation: Simple string search using indexOf. | 186 // RegExp Atom implementation: Simple string search using indexOf. |
184 | 187 |
185 | 188 |
186 void RegExpImpl::AtomCompile(Handle<JSRegExp> re, | 189 void RegExpImpl::AtomCompile(Handle<JSRegExp> re, |
187 Handle<String> pattern, | 190 Handle<String> pattern, |
188 JSRegExp::Flags flags, | 191 JSRegExp::Flags flags, |
189 Handle<String> match_pattern) { | 192 Handle<String> match_pattern) { |
190 Factory::SetRegExpAtomData(re, | 193 re->GetIsolate()->factory()->SetRegExpAtomData(re, |
191 JSRegExp::ATOM, | 194 JSRegExp::ATOM, |
192 pattern, | 195 pattern, |
193 flags, | 196 flags, |
194 match_pattern); | 197 match_pattern); |
195 } | 198 } |
196 | 199 |
197 | 200 |
198 static void SetAtomLastCapture(FixedArray* array, | 201 static void SetAtomLastCapture(FixedArray* array, |
199 String* subject, | 202 String* subject, |
200 int from, | 203 int from, |
201 int to) { | 204 int to) { |
202 NoHandleAllocation no_handles; | 205 NoHandleAllocation no_handles; |
203 RegExpImpl::SetLastCaptureCount(array, 2); | 206 RegExpImpl::SetLastCaptureCount(array, 2); |
204 RegExpImpl::SetLastSubject(array, subject); | 207 RegExpImpl::SetLastSubject(array, subject); |
(...skipping 12 matching lines...) Expand all Loading... |
217 | 220 |
218 int subject_length = sub_vector.length(); | 221 int subject_length = sub_vector.length(); |
219 if (start_index + pattern_length > subject_length) return -1; | 222 if (start_index + pattern_length > subject_length) return -1; |
220 return SearchString(sub_vector, pat_vector, start_index); | 223 return SearchString(sub_vector, pat_vector, start_index); |
221 } | 224 } |
222 */ | 225 */ |
223 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, | 226 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
224 Handle<String> subject, | 227 Handle<String> subject, |
225 int index, | 228 int index, |
226 Handle<JSArray> last_match_info) { | 229 Handle<JSArray> last_match_info) { |
| 230 Isolate* isolate = re->GetIsolate(); |
| 231 |
227 ASSERT(0 <= index); | 232 ASSERT(0 <= index); |
228 ASSERT(index <= subject->length()); | 233 ASSERT(index <= subject->length()); |
229 | 234 |
230 if (!subject->IsFlat()) FlattenString(subject); | 235 if (!subject->IsFlat()) FlattenString(subject); |
231 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 236 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
232 // Extract flattened substrings of cons strings before determining asciiness. | 237 // Extract flattened substrings of cons strings before determining asciiness. |
233 String* seq_sub = *subject; | 238 String* seq_sub = *subject; |
234 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); | 239 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); |
235 | 240 |
236 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); | 241 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); |
237 int needle_len = needle->length(); | 242 int needle_len = needle->length(); |
238 | 243 |
239 if (needle_len != 0) { | 244 if (needle_len != 0) { |
240 if (index + needle_len > subject->length()) return Factory::null_value(); | 245 if (index + needle_len > subject->length()) |
| 246 return isolate->factory()->null_value(); |
| 247 |
241 // dispatch on type of strings | 248 // dispatch on type of strings |
242 index = (needle->IsAsciiRepresentation() | 249 index = (needle->IsAsciiRepresentation() |
243 ? (seq_sub->IsAsciiRepresentation() | 250 ? (seq_sub->IsAsciiRepresentation() |
244 ? SearchString(seq_sub->ToAsciiVector(), | 251 ? SearchString(isolate, |
| 252 seq_sub->ToAsciiVector(), |
245 needle->ToAsciiVector(), | 253 needle->ToAsciiVector(), |
246 index) | 254 index) |
247 : SearchString(seq_sub->ToUC16Vector(), | 255 : SearchString(isolate, |
| 256 seq_sub->ToUC16Vector(), |
248 needle->ToAsciiVector(), | 257 needle->ToAsciiVector(), |
249 index)) | 258 index)) |
250 : (seq_sub->IsAsciiRepresentation() | 259 : (seq_sub->IsAsciiRepresentation() |
251 ? SearchString(seq_sub->ToAsciiVector(), | 260 ? SearchString(isolate, |
| 261 seq_sub->ToAsciiVector(), |
252 needle->ToUC16Vector(), | 262 needle->ToUC16Vector(), |
253 index) | 263 index) |
254 : SearchString(seq_sub->ToUC16Vector(), | 264 : SearchString(isolate, |
| 265 seq_sub->ToUC16Vector(), |
255 needle->ToUC16Vector(), | 266 needle->ToUC16Vector(), |
256 index))); | 267 index))); |
257 if (index == -1) return Factory::null_value(); | 268 if (index == -1) return FACTORY->null_value(); |
258 } | 269 } |
259 ASSERT(last_match_info->HasFastElements()); | 270 ASSERT(last_match_info->HasFastElements()); |
260 | 271 |
261 { | 272 { |
262 NoHandleAllocation no_handles; | 273 NoHandleAllocation no_handles; |
263 FixedArray* array = FixedArray::cast(last_match_info->elements()); | 274 FixedArray* array = FixedArray::cast(last_match_info->elements()); |
264 SetAtomLastCapture(array, *subject, index, index + needle_len); | 275 SetAtomLastCapture(array, *subject, index, index + needle_len); |
265 } | 276 } |
266 return last_match_info; | 277 return last_match_info; |
267 } | 278 } |
(...skipping 13 matching lines...) Expand all Loading... |
281 if (compiled_code->IsByteArray()) return true; | 292 if (compiled_code->IsByteArray()) return true; |
282 #else // V8_INTERPRETED_REGEXP (RegExp native code) | 293 #else // V8_INTERPRETED_REGEXP (RegExp native code) |
283 if (compiled_code->IsCode()) return true; | 294 if (compiled_code->IsCode()) return true; |
284 #endif | 295 #endif |
285 return CompileIrregexp(re, is_ascii); | 296 return CompileIrregexp(re, is_ascii); |
286 } | 297 } |
287 | 298 |
288 | 299 |
289 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) { | 300 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) { |
290 // Compile the RegExp. | 301 // Compile the RegExp. |
| 302 Isolate* isolate = re->GetIsolate(); |
291 CompilationZoneScope zone_scope(DELETE_ON_EXIT); | 303 CompilationZoneScope zone_scope(DELETE_ON_EXIT); |
292 PostponeInterruptsScope postpone; | 304 PostponeInterruptsScope postpone(isolate); |
293 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii)); | 305 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii)); |
294 if (entry->IsJSObject()) { | 306 if (entry->IsJSObject()) { |
295 // If it's a JSObject, a previous compilation failed and threw this object. | 307 // If it's a JSObject, a previous compilation failed and threw this object. |
296 // Re-throw the object without trying again. | 308 // Re-throw the object without trying again. |
297 Top::Throw(entry); | 309 isolate->Throw(entry); |
298 return false; | 310 return false; |
299 } | 311 } |
300 ASSERT(entry->IsTheHole()); | 312 ASSERT(entry->IsTheHole()); |
301 | 313 |
302 JSRegExp::Flags flags = re->GetFlags(); | 314 JSRegExp::Flags flags = re->GetFlags(); |
303 | 315 |
304 Handle<String> pattern(re->Pattern()); | 316 Handle<String> pattern(re->Pattern()); |
305 if (!pattern->IsFlat()) { | 317 if (!pattern->IsFlat()) { |
306 FlattenString(pattern); | 318 FlattenString(pattern); |
307 } | 319 } |
308 | 320 |
309 RegExpCompileData compile_data; | 321 RegExpCompileData compile_data; |
310 FlatStringReader reader(pattern); | 322 FlatStringReader reader(isolate, pattern); |
311 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), | 323 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), |
312 &compile_data)) { | 324 &compile_data)) { |
313 // Throw an exception if we fail to parse the pattern. | 325 // Throw an exception if we fail to parse the pattern. |
314 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. | 326 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. |
315 ThrowRegExpException(re, | 327 ThrowRegExpException(re, |
316 pattern, | 328 pattern, |
317 compile_data.error, | 329 compile_data.error, |
318 "malformed_regexp"); | 330 "malformed_regexp"); |
319 return false; | 331 return false; |
320 } | 332 } |
321 RegExpEngine::CompilationResult result = | 333 RegExpEngine::CompilationResult result = |
322 RegExpEngine::Compile(&compile_data, | 334 RegExpEngine::Compile(&compile_data, |
323 flags.is_ignore_case(), | 335 flags.is_ignore_case(), |
324 flags.is_multiline(), | 336 flags.is_multiline(), |
325 pattern, | 337 pattern, |
326 is_ascii); | 338 is_ascii); |
327 if (result.error_message != NULL) { | 339 if (result.error_message != NULL) { |
328 // Unable to compile regexp. | 340 // Unable to compile regexp. |
329 Handle<FixedArray> elements = Factory::NewFixedArray(2); | 341 Factory* factory = isolate->factory(); |
| 342 Handle<FixedArray> elements = factory->NewFixedArray(2); |
330 elements->set(0, *pattern); | 343 elements->set(0, *pattern); |
331 Handle<String> error_message = | 344 Handle<String> error_message = |
332 Factory::NewStringFromUtf8(CStrVector(result.error_message)); | 345 factory->NewStringFromUtf8(CStrVector(result.error_message)); |
333 elements->set(1, *error_message); | 346 elements->set(1, *error_message); |
334 Handle<JSArray> array = Factory::NewJSArrayWithElements(elements); | 347 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); |
335 Handle<Object> regexp_err = | 348 Handle<Object> regexp_err = |
336 Factory::NewSyntaxError("malformed_regexp", array); | 349 factory->NewSyntaxError("malformed_regexp", array); |
337 Top::Throw(*regexp_err); | 350 isolate->Throw(*regexp_err); |
338 re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err); | 351 re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err); |
339 return false; | 352 return false; |
340 } | 353 } |
341 | 354 |
342 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); | 355 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); |
343 data->set(JSRegExp::code_index(is_ascii), result.code); | 356 data->set(JSRegExp::code_index(is_ascii), result.code); |
344 int register_max = IrregexpMaxRegisterCount(*data); | 357 int register_max = IrregexpMaxRegisterCount(*data); |
345 if (result.num_registers > register_max) { | 358 if (result.num_registers > register_max) { |
346 SetIrregexpMaxRegisterCount(*data, result.num_registers); | 359 SetIrregexpMaxRegisterCount(*data, result.num_registers); |
347 } | 360 } |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
379 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { | 392 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { |
380 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); | 393 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); |
381 } | 394 } |
382 | 395 |
383 | 396 |
384 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re, | 397 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re, |
385 Handle<String> pattern, | 398 Handle<String> pattern, |
386 JSRegExp::Flags flags, | 399 JSRegExp::Flags flags, |
387 int capture_count) { | 400 int capture_count) { |
388 // Initialize compiled code entries to null. | 401 // Initialize compiled code entries to null. |
389 Factory::SetRegExpIrregexpData(re, | 402 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, |
390 JSRegExp::IRREGEXP, | 403 JSRegExp::IRREGEXP, |
391 pattern, | 404 pattern, |
392 flags, | 405 flags, |
393 capture_count); | 406 capture_count); |
394 } | 407 } |
395 | 408 |
396 | 409 |
397 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, | 410 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
398 Handle<String> subject) { | 411 Handle<String> subject) { |
399 if (!subject->IsFlat()) { | 412 if (!subject->IsFlat()) { |
400 FlattenString(subject); | 413 FlattenString(subject); |
401 } | 414 } |
402 // Check the asciiness of the underlying storage. | 415 // Check the asciiness of the underlying storage. |
403 bool is_ascii; | 416 bool is_ascii; |
(...skipping 17 matching lines...) Expand all Loading... |
421 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | 434 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; |
422 #endif // V8_INTERPRETED_REGEXP | 435 #endif // V8_INTERPRETED_REGEXP |
423 } | 436 } |
424 | 437 |
425 | 438 |
426 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( | 439 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( |
427 Handle<JSRegExp> regexp, | 440 Handle<JSRegExp> regexp, |
428 Handle<String> subject, | 441 Handle<String> subject, |
429 int index, | 442 int index, |
430 Vector<int> output) { | 443 Vector<int> output) { |
431 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data())); | 444 Isolate* isolate = regexp->GetIsolate(); |
| 445 |
| 446 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); |
432 | 447 |
433 ASSERT(index >= 0); | 448 ASSERT(index >= 0); |
434 ASSERT(index <= subject->length()); | 449 ASSERT(index <= subject->length()); |
435 ASSERT(subject->IsFlat()); | 450 ASSERT(subject->IsFlat()); |
436 | 451 |
437 // A flat ASCII string might have a two-byte first part. | 452 // A flat ASCII string might have a two-byte first part. |
438 if (subject->IsConsString()) { | 453 if (subject->IsConsString()) { |
439 subject = Handle<String>(ConsString::cast(*subject)->first()); | 454 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate); |
440 } | 455 } |
441 | 456 |
442 #ifndef V8_INTERPRETED_REGEXP | 457 #ifndef V8_INTERPRETED_REGEXP |
443 ASSERT(output.length() >= | 458 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
444 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); | |
445 do { | 459 do { |
446 bool is_ascii = subject->IsAsciiRepresentation(); | 460 bool is_ascii = subject->IsAsciiRepresentation(); |
447 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii)); | 461 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); |
448 NativeRegExpMacroAssembler::Result res = | 462 NativeRegExpMacroAssembler::Result res = |
449 NativeRegExpMacroAssembler::Match(code, | 463 NativeRegExpMacroAssembler::Match(code, |
450 subject, | 464 subject, |
451 output.start(), | 465 output.start(), |
452 output.length(), | 466 output.length(), |
453 index); | 467 index, |
| 468 isolate); |
454 if (res != NativeRegExpMacroAssembler::RETRY) { | 469 if (res != NativeRegExpMacroAssembler::RETRY) { |
455 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || | 470 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || |
456 Top::has_pending_exception()); | 471 isolate->has_pending_exception()); |
457 STATIC_ASSERT( | 472 STATIC_ASSERT( |
458 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); | 473 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); |
459 STATIC_ASSERT( | 474 STATIC_ASSERT( |
460 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); | 475 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); |
461 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) | 476 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) |
462 == RE_EXCEPTION); | 477 == RE_EXCEPTION); |
463 return static_cast<IrregexpResult>(res); | 478 return static_cast<IrregexpResult>(res); |
464 } | 479 } |
465 // If result is RETRY, the string has changed representation, and we | 480 // If result is RETRY, the string has changed representation, and we |
466 // must restart from scratch. | 481 // must restart from scratch. |
(...skipping 10 matching lines...) Expand all Loading... |
477 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); | 492 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); |
478 bool is_ascii = subject->IsAsciiRepresentation(); | 493 bool is_ascii = subject->IsAsciiRepresentation(); |
479 // We must have done EnsureCompiledIrregexp, so we can get the number of | 494 // We must have done EnsureCompiledIrregexp, so we can get the number of |
480 // registers. | 495 // registers. |
481 int* register_vector = output.start(); | 496 int* register_vector = output.start(); |
482 int number_of_capture_registers = | 497 int number_of_capture_registers = |
483 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; | 498 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
484 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | 499 for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
485 register_vector[i] = -1; | 500 register_vector[i] = -1; |
486 } | 501 } |
487 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii)); | 502 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); |
488 | 503 |
489 if (IrregexpInterpreter::Match(byte_codes, | 504 if (IrregexpInterpreter::Match(byte_codes, |
490 subject, | 505 subject, |
491 register_vector, | 506 register_vector, |
492 index)) { | 507 index)) { |
493 return RE_SUCCESS; | 508 return RE_SUCCESS; |
494 } | 509 } |
495 return RE_FAILURE; | 510 return RE_FAILURE; |
496 #endif // V8_INTERPRETED_REGEXP | 511 #endif // V8_INTERPRETED_REGEXP |
497 } | 512 } |
(...skipping 11 matching lines...) Expand all Loading... |
509 if (FLAG_trace_regexp_bytecodes) { | 524 if (FLAG_trace_regexp_bytecodes) { |
510 String* pattern = jsregexp->Pattern(); | 525 String* pattern = jsregexp->Pattern(); |
511 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 526 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
512 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 527 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
513 } | 528 } |
514 #endif | 529 #endif |
515 #endif | 530 #endif |
516 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); | 531 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); |
517 if (required_registers < 0) { | 532 if (required_registers < 0) { |
518 // Compiling failed with an exception. | 533 // Compiling failed with an exception. |
519 ASSERT(Top::has_pending_exception()); | 534 ASSERT(Isolate::Current()->has_pending_exception()); |
520 return Handle<Object>::null(); | 535 return Handle<Object>::null(); |
521 } | 536 } |
522 | 537 |
523 OffsetsVector registers(required_registers); | 538 OffsetsVector registers(required_registers); |
524 | 539 |
525 IrregexpResult res = RegExpImpl::IrregexpExecOnce( | 540 IrregexpResult res = RegExpImpl::IrregexpExecOnce( |
526 jsregexp, subject, previous_index, Vector<int>(registers.vector(), | 541 jsregexp, subject, previous_index, Vector<int>(registers.vector(), |
527 registers.length())); | 542 registers.length())); |
528 if (res == RE_SUCCESS) { | 543 if (res == RE_SUCCESS) { |
529 int capture_register_count = | 544 int capture_register_count = |
530 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; | 545 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
531 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); | 546 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); |
532 AssertNoAllocation no_gc; | 547 AssertNoAllocation no_gc; |
533 int* register_vector = registers.vector(); | 548 int* register_vector = registers.vector(); |
534 FixedArray* array = FixedArray::cast(last_match_info->elements()); | 549 FixedArray* array = FixedArray::cast(last_match_info->elements()); |
535 for (int i = 0; i < capture_register_count; i += 2) { | 550 for (int i = 0; i < capture_register_count; i += 2) { |
536 SetCapture(array, i, register_vector[i]); | 551 SetCapture(array, i, register_vector[i]); |
537 SetCapture(array, i + 1, register_vector[i + 1]); | 552 SetCapture(array, i + 1, register_vector[i + 1]); |
538 } | 553 } |
539 SetLastCaptureCount(array, capture_register_count); | 554 SetLastCaptureCount(array, capture_register_count); |
540 SetLastSubject(array, *subject); | 555 SetLastSubject(array, *subject); |
541 SetLastInput(array, *subject); | 556 SetLastInput(array, *subject); |
542 return last_match_info; | 557 return last_match_info; |
543 } | 558 } |
544 if (res == RE_EXCEPTION) { | 559 if (res == RE_EXCEPTION) { |
545 ASSERT(Top::has_pending_exception()); | 560 ASSERT(Isolate::Current()->has_pending_exception()); |
546 return Handle<Object>::null(); | 561 return Handle<Object>::null(); |
547 } | 562 } |
548 ASSERT(res == RE_FAILURE); | 563 ASSERT(res == RE_FAILURE); |
549 return Factory::null_value(); | 564 return Isolate::Current()->factory()->null_value(); |
550 } | 565 } |
551 | 566 |
552 | 567 |
553 // ------------------------------------------------------------------- | 568 // ------------------------------------------------------------------- |
554 // Implementation of the Irregexp regular expression engine. | 569 // Implementation of the Irregexp regular expression engine. |
555 // | 570 // |
556 // The Irregexp regular expression engine is intended to be a complete | 571 // The Irregexp regular expression engine is intended to be a complete |
557 // implementation of ECMAScript regular expressions. It generates either | 572 // implementation of ECMAScript regular expressions. It generates either |
558 // bytecodes or native code. | 573 // bytecodes or native code. |
559 | 574 |
(...skipping 739 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1299 case Guard::GEQ: | 1314 case Guard::GEQ: |
1300 ASSERT(!trace->mentions_reg(guard->reg())); | 1315 ASSERT(!trace->mentions_reg(guard->reg())); |
1301 macro_assembler->IfRegisterLT(guard->reg(), | 1316 macro_assembler->IfRegisterLT(guard->reg(), |
1302 guard->value(), | 1317 guard->value(), |
1303 trace->backtrack()); | 1318 trace->backtrack()); |
1304 break; | 1319 break; |
1305 } | 1320 } |
1306 } | 1321 } |
1307 | 1322 |
1308 | 1323 |
1309 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; | |
1310 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; | |
1311 | |
1312 | |
1313 // Returns the number of characters in the equivalence class, omitting those | 1324 // Returns the number of characters in the equivalence class, omitting those |
1314 // that cannot occur in the source string because it is ASCII. | 1325 // that cannot occur in the source string because it is ASCII. |
1315 static int GetCaseIndependentLetters(uc16 character, | 1326 static int GetCaseIndependentLetters(Isolate* isolate, |
| 1327 uc16 character, |
1316 bool ascii_subject, | 1328 bool ascii_subject, |
1317 unibrow::uchar* letters) { | 1329 unibrow::uchar* letters) { |
1318 int length = uncanonicalize.get(character, '\0', letters); | 1330 int length = |
| 1331 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); |
1319 // Unibrow returns 0 or 1 for characters where case independence is | 1332 // Unibrow returns 0 or 1 for characters where case independence is |
1320 // trivial. | 1333 // trivial. |
1321 if (length == 0) { | 1334 if (length == 0) { |
1322 letters[0] = character; | 1335 letters[0] = character; |
1323 length = 1; | 1336 length = 1; |
1324 } | 1337 } |
1325 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { | 1338 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { |
1326 return length; | 1339 return length; |
1327 } | 1340 } |
1328 // The standard requires that non-ASCII characters cannot have ASCII | 1341 // The standard requires that non-ASCII characters cannot have ASCII |
1329 // character codes in their equivalence class. | 1342 // character codes in their equivalence class. |
1330 return 0; | 1343 return 0; |
1331 } | 1344 } |
1332 | 1345 |
1333 | 1346 |
1334 static inline bool EmitSimpleCharacter(RegExpCompiler* compiler, | 1347 static inline bool EmitSimpleCharacter(Isolate* isolate, |
| 1348 RegExpCompiler* compiler, |
1335 uc16 c, | 1349 uc16 c, |
1336 Label* on_failure, | 1350 Label* on_failure, |
1337 int cp_offset, | 1351 int cp_offset, |
1338 bool check, | 1352 bool check, |
1339 bool preloaded) { | 1353 bool preloaded) { |
1340 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 1354 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
1341 bool bound_checked = false; | 1355 bool bound_checked = false; |
1342 if (!preloaded) { | 1356 if (!preloaded) { |
1343 assembler->LoadCurrentCharacter( | 1357 assembler->LoadCurrentCharacter( |
1344 cp_offset, | 1358 cp_offset, |
1345 on_failure, | 1359 on_failure, |
1346 check); | 1360 check); |
1347 bound_checked = true; | 1361 bound_checked = true; |
1348 } | 1362 } |
1349 assembler->CheckNotCharacter(c, on_failure); | 1363 assembler->CheckNotCharacter(c, on_failure); |
1350 return bound_checked; | 1364 return bound_checked; |
1351 } | 1365 } |
1352 | 1366 |
1353 | 1367 |
1354 // Only emits non-letters (things that don't have case). Only used for case | 1368 // Only emits non-letters (things that don't have case). Only used for case |
1355 // independent matches. | 1369 // independent matches. |
1356 static inline bool EmitAtomNonLetter(RegExpCompiler* compiler, | 1370 static inline bool EmitAtomNonLetter(Isolate* isolate, |
| 1371 RegExpCompiler* compiler, |
1357 uc16 c, | 1372 uc16 c, |
1358 Label* on_failure, | 1373 Label* on_failure, |
1359 int cp_offset, | 1374 int cp_offset, |
1360 bool check, | 1375 bool check, |
1361 bool preloaded) { | 1376 bool preloaded) { |
1362 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1377 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
1363 bool ascii = compiler->ascii(); | 1378 bool ascii = compiler->ascii(); |
1364 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1379 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
1365 int length = GetCaseIndependentLetters(c, ascii, chars); | 1380 int length = GetCaseIndependentLetters(isolate, c, ascii, chars); |
1366 if (length < 1) { | 1381 if (length < 1) { |
1367 // This can't match. Must be an ASCII subject and a non-ASCII character. | 1382 // This can't match. Must be an ASCII subject and a non-ASCII character. |
1368 // We do not need to do anything since the ASCII pass already handled this. | 1383 // We do not need to do anything since the ASCII pass already handled this. |
1369 return false; // Bounds not checked. | 1384 return false; // Bounds not checked. |
1370 } | 1385 } |
1371 bool checked = false; | 1386 bool checked = false; |
1372 // We handle the length > 1 case in a later pass. | 1387 // We handle the length > 1 case in a later pass. |
1373 if (length == 1) { | 1388 if (length == 1) { |
1374 if (ascii && c > String::kMaxAsciiCharCodeU) { | 1389 if (ascii && c > String::kMaxAsciiCharCodeU) { |
1375 // Can't match - see above. | 1390 // Can't match - see above. |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1417 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, | 1432 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, |
1418 diff, | 1433 diff, |
1419 mask, | 1434 mask, |
1420 on_failure); | 1435 on_failure); |
1421 return true; | 1436 return true; |
1422 } | 1437 } |
1423 return false; | 1438 return false; |
1424 } | 1439 } |
1425 | 1440 |
1426 | 1441 |
1427 typedef bool EmitCharacterFunction(RegExpCompiler* compiler, | 1442 typedef bool EmitCharacterFunction(Isolate* isolate, |
| 1443 RegExpCompiler* compiler, |
1428 uc16 c, | 1444 uc16 c, |
1429 Label* on_failure, | 1445 Label* on_failure, |
1430 int cp_offset, | 1446 int cp_offset, |
1431 bool check, | 1447 bool check, |
1432 bool preloaded); | 1448 bool preloaded); |
1433 | 1449 |
1434 // Only emits letters (things that have case). Only used for case independent | 1450 // Only emits letters (things that have case). Only used for case independent |
1435 // matches. | 1451 // matches. |
1436 static inline bool EmitAtomLetter(RegExpCompiler* compiler, | 1452 static inline bool EmitAtomLetter(Isolate* isolate, |
| 1453 RegExpCompiler* compiler, |
1437 uc16 c, | 1454 uc16 c, |
1438 Label* on_failure, | 1455 Label* on_failure, |
1439 int cp_offset, | 1456 int cp_offset, |
1440 bool check, | 1457 bool check, |
1441 bool preloaded) { | 1458 bool preloaded) { |
1442 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1459 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
1443 bool ascii = compiler->ascii(); | 1460 bool ascii = compiler->ascii(); |
1444 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1461 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
1445 int length = GetCaseIndependentLetters(c, ascii, chars); | 1462 int length = GetCaseIndependentLetters(isolate, c, ascii, chars); |
1446 if (length <= 1) return false; | 1463 if (length <= 1) return false; |
1447 // We may not need to check against the end of the input string | 1464 // We may not need to check against the end of the input string |
1448 // if this character lies before a character that matched. | 1465 // if this character lies before a character that matched. |
1449 if (!preloaded) { | 1466 if (!preloaded) { |
1450 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1467 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
1451 } | 1468 } |
1452 Label ok; | 1469 Label ok; |
1453 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1470 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
1454 switch (length) { | 1471 switch (length) { |
1455 case 2: { | 1472 case 2: { |
(...skipping 417 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1873 // | 1890 // |
1874 // We iterate along the text object, building up for each character a | 1891 // We iterate along the text object, building up for each character a |
1875 // mask and value that can be used to test for a quick failure to match. | 1892 // mask and value that can be used to test for a quick failure to match. |
1876 // The masks and values for the positions will be combined into a single | 1893 // The masks and values for the positions will be combined into a single |
1877 // machine word for the current character width in order to be used in | 1894 // machine word for the current character width in order to be used in |
1878 // generating a quick check. | 1895 // generating a quick check. |
1879 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, | 1896 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
1880 RegExpCompiler* compiler, | 1897 RegExpCompiler* compiler, |
1881 int characters_filled_in, | 1898 int characters_filled_in, |
1882 bool not_at_start) { | 1899 bool not_at_start) { |
| 1900 Isolate* isolate = Isolate::Current(); |
1883 ASSERT(characters_filled_in < details->characters()); | 1901 ASSERT(characters_filled_in < details->characters()); |
1884 int characters = details->characters(); | 1902 int characters = details->characters(); |
1885 int char_mask; | 1903 int char_mask; |
1886 int char_shift; | 1904 int char_shift; |
1887 if (compiler->ascii()) { | 1905 if (compiler->ascii()) { |
1888 char_mask = String::kMaxAsciiCharCode; | 1906 char_mask = String::kMaxAsciiCharCode; |
1889 char_shift = 8; | 1907 char_shift = 8; |
1890 } else { | 1908 } else { |
1891 char_mask = String::kMaxUC16CharCode; | 1909 char_mask = String::kMaxUC16CharCode; |
1892 char_shift = 16; | 1910 char_shift = 16; |
(...skipping 10 matching lines...) Expand all Loading... |
1903 // If we expect a non-ASCII character from an ASCII string, | 1921 // If we expect a non-ASCII character from an ASCII string, |
1904 // there is no way we can match. Not even case independent | 1922 // there is no way we can match. Not even case independent |
1905 // matching can turn an ASCII character into non-ASCII or | 1923 // matching can turn an ASCII character into non-ASCII or |
1906 // vice versa. | 1924 // vice versa. |
1907 details->set_cannot_match(); | 1925 details->set_cannot_match(); |
1908 pos->determines_perfectly = false; | 1926 pos->determines_perfectly = false; |
1909 return; | 1927 return; |
1910 } | 1928 } |
1911 if (compiler->ignore_case()) { | 1929 if (compiler->ignore_case()) { |
1912 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1930 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
1913 int length = GetCaseIndependentLetters(c, compiler->ascii(), chars); | 1931 int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(), |
| 1932 chars); |
1914 ASSERT(length != 0); // Can only happen if c > char_mask (see above). | 1933 ASSERT(length != 0); // Can only happen if c > char_mask (see above). |
1915 if (length == 1) { | 1934 if (length == 1) { |
1916 // This letter has no case equivalents, so it's nice and simple | 1935 // This letter has no case equivalents, so it's nice and simple |
1917 // and the mask-compare will determine definitely whether we have | 1936 // and the mask-compare will determine definitely whether we have |
1918 // a match at this character position. | 1937 // a match at this character position. |
1919 pos->mask = char_mask; | 1938 pos->mask = char_mask; |
1920 pos->value = c; | 1939 pos->value = c; |
1921 pos->determines_perfectly = true; | 1940 pos->determines_perfectly = true; |
1922 } else { | 1941 } else { |
1923 uint32_t common_bits = char_mask; | 1942 uint32_t common_bits = char_mask; |
(...skipping 479 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2403 // loading characters, which means we do not need to recheck the bounds | 2422 // loading characters, which means we do not need to recheck the bounds |
2404 // up to the limit the quick check already checked. In addition the quick | 2423 // up to the limit the quick check already checked. In addition the quick |
2405 // check can have involved a mask and compare operation which may simplify | 2424 // check can have involved a mask and compare operation which may simplify |
2406 // or obviate the need for further checks at some character positions. | 2425 // or obviate the need for further checks at some character positions. |
2407 void TextNode::TextEmitPass(RegExpCompiler* compiler, | 2426 void TextNode::TextEmitPass(RegExpCompiler* compiler, |
2408 TextEmitPassType pass, | 2427 TextEmitPassType pass, |
2409 bool preloaded, | 2428 bool preloaded, |
2410 Trace* trace, | 2429 Trace* trace, |
2411 bool first_element_checked, | 2430 bool first_element_checked, |
2412 int* checked_up_to) { | 2431 int* checked_up_to) { |
| 2432 Isolate* isolate = Isolate::Current(); |
2413 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2433 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2414 bool ascii = compiler->ascii(); | 2434 bool ascii = compiler->ascii(); |
2415 Label* backtrack = trace->backtrack(); | 2435 Label* backtrack = trace->backtrack(); |
2416 QuickCheckDetails* quick_check = trace->quick_check_performed(); | 2436 QuickCheckDetails* quick_check = trace->quick_check_performed(); |
2417 int element_count = elms_->length(); | 2437 int element_count = elms_->length(); |
2418 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { | 2438 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { |
2419 TextElement elm = elms_->at(i); | 2439 TextElement elm = elms_->at(i); |
2420 int cp_offset = trace->cp_offset() + elm.cp_offset; | 2440 int cp_offset = trace->cp_offset() + elm.cp_offset; |
2421 if (elm.type == TextElement::ATOM) { | 2441 if (elm.type == TextElement::ATOM) { |
2422 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2442 Vector<const uc16> quarks = elm.data.u_atom->data(); |
(...skipping 15 matching lines...) Expand all Loading... |
2438 case SIMPLE_CHARACTER_MATCH: | 2458 case SIMPLE_CHARACTER_MATCH: |
2439 emit_function = &EmitSimpleCharacter; | 2459 emit_function = &EmitSimpleCharacter; |
2440 break; | 2460 break; |
2441 case CASE_CHARACTER_MATCH: | 2461 case CASE_CHARACTER_MATCH: |
2442 emit_function = &EmitAtomLetter; | 2462 emit_function = &EmitAtomLetter; |
2443 break; | 2463 break; |
2444 default: | 2464 default: |
2445 break; | 2465 break; |
2446 } | 2466 } |
2447 if (emit_function != NULL) { | 2467 if (emit_function != NULL) { |
2448 bool bound_checked = emit_function(compiler, | 2468 bool bound_checked = emit_function(isolate, |
| 2469 compiler, |
2449 quarks[j], | 2470 quarks[j], |
2450 backtrack, | 2471 backtrack, |
2451 cp_offset + j, | 2472 cp_offset + j, |
2452 *checked_up_to < cp_offset + j, | 2473 *checked_up_to < cp_offset + j, |
2453 preloaded); | 2474 preloaded); |
2454 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); | 2475 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
2455 } | 2476 } |
2456 } | 2477 } |
2457 } else { | 2478 } else { |
2458 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | 2479 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); |
(...skipping 1619 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4078 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); | 4099 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); |
4079 for (int i = 0; i < overlay.length(); i += 2) { | 4100 for (int i = 0; i < overlay.length(); i += 2) { |
4080 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), | 4101 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), |
4081 CharacterRangeSplitter::kInOverlay); | 4102 CharacterRangeSplitter::kInOverlay); |
4082 } | 4103 } |
4083 CharacterRangeSplitter callback(included, excluded); | 4104 CharacterRangeSplitter callback(included, excluded); |
4084 table.ForEach(&callback); | 4105 table.ForEach(&callback); |
4085 } | 4106 } |
4086 | 4107 |
4087 | 4108 |
4088 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, | 4109 static void AddUncanonicals(Isolate* isolate, |
| 4110 ZoneList<CharacterRange>* ranges, |
4089 int bottom, | 4111 int bottom, |
4090 int top); | 4112 int top); |
4091 | 4113 |
4092 | 4114 |
4093 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, | 4115 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, |
4094 bool is_ascii) { | 4116 bool is_ascii) { |
| 4117 Isolate* isolate = Isolate::Current(); |
4095 uc16 bottom = from(); | 4118 uc16 bottom = from(); |
4096 uc16 top = to(); | 4119 uc16 top = to(); |
4097 if (is_ascii) { | 4120 if (is_ascii) { |
4098 if (bottom > String::kMaxAsciiCharCode) return; | 4121 if (bottom > String::kMaxAsciiCharCode) return; |
4099 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; | 4122 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; |
4100 } | 4123 } |
4101 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4124 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
4102 if (top == bottom) { | 4125 if (top == bottom) { |
4103 // If this is a singleton we just expand the one character. | 4126 // If this is a singleton we just expand the one character. |
4104 int length = uncanonicalize.get(bottom, '\0', chars); | 4127 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); |
4105 for (int i = 0; i < length; i++) { | 4128 for (int i = 0; i < length; i++) { |
4106 uc32 chr = chars[i]; | 4129 uc32 chr = chars[i]; |
4107 if (chr != bottom) { | 4130 if (chr != bottom) { |
4108 ranges->Add(CharacterRange::Singleton(chars[i])); | 4131 ranges->Add(CharacterRange::Singleton(chars[i])); |
4109 } | 4132 } |
4110 } | 4133 } |
4111 } else { | 4134 } else { |
4112 // If this is a range we expand the characters block by block, | 4135 // If this is a range we expand the characters block by block, |
4113 // expanding contiguous subranges (blocks) one at a time. | 4136 // expanding contiguous subranges (blocks) one at a time. |
4114 // The approach is as follows. For a given start character we | 4137 // The approach is as follows. For a given start character we |
4115 // look up the remainder of the block that contains it (represented | 4138 // look up the remainder of the block that contains it (represented |
4116 // by the end point), for instance we find 'z' if the character | 4139 // by the end point), for instance we find 'z' if the character |
4117 // is 'c'. A block is characterized by the property | 4140 // is 'c'. A block is characterized by the property |
4118 // that all characters uncanonicalize in the same way, except that | 4141 // that all characters uncanonicalize in the same way, except that |
4119 // each entry in the result is incremented by the distance from the first | 4142 // each entry in the result is incremented by the distance from the first |
4120 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and | 4143 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and |
4121 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. | 4144 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. |
4122 // Once we've found the end point we look up its uncanonicalization | 4145 // Once we've found the end point we look up its uncanonicalization |
4123 // and produce a range for each element. For instance for [c-f] | 4146 // and produce a range for each element. For instance for [c-f] |
4124 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only | 4147 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only |
4125 // add a range if it is not already contained in the input, so [c-f] | 4148 // add a range if it is not already contained in the input, so [c-f] |
4126 // will be skipped but [C-F] will be added. If this range is not | 4149 // will be skipped but [C-F] will be added. If this range is not |
4127 // completely contained in a block we do this for all the blocks | 4150 // completely contained in a block we do this for all the blocks |
4128 // covered by the range (handling characters that is not in a block | 4151 // covered by the range (handling characters that is not in a block |
4129 // as a "singleton block"). | 4152 // as a "singleton block"). |
4130 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4153 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
4131 int pos = bottom; | 4154 int pos = bottom; |
4132 while (pos < top) { | 4155 while (pos < top) { |
4133 int length = canonrange.get(pos, '\0', range); | 4156 int length = isolate->jsregexp_canonrange()->get(pos, '\0', range); |
4134 uc16 block_end; | 4157 uc16 block_end; |
4135 if (length == 0) { | 4158 if (length == 0) { |
4136 block_end = pos; | 4159 block_end = pos; |
4137 } else { | 4160 } else { |
4138 ASSERT_EQ(1, length); | 4161 ASSERT_EQ(1, length); |
4139 block_end = range[0]; | 4162 block_end = range[0]; |
4140 } | 4163 } |
4141 int end = (block_end > top) ? top : block_end; | 4164 int end = (block_end > top) ? top : block_end; |
4142 length = uncanonicalize.get(block_end, '\0', range); | 4165 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range); |
4143 for (int i = 0; i < length; i++) { | 4166 for (int i = 0; i < length; i++) { |
4144 uc32 c = range[i]; | 4167 uc32 c = range[i]; |
4145 uc16 range_from = c - (block_end - pos); | 4168 uc16 range_from = c - (block_end - pos); |
4146 uc16 range_to = c - (block_end - end); | 4169 uc16 range_to = c - (block_end - end); |
4147 if (!(bottom <= range_from && range_to <= top)) { | 4170 if (!(bottom <= range_from && range_to <= top)) { |
4148 ranges->Add(CharacterRange(range_from, range_to)); | 4171 ranges->Add(CharacterRange(range_from, range_to)); |
4149 } | 4172 } |
4150 } | 4173 } |
4151 pos = end + 1; | 4174 pos = end + 1; |
4152 } | 4175 } |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4242 result.SetElementsInSecondSet(); | 4265 result.SetElementsInSecondSet(); |
4243 } else if (j < range->length()) { | 4266 } else if (j < range->length()) { |
4244 // Argument range contains something not in word range. | 4267 // Argument range contains something not in word range. |
4245 result.SetElementsInFirstSet(); | 4268 result.SetElementsInFirstSet(); |
4246 } | 4269 } |
4247 | 4270 |
4248 return result; | 4271 return result; |
4249 } | 4272 } |
4250 | 4273 |
4251 | 4274 |
4252 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, | 4275 static void AddUncanonicals(Isolate* isolate, |
| 4276 ZoneList<CharacterRange>* ranges, |
4253 int bottom, | 4277 int bottom, |
4254 int top) { | 4278 int top) { |
4255 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4279 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
4256 // Zones with no case mappings. There is a DEBUG-mode loop to assert that | 4280 // Zones with no case mappings. There is a DEBUG-mode loop to assert that |
4257 // this table is correct. | 4281 // this table is correct. |
4258 // 0x0600 - 0x0fff | 4282 // 0x0600 - 0x0fff |
4259 // 0x1100 - 0x1cff | 4283 // 0x1100 - 0x1cff |
4260 // 0x2000 - 0x20ff | 4284 // 0x2000 - 0x20ff |
4261 // 0x2200 - 0x23ff | 4285 // 0x2200 - 0x23ff |
4262 // 0x2500 - 0x2bff | 4286 // 0x2500 - 0x2bff |
(...skipping 11 matching lines...) Expand all Loading... |
4274 if (top <= boundaries[0]) { | 4298 if (top <= boundaries[0]) { |
4275 CharacterRange range(bottom, top); | 4299 CharacterRange range(bottom, top); |
4276 range.AddCaseEquivalents(ranges, false); | 4300 range.AddCaseEquivalents(ranges, false); |
4277 return; | 4301 return; |
4278 } | 4302 } |
4279 | 4303 |
4280 // Split up very large ranges. This helps remove ranges where there are no | 4304 // Split up very large ranges. This helps remove ranges where there are no |
4281 // case mappings. | 4305 // case mappings. |
4282 for (int i = 0; i < boundary_count; i++) { | 4306 for (int i = 0; i < boundary_count; i++) { |
4283 if (bottom < boundaries[i] && top >= boundaries[i]) { | 4307 if (bottom < boundaries[i] && top >= boundaries[i]) { |
4284 AddUncanonicals(ranges, bottom, boundaries[i] - 1); | 4308 AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1); |
4285 AddUncanonicals(ranges, boundaries[i], top); | 4309 AddUncanonicals(isolate, ranges, boundaries[i], top); |
4286 return; | 4310 return; |
4287 } | 4311 } |
4288 } | 4312 } |
4289 | 4313 |
4290 // If we are completely in a zone with no case mappings then we are done. | 4314 // If we are completely in a zone with no case mappings then we are done. |
4291 for (int i = 0; i < boundary_count; i += 2) { | 4315 for (int i = 0; i < boundary_count; i += 2) { |
4292 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { | 4316 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { |
4293 #ifdef DEBUG | 4317 #ifdef DEBUG |
4294 for (int j = bottom; j <= top; j++) { | 4318 for (int j = bottom; j <= top; j++) { |
4295 unsigned current_char = j; | 4319 unsigned current_char = j; |
4296 int length = uncanonicalize.get(current_char, '\0', chars); | 4320 int length = isolate->jsregexp_uncanonicalize()->get(current_char, |
| 4321 '\0', chars); |
4297 for (int k = 0; k < length; k++) { | 4322 for (int k = 0; k < length; k++) { |
4298 ASSERT(chars[k] == current_char); | 4323 ASSERT(chars[k] == current_char); |
4299 } | 4324 } |
4300 } | 4325 } |
4301 #endif | 4326 #endif |
4302 return; | 4327 return; |
4303 } | 4328 } |
4304 } | 4329 } |
4305 | 4330 |
4306 // Step through the range finding equivalent characters. | 4331 // Step through the range finding equivalent characters. |
4307 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); | 4332 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); |
4308 for (int i = bottom; i <= top; i++) { | 4333 for (int i = bottom; i <= top; i++) { |
4309 int length = uncanonicalize.get(i, '\0', chars); | 4334 int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars); |
4310 for (int j = 0; j < length; j++) { | 4335 for (int j = 0; j < length; j++) { |
4311 uc32 chr = chars[j]; | 4336 uc32 chr = chars[j]; |
4312 if (chr != i && (chr < bottom || chr > top)) { | 4337 if (chr != i && (chr < bottom || chr > top)) { |
4313 characters->Add(chr); | 4338 characters->Add(chr); |
4314 } | 4339 } |
4315 } | 4340 } |
4316 } | 4341 } |
4317 | 4342 |
4318 // Step through the equivalent characters finding simple ranges and | 4343 // Step through the equivalent characters finding simple ranges and |
4319 // adding ranges to the character class. | 4344 // adding ranges to the character class. |
(...skipping 501 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4821 else | 4846 else |
4822 return empty(); | 4847 return empty(); |
4823 } | 4848 } |
4824 | 4849 |
4825 | 4850 |
4826 // ------------------------------------------------------------------- | 4851 // ------------------------------------------------------------------- |
4827 // Analysis | 4852 // Analysis |
4828 | 4853 |
4829 | 4854 |
4830 void Analysis::EnsureAnalyzed(RegExpNode* that) { | 4855 void Analysis::EnsureAnalyzed(RegExpNode* that) { |
4831 StackLimitCheck check; | 4856 StackLimitCheck check(Isolate::Current()); |
4832 if (check.HasOverflowed()) { | 4857 if (check.HasOverflowed()) { |
4833 fail("Stack overflow"); | 4858 fail("Stack overflow"); |
4834 return; | 4859 return; |
4835 } | 4860 } |
4836 if (that->info()->been_analyzed || that->info()->being_analyzed) | 4861 if (that->info()->been_analyzed || that->info()->being_analyzed) |
4837 return; | 4862 return; |
4838 that->info()->being_analyzed = true; | 4863 that->info()->being_analyzed = true; |
4839 that->Accept(this); | 4864 that->Accept(this); |
4840 that->info()->being_analyzed = false; | 4865 that->info()->being_analyzed = false; |
4841 that->info()->been_analyzed = true; | 4866 that->info()->been_analyzed = true; |
(...skipping 489 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5331 macro_assembler.SetCurrentPositionFromEnd(max_length); | 5356 macro_assembler.SetCurrentPositionFromEnd(max_length); |
5332 } | 5357 } |
5333 | 5358 |
5334 return compiler.Assemble(¯o_assembler, | 5359 return compiler.Assemble(¯o_assembler, |
5335 node, | 5360 node, |
5336 data->capture_count, | 5361 data->capture_count, |
5337 pattern); | 5362 pattern); |
5338 } | 5363 } |
5339 | 5364 |
5340 | 5365 |
5341 int OffsetsVector::static_offsets_vector_[ | |
5342 OffsetsVector::kStaticOffsetsVectorSize]; | |
5343 | |
5344 }} // namespace v8::internal | 5366 }} // namespace v8::internal |
OLD | NEW |