OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-macro-assembler.h" | 5 #include "src/regexp/regexp-macro-assembler.h" |
6 | 6 |
7 #include "src/assembler.h" | 7 #include "src/assembler.h" |
8 #include "src/isolate-inl.h" | 8 #include "src/isolate-inl.h" |
9 #include "src/regexp/regexp-stack.h" | 9 #include "src/regexp/regexp-stack.h" |
10 #include "src/simulator.h" | 10 #include "src/simulator.h" |
11 | 11 |
| 12 #ifdef V8_I18N_SUPPORT |
| 13 #include "unicode/uchar.h" |
| 14 #endif // V8_I18N_SUPPORT |
| 15 |
12 namespace v8 { | 16 namespace v8 { |
13 namespace internal { | 17 namespace internal { |
14 | 18 |
15 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone) | 19 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone) |
16 : slow_safe_compiler_(false), | 20 : slow_safe_compiler_(false), |
17 global_mode_(NOT_GLOBAL), | 21 global_mode_(NOT_GLOBAL), |
18 isolate_(isolate), | 22 isolate_(isolate), |
19 zone_(zone) {} | 23 zone_(zone) {} |
20 | 24 |
21 | 25 |
22 RegExpMacroAssembler::~RegExpMacroAssembler() { | 26 RegExpMacroAssembler::~RegExpMacroAssembler() { |
23 } | 27 } |
24 | 28 |
25 | 29 |
| 30 int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1, |
| 31 Address byte_offset2, |
| 32 size_t byte_length, |
| 33 Isolate* isolate) { |
| 34 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = |
| 35 isolate->regexp_macro_assembler_canonicalize(); |
| 36 // This function is not allowed to cause a garbage collection. |
| 37 // A GC might move the calling generated code and invalidate the |
| 38 // return address on the stack. |
| 39 DCHECK(byte_length % 2 == 0); |
| 40 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); |
| 41 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); |
| 42 size_t length = byte_length >> 1; |
| 43 |
| 44 #ifdef V8_I18N_SUPPORT |
| 45 if (isolate == nullptr) { |
| 46 for (size_t i = 0; i < length; i++) { |
| 47 uc32 c1 = substring1[i]; |
| 48 uc32 c2 = substring2[i]; |
| 49 if (unibrow::Utf16::IsLeadSurrogate(c1)) { |
| 50 // Non-BMP characters do not have case-equivalents in the BMP. |
| 51 // Both have to be non-BMP for them to be able to match. |
| 52 if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0; |
| 53 if (i + 1 < length) { |
| 54 uc16 c1t = substring1[i + 1]; |
| 55 uc16 c2t = substring2[i + 1]; |
| 56 if (unibrow::Utf16::IsTrailSurrogate(c1t) && |
| 57 unibrow::Utf16::IsTrailSurrogate(c2t)) { |
| 58 c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t); |
| 59 c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t); |
| 60 i++; |
| 61 } |
| 62 } |
| 63 } |
| 64 c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT); |
| 65 c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT); |
| 66 if (c1 != c2) return 0; |
| 67 } |
| 68 return 1; |
| 69 } |
| 70 #endif // V8_I18N_SUPPORT |
| 71 DCHECK_NOT_NULL(isolate); |
| 72 for (size_t i = 0; i < length; i++) { |
| 73 unibrow::uchar c1 = substring1[i]; |
| 74 unibrow::uchar c2 = substring2[i]; |
| 75 if (c1 != c2) { |
| 76 unibrow::uchar s1[1] = {c1}; |
| 77 canonicalize->get(c1, '\0', s1); |
| 78 if (s1[0] != c2) { |
| 79 unibrow::uchar s2[1] = {c2}; |
| 80 canonicalize->get(c2, '\0', s2); |
| 81 if (s1[0] != s2[0]) { |
| 82 return 0; |
| 83 } |
| 84 } |
| 85 } |
| 86 } |
| 87 return 1; |
| 88 } |
| 89 |
| 90 |
26 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. | 91 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. |
27 | 92 |
28 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate, | 93 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate, |
29 Zone* zone) | 94 Zone* zone) |
30 : RegExpMacroAssembler(isolate, zone) {} | 95 : RegExpMacroAssembler(isolate, zone) {} |
31 | 96 |
32 | 97 |
33 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { | 98 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { |
34 } | 99 } |
35 | 100 |
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
238 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 303 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
239 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 304 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
240 | 305 |
241 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 306 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
242 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 307 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
243 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 308 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
244 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 309 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
245 }; | 310 }; |
246 | 311 |
247 | 312 |
248 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16( | |
249 Address byte_offset1, | |
250 Address byte_offset2, | |
251 size_t byte_length, | |
252 Isolate* isolate) { | |
253 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = | |
254 isolate->regexp_macro_assembler_canonicalize(); | |
255 // This function is not allowed to cause a garbage collection. | |
256 // A GC might move the calling generated code and invalidate the | |
257 // return address on the stack. | |
258 DCHECK(byte_length % 2 == 0); | |
259 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); | |
260 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); | |
261 size_t length = byte_length >> 1; | |
262 | |
263 for (size_t i = 0; i < length; i++) { | |
264 unibrow::uchar c1 = substring1[i]; | |
265 unibrow::uchar c2 = substring2[i]; | |
266 if (c1 != c2) { | |
267 unibrow::uchar s1[1] = { c1 }; | |
268 canonicalize->get(c1, '\0', s1); | |
269 if (s1[0] != c2) { | |
270 unibrow::uchar s2[1] = { c2 }; | |
271 canonicalize->get(c2, '\0', s2); | |
272 if (s1[0] != s2[0]) { | |
273 return 0; | |
274 } | |
275 } | |
276 } | |
277 } | |
278 return 1; | |
279 } | |
280 | |
281 | |
282 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, | 313 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, |
283 Address* stack_base, | 314 Address* stack_base, |
284 Isolate* isolate) { | 315 Isolate* isolate) { |
285 RegExpStack* regexp_stack = isolate->regexp_stack(); | 316 RegExpStack* regexp_stack = isolate->regexp_stack(); |
286 size_t size = regexp_stack->stack_capacity(); | 317 size_t size = regexp_stack->stack_capacity(); |
287 Address old_stack_base = regexp_stack->stack_base(); | 318 Address old_stack_base = regexp_stack->stack_base(); |
288 DCHECK(old_stack_base == *stack_base); | 319 DCHECK(old_stack_base == *stack_base); |
289 DCHECK(stack_pointer <= old_stack_base); | 320 DCHECK(stack_pointer <= old_stack_base); |
290 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size); | 321 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size); |
291 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2); | 322 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2); |
292 if (new_stack_base == NULL) { | 323 if (new_stack_base == NULL) { |
293 return NULL; | 324 return NULL; |
294 } | 325 } |
295 *stack_base = new_stack_base; | 326 *stack_base = new_stack_base; |
296 intptr_t stack_content_size = old_stack_base - stack_pointer; | 327 intptr_t stack_content_size = old_stack_base - stack_pointer; |
297 return new_stack_base - stack_content_size; | 328 return new_stack_base - stack_content_size; |
298 } | 329 } |
299 | 330 |
300 #endif // V8_INTERPRETED_REGEXP | 331 #endif // V8_INTERPRETED_REGEXP |
301 | 332 |
302 } // namespace internal | 333 } // namespace internal |
303 } // namespace v8 | 334 } // namespace v8 |
OLD | NEW |