| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/regexp-macro-assembler.h" | 5 #include "src/regexp/regexp-macro-assembler.h" |
| 6 | 6 |
| 7 #include "src/assembler.h" | 7 #include "src/assembler.h" |
| 8 #include "src/isolate-inl.h" | 8 #include "src/isolate-inl.h" |
| 9 #include "src/regexp/regexp-stack.h" | 9 #include "src/regexp/regexp-stack.h" |
| 10 #include "src/simulator.h" | 10 #include "src/simulator.h" |
| 11 | 11 |
| 12 #ifdef V8_I18N_SUPPORT |
| 13 #include "unicode/uchar.h" |
| 14 #endif // V8_I18N_SUPPORT |
| 15 |
| 12 namespace v8 { | 16 namespace v8 { |
| 13 namespace internal { | 17 namespace internal { |
| 14 | 18 |
| 15 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone) | 19 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone) |
| 16 : slow_safe_compiler_(false), | 20 : slow_safe_compiler_(false), |
| 17 global_mode_(NOT_GLOBAL), | 21 global_mode_(NOT_GLOBAL), |
| 18 isolate_(isolate), | 22 isolate_(isolate), |
| 19 zone_(zone) {} | 23 zone_(zone) {} |
| 20 | 24 |
| 21 | 25 |
| 22 RegExpMacroAssembler::~RegExpMacroAssembler() { | 26 RegExpMacroAssembler::~RegExpMacroAssembler() { |
| 23 } | 27 } |
| 24 | 28 |
| 25 | 29 |
| 30 int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1, |
| 31 Address byte_offset2, |
| 32 size_t byte_length, |
| 33 Isolate* isolate) { |
| 34 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = |
| 35 isolate->regexp_macro_assembler_canonicalize(); |
| 36 // This function is not allowed to cause a garbage collection. |
| 37 // A GC might move the calling generated code and invalidate the |
| 38 // return address on the stack. |
| 39 DCHECK(byte_length % 2 == 0); |
| 40 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); |
| 41 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); |
| 42 size_t length = byte_length >> 1; |
| 43 |
| 44 #ifdef V8_I18N_SUPPORT |
| 45 if (isolate == nullptr) { |
| 46 for (size_t i = 0; i < length; i++) { |
| 47 uc32 c1 = substring1[i]; |
| 48 uc32 c2 = substring2[i]; |
| 49 if (unibrow::Utf16::IsLeadSurrogate(c1)) { |
| 50 // Non-BMP characters do not have case-equivalents in the BMP. |
| 51 // Both have to be non-BMP for them to be able to match. |
| 52 if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0; |
| 53 if (i + 1 < length) { |
| 54 uc16 c1t = substring1[i + 1]; |
| 55 uc16 c2t = substring2[i + 1]; |
| 56 if (unibrow::Utf16::IsTrailSurrogate(c1t) && |
| 57 unibrow::Utf16::IsTrailSurrogate(c2t)) { |
| 58 c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t); |
| 59 c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t); |
| 60 i++; |
| 61 } |
| 62 } |
| 63 } |
| 64 c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT); |
| 65 c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT); |
| 66 if (c1 != c2) return 0; |
| 67 } |
| 68 return 1; |
| 69 } |
| 70 #endif // V8_I18N_SUPPORT |
| 71 DCHECK_NOT_NULL(isolate); |
| 72 for (size_t i = 0; i < length; i++) { |
| 73 unibrow::uchar c1 = substring1[i]; |
| 74 unibrow::uchar c2 = substring2[i]; |
| 75 if (c1 != c2) { |
| 76 unibrow::uchar s1[1] = {c1}; |
| 77 canonicalize->get(c1, '\0', s1); |
| 78 if (s1[0] != c2) { |
| 79 unibrow::uchar s2[1] = {c2}; |
| 80 canonicalize->get(c2, '\0', s2); |
| 81 if (s1[0] != s2[0]) { |
| 82 return 0; |
| 83 } |
| 84 } |
| 85 } |
| 86 } |
| 87 return 1; |
| 88 } |
| 89 |
| 90 |
| 26 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. | 91 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. |
| 27 | 92 |
| 28 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate, | 93 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate, |
| 29 Zone* zone) | 94 Zone* zone) |
| 30 : RegExpMacroAssembler(isolate, zone) {} | 95 : RegExpMacroAssembler(isolate, zone) {} |
| 31 | 96 |
| 32 | 97 |
| 33 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { | 98 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { |
| 34 } | 99 } |
| 35 | 100 |
| (...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 238 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 303 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
| 239 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 304 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
| 240 | 305 |
| 241 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 306 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
| 242 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 307 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
| 243 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 308 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
| 244 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 309 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
| 245 }; | 310 }; |
| 246 | 311 |
| 247 | 312 |
| 248 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16( | |
| 249 Address byte_offset1, | |
| 250 Address byte_offset2, | |
| 251 size_t byte_length, | |
| 252 Isolate* isolate) { | |
| 253 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = | |
| 254 isolate->regexp_macro_assembler_canonicalize(); | |
| 255 // This function is not allowed to cause a garbage collection. | |
| 256 // A GC might move the calling generated code and invalidate the | |
| 257 // return address on the stack. | |
| 258 DCHECK(byte_length % 2 == 0); | |
| 259 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); | |
| 260 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); | |
| 261 size_t length = byte_length >> 1; | |
| 262 | |
| 263 for (size_t i = 0; i < length; i++) { | |
| 264 unibrow::uchar c1 = substring1[i]; | |
| 265 unibrow::uchar c2 = substring2[i]; | |
| 266 if (c1 != c2) { | |
| 267 unibrow::uchar s1[1] = { c1 }; | |
| 268 canonicalize->get(c1, '\0', s1); | |
| 269 if (s1[0] != c2) { | |
| 270 unibrow::uchar s2[1] = { c2 }; | |
| 271 canonicalize->get(c2, '\0', s2); | |
| 272 if (s1[0] != s2[0]) { | |
| 273 return 0; | |
| 274 } | |
| 275 } | |
| 276 } | |
| 277 } | |
| 278 return 1; | |
| 279 } | |
| 280 | |
| 281 | |
| 282 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, | 313 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, |
| 283 Address* stack_base, | 314 Address* stack_base, |
| 284 Isolate* isolate) { | 315 Isolate* isolate) { |
| 285 RegExpStack* regexp_stack = isolate->regexp_stack(); | 316 RegExpStack* regexp_stack = isolate->regexp_stack(); |
| 286 size_t size = regexp_stack->stack_capacity(); | 317 size_t size = regexp_stack->stack_capacity(); |
| 287 Address old_stack_base = regexp_stack->stack_base(); | 318 Address old_stack_base = regexp_stack->stack_base(); |
| 288 DCHECK(old_stack_base == *stack_base); | 319 DCHECK(old_stack_base == *stack_base); |
| 289 DCHECK(stack_pointer <= old_stack_base); | 320 DCHECK(stack_pointer <= old_stack_base); |
| 290 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size); | 321 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size); |
| 291 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2); | 322 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2); |
| 292 if (new_stack_base == NULL) { | 323 if (new_stack_base == NULL) { |
| 293 return NULL; | 324 return NULL; |
| 294 } | 325 } |
| 295 *stack_base = new_stack_base; | 326 *stack_base = new_stack_base; |
| 296 intptr_t stack_content_size = old_stack_base - stack_pointer; | 327 intptr_t stack_content_size = old_stack_base - stack_pointer; |
| 297 return new_stack_base - stack_content_size; | 328 return new_stack_base - stack_content_size; |
| 298 } | 329 } |
| 299 | 330 |
| 300 #endif // V8_INTERPRETED_REGEXP | 331 #endif // V8_INTERPRETED_REGEXP |
| 301 | 332 |
| 302 } // namespace internal | 333 } // namespace internal |
| 303 } // namespace v8 | 334 } // namespace v8 |
| OLD | NEW |