Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/regexp-macro-assembler.h" | 5 #include "src/regexp/regexp-macro-assembler.h" |
| 6 | 6 |
| 7 #include "src/assembler.h" | 7 #include "src/assembler.h" |
| 8 #include "src/isolate-inl.h" | 8 #include "src/isolate-inl.h" |
| 9 #include "src/regexp/regexp-stack.h" | 9 #include "src/regexp/regexp-stack.h" |
| 10 #include "src/simulator.h" | 10 #include "src/simulator.h" |
| 11 | 11 |
| 12 #ifdef V8_I18N_SUPPORT | |
| 13 #include "unicode/uchar.h" | |
| 14 #endif // V8_I18N_SUPPORT | |
| 15 | |
| 12 namespace v8 { | 16 namespace v8 { |
| 13 namespace internal { | 17 namespace internal { |
| 14 | 18 |
| 15 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone) | 19 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone) |
| 16 : slow_safe_compiler_(false), | 20 : slow_safe_compiler_(false), |
| 17 global_mode_(NOT_GLOBAL), | 21 global_mode_(NOT_GLOBAL), |
| 18 isolate_(isolate), | 22 isolate_(isolate), |
| 19 zone_(zone) {} | 23 zone_(zone) {} |
| 20 | 24 |
| 21 | 25 |
| 22 RegExpMacroAssembler::~RegExpMacroAssembler() { | 26 RegExpMacroAssembler::~RegExpMacroAssembler() { |
| 23 } | 27 } |
| 24 | 28 |
| 25 | 29 |
| 30 int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1, | |
| 31 Address byte_offset2, | |
| 32 size_t byte_length, | |
| 33 Isolate* isolate) { | |
| 34 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = | |
| 35 isolate->regexp_macro_assembler_canonicalize(); | |
| 36 // This function is not allowed to cause a garbage collection. | |
| 37 // A GC might move the calling generated code and invalidate the | |
| 38 // return address on the stack. | |
| 39 DCHECK(byte_length % 2 == 0); | |
| 40 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); | |
| 41 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); | |
| 42 size_t length = byte_length >> 1; | |
| 43 | |
| 44 #ifdef V8_I18N_SUPPORT | |
| 45 if (isolate == nullptr) { | |
| 46 for (size_t i = 0; i < length; i++) { | |
| 47 uc32 c1 = substring1[i]; | |
| 48 uc32 c2 = substring2[i]; | |
| 49 if (unibrow::Utf16::IsLeadSurrogate(c1)) { | |
|
erikcorry
2016/01/25 10:26:37
This looks incomplete wrt unpaired surrogates at t
Yang
2016/01/25 11:46:37
I think this is correct. Unpaired lead surrogate a
erikcorry
2016/01/25 11:57:47
I guess we are here making use of the fact that th
Yang
2016/01/25 12:09:02
I added a comment about the fact that we rely on N
| |
| 50 if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0; | |
| 51 if (i + 1 < length) { | |
| 52 uc16 c1t = substring1[i + 1]; | |
| 53 uc16 c2t = substring2[i + 1]; | |
| 54 if (unibrow::Utf16::IsTrailSurrogate(c1t) && | |
| 55 unibrow::Utf16::IsTrailSurrogate(c2t)) { | |
| 56 c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t); | |
| 57 c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t); | |
| 58 i++; | |
| 59 } | |
| 60 } | |
| 61 } | |
| 62 c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT); | |
| 63 c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT); | |
| 64 if (c1 != c2) return 0; | |
| 65 } | |
| 66 return 1; | |
| 67 } | |
| 68 #endif // V8_I18N_SUPPORT | |
|
erikcorry
2016/01/25 10:26:37
Should this be a #else ?
Yang
2016/01/25 11:46:37
No. If we have ICU, we decide on whether to take t
| |
| 69 DCHECK_NOT_NULL(isolate); | |
| 70 for (size_t i = 0; i < length; i++) { | |
| 71 unibrow::uchar c1 = substring1[i]; | |
| 72 unibrow::uchar c2 = substring2[i]; | |
| 73 if (c1 != c2) { | |
| 74 unibrow::uchar s1[1] = { c1 }; | |
| 75 canonicalize->get(c1, '\0', s1); | |
| 76 if (s1[0] != c2) { | |
| 77 unibrow::uchar s2[1] = { c2 }; | |
| 78 canonicalize->get(c2, '\0', s2); | |
| 79 if (s1[0] != s2[0]) { | |
| 80 return 0; | |
| 81 } | |
| 82 } | |
| 83 } | |
| 84 } | |
| 85 return 1; | |
| 86 } | |
| 87 | |
| 88 | |
| 26 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. | 89 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. |
| 27 | 90 |
| 28 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate, | 91 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate, |
| 29 Zone* zone) | 92 Zone* zone) |
| 30 : RegExpMacroAssembler(isolate, zone) {} | 93 : RegExpMacroAssembler(isolate, zone) {} |
| 31 | 94 |
| 32 | 95 |
| 33 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { | 96 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { |
| 34 } | 97 } |
| 35 | 98 |
| (...skipping 202 matching lines...) | |
| 238 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 301 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
| 239 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 302 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
| 240 | 303 |
| 241 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 304 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
| 242 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 305 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
| 243 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 306 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
| 244 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 307 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
| 245 }; | 308 }; |
| 246 | 309 |
| 247 | 310 |
| 248 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16( | |
| 249 Address byte_offset1, | |
| 250 Address byte_offset2, | |
| 251 size_t byte_length, | |
| 252 Isolate* isolate) { | |
| 253 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = | |
| 254 isolate->regexp_macro_assembler_canonicalize(); | |
| 255 // This function is not allowed to cause a garbage collection. | |
| 256 // A GC might move the calling generated code and invalidate the | |
| 257 // return address on the stack. | |
| 258 DCHECK(byte_length % 2 == 0); | |
| 259 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); | |
| 260 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); | |
| 261 size_t length = byte_length >> 1; | |
| 262 | |
| 263 for (size_t i = 0; i < length; i++) { | |
| 264 unibrow::uchar c1 = substring1[i]; | |
| 265 unibrow::uchar c2 = substring2[i]; | |
| 266 if (c1 != c2) { | |
| 267 unibrow::uchar s1[1] = { c1 }; | |
| 268 canonicalize->get(c1, '\0', s1); | |
| 269 if (s1[0] != c2) { | |
| 270 unibrow::uchar s2[1] = { c2 }; | |
| 271 canonicalize->get(c2, '\0', s2); | |
| 272 if (s1[0] != s2[0]) { | |
| 273 return 0; | |
| 274 } | |
| 275 } | |
| 276 } | |
| 277 } | |
| 278 return 1; | |
| 279 } | |
| 280 | |
| 281 | |
| 282 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, | 311 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, |
| 283 Address* stack_base, | 312 Address* stack_base, |
| 284 Isolate* isolate) { | 313 Isolate* isolate) { |
| 285 RegExpStack* regexp_stack = isolate->regexp_stack(); | 314 RegExpStack* regexp_stack = isolate->regexp_stack(); |
| 286 size_t size = regexp_stack->stack_capacity(); | 315 size_t size = regexp_stack->stack_capacity(); |
| 287 Address old_stack_base = regexp_stack->stack_base(); | 316 Address old_stack_base = regexp_stack->stack_base(); |
| 288 DCHECK(old_stack_base == *stack_base); | 317 DCHECK(old_stack_base == *stack_base); |
| 289 DCHECK(stack_pointer <= old_stack_base); | 318 DCHECK(stack_pointer <= old_stack_base); |
| 290 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size); | 319 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size); |
| 291 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2); | 320 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2); |
| 292 if (new_stack_base == NULL) { | 321 if (new_stack_base == NULL) { |
| 293 return NULL; | 322 return NULL; |
| 294 } | 323 } |
| 295 *stack_base = new_stack_base; | 324 *stack_base = new_stack_base; |
| 296 intptr_t stack_content_size = old_stack_base - stack_pointer; | 325 intptr_t stack_content_size = old_stack_base - stack_pointer; |
| 297 return new_stack_base - stack_content_size; | 326 return new_stack_base - stack_content_size; |
| 298 } | 327 } |
| 299 | 328 |
| 300 #endif // V8_INTERPRETED_REGEXP | 329 #endif // V8_INTERPRETED_REGEXP |
| 301 | 330 |
| 302 } // namespace internal | 331 } // namespace internal |
| 303 } // namespace v8 | 332 } // namespace v8 |
| OLD | NEW |