OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-macro-assembler.h" | 5 #include "src/regexp/regexp-macro-assembler.h" |
6 | 6 |
7 #include "src/assembler.h" | 7 #include "src/assembler.h" |
8 #include "src/isolate-inl.h" | 8 #include "src/isolate-inl.h" |
9 #include "src/regexp/regexp-stack.h" | 9 #include "src/regexp/regexp-stack.h" |
10 #include "src/simulator.h" | 10 #include "src/simulator.h" |
11 | 11 |
12 #ifdef V8_I18N_SUPPORT | |
13 #include "unicode/uchar.h" | |
14 #endif // V8_I18N_SUPPORT | |
15 | |
12 namespace v8 { | 16 namespace v8 { |
13 namespace internal { | 17 namespace internal { |
14 | 18 |
15 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone) | 19 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone) |
16 : slow_safe_compiler_(false), | 20 : slow_safe_compiler_(false), |
17 global_mode_(NOT_GLOBAL), | 21 global_mode_(NOT_GLOBAL), |
18 isolate_(isolate), | 22 isolate_(isolate), |
19 zone_(zone) {} | 23 zone_(zone) {} |
20 | 24 |
21 | 25 |
22 RegExpMacroAssembler::~RegExpMacroAssembler() { | 26 RegExpMacroAssembler::~RegExpMacroAssembler() { |
23 } | 27 } |
24 | 28 |
25 | 29 |
30 int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1, | |
31 Address byte_offset2, | |
32 size_t byte_length, | |
33 Isolate* isolate) { | |
34 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = | |
35 isolate->regexp_macro_assembler_canonicalize(); | |
36 // This function is not allowed to cause a garbage collection. | |
37 // A GC might move the calling generated code and invalidate the | |
38 // return address on the stack. | |
39 DCHECK(byte_length % 2 == 0); | |
40 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); | |
41 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); | |
42 size_t length = byte_length >> 1; | |
43 | |
44 #ifdef V8_I18N_SUPPORT | |
45 if (isolate == nullptr) { | |
46 for (size_t i = 0; i < length; i++) { | |
47 uc32 c1 = substring1[i]; | |
48 uc32 c2 = substring2[i]; | |
49 if (unibrow::Utf16::IsLeadSurrogate(c1)) { | |
erikcorry
2016/01/25 10:26:37
This looks incomplete wrt unpaired surrogates at t…
Yang
2016/01/25 11:46:37
I think this is correct. Unpaired lead surrogate a…
erikcorry
2016/01/25 11:57:47
I guess we are here making use of the fact that th…
Yang
2016/01/25 12:09:02
I added a comment about the fact that we rely on N…
| |
50 if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0; | |
51 if (i + 1 < length) { | |
52 uc16 c1t = substring1[i + 1]; | |
53 uc16 c2t = substring2[i + 1]; | |
54 if (unibrow::Utf16::IsTrailSurrogate(c1t) && | |
55 unibrow::Utf16::IsTrailSurrogate(c2t)) { | |
56 c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t); | |
57 c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t); | |
58 i++; | |
59 } | |
60 } | |
61 } | |
62 c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT); | |
63 c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT); | |
64 if (c1 != c2) return 0; | |
65 } | |
66 return 1; | |
67 } | |
68 #endif // V8_I18N_SUPPORT | |
erikcorry
2016/01/25 10:26:37
Should this be a #else ?
Yang
2016/01/25 11:46:37
No. If we have ICU, we decide on whether to take t…
| |
69 DCHECK_NOT_NULL(isolate); | |
70 for (size_t i = 0; i < length; i++) { | |
71 unibrow::uchar c1 = substring1[i]; | |
72 unibrow::uchar c2 = substring2[i]; | |
73 if (c1 != c2) { | |
74 unibrow::uchar s1[1] = { c1 }; | |
75 canonicalize->get(c1, '\0', s1); | |
76 if (s1[0] != c2) { | |
77 unibrow::uchar s2[1] = { c2 }; | |
78 canonicalize->get(c2, '\0', s2); | |
79 if (s1[0] != s2[0]) { | |
80 return 0; | |
81 } | |
82 } | |
83 } | |
84 } | |
85 return 1; | |
86 } | |
87 | |
88 | |
26 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. | 89 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. |
27 | 90 |
28 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate, | 91 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate, |
29 Zone* zone) | 92 Zone* zone) |
30 : RegExpMacroAssembler(isolate, zone) {} | 93 : RegExpMacroAssembler(isolate, zone) {} |
31 | 94 |
32 | 95 |
33 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { | 96 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { |
34 } | 97 } |
35 | 98 |
(...skipping 202 matching lines...) | |
238 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 301 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
239 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 302 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
240 | 303 |
241 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 304 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
242 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 305 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
243 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 306 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
244 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, | 307 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, |
245 }; | 308 }; |
246 | 309 |
247 | 310 |
248 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16( | |
249 Address byte_offset1, | |
250 Address byte_offset2, | |
251 size_t byte_length, | |
252 Isolate* isolate) { | |
253 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = | |
254 isolate->regexp_macro_assembler_canonicalize(); | |
255 // This function is not allowed to cause a garbage collection. | |
256 // A GC might move the calling generated code and invalidate the | |
257 // return address on the stack. | |
258 DCHECK(byte_length % 2 == 0); | |
259 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); | |
260 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); | |
261 size_t length = byte_length >> 1; | |
262 | |
263 for (size_t i = 0; i < length; i++) { | |
264 unibrow::uchar c1 = substring1[i]; | |
265 unibrow::uchar c2 = substring2[i]; | |
266 if (c1 != c2) { | |
267 unibrow::uchar s1[1] = { c1 }; | |
268 canonicalize->get(c1, '\0', s1); | |
269 if (s1[0] != c2) { | |
270 unibrow::uchar s2[1] = { c2 }; | |
271 canonicalize->get(c2, '\0', s2); | |
272 if (s1[0] != s2[0]) { | |
273 return 0; | |
274 } | |
275 } | |
276 } | |
277 } | |
278 return 1; | |
279 } | |
280 | |
281 | |
282 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, | 311 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, |
283 Address* stack_base, | 312 Address* stack_base, |
284 Isolate* isolate) { | 313 Isolate* isolate) { |
285 RegExpStack* regexp_stack = isolate->regexp_stack(); | 314 RegExpStack* regexp_stack = isolate->regexp_stack(); |
286 size_t size = regexp_stack->stack_capacity(); | 315 size_t size = regexp_stack->stack_capacity(); |
287 Address old_stack_base = regexp_stack->stack_base(); | 316 Address old_stack_base = regexp_stack->stack_base(); |
288 DCHECK(old_stack_base == *stack_base); | 317 DCHECK(old_stack_base == *stack_base); |
289 DCHECK(stack_pointer <= old_stack_base); | 318 DCHECK(stack_pointer <= old_stack_base); |
290 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size); | 319 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size); |
291 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2); | 320 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2); |
292 if (new_stack_base == NULL) { | 321 if (new_stack_base == NULL) { |
293 return NULL; | 322 return NULL; |
294 } | 323 } |
295 *stack_base = new_stack_base; | 324 *stack_base = new_stack_base; |
296 intptr_t stack_content_size = old_stack_base - stack_pointer; | 325 intptr_t stack_content_size = old_stack_base - stack_pointer; |
297 return new_stack_base - stack_content_size; | 326 return new_stack_base - stack_content_size; |
298 } | 327 } |
299 | 328 |
300 #endif // V8_INTERPRETED_REGEXP | 329 #endif // V8_INTERPRETED_REGEXP |
301 | 330 |
302 } // namespace internal | 331 } // namespace internal |
303 } // namespace v8 | 332 } // namespace v8 |
OLD | NEW |