Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/regexp/regexp-macro-assembler.cc

Issue 1599303002: [regexp] implement case-insensitive unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@unicodeclass
Patch Set: fix mips Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-macro-assembler.h" 5 #include "src/regexp/regexp-macro-assembler.h"
6 6
7 #include "src/assembler.h" 7 #include "src/assembler.h"
8 #include "src/isolate-inl.h" 8 #include "src/isolate-inl.h"
9 #include "src/regexp/regexp-stack.h" 9 #include "src/regexp/regexp-stack.h"
10 #include "src/simulator.h" 10 #include "src/simulator.h"
11 11
12 #ifdef V8_I18N_SUPPORT
13 #include "unicode/uchar.h"
14 #endif // V8_I18N_SUPPORT
15
12 namespace v8 { 16 namespace v8 {
13 namespace internal { 17 namespace internal {
14 18
15 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone) 19 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone)
16 : slow_safe_compiler_(false), 20 : slow_safe_compiler_(false),
17 global_mode_(NOT_GLOBAL), 21 global_mode_(NOT_GLOBAL),
18 isolate_(isolate), 22 isolate_(isolate),
19 zone_(zone) {} 23 zone_(zone) {}
20 24
21 25
22 RegExpMacroAssembler::~RegExpMacroAssembler() { 26 RegExpMacroAssembler::~RegExpMacroAssembler() {
23 } 27 }
24 28
25 29
30 int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
31 Address byte_offset2,
32 size_t byte_length,
33 Isolate* isolate) {
34 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
35 isolate->regexp_macro_assembler_canonicalize();
36 // This function is not allowed to cause a garbage collection.
37 // A GC might move the calling generated code and invalidate the
38 // return address on the stack.
39 DCHECK(byte_length % 2 == 0);
40 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
41 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
42 size_t length = byte_length >> 1;
43
44 #ifdef V8_I18N_SUPPORT
45 if (isolate == nullptr) {
46 for (size_t i = 0; i < length; i++) {
47 uc32 c1 = substring1[i];
48 uc32 c2 = substring2[i];
49 if (unibrow::Utf16::IsLeadSurrogate(c1)) {
erikcorry 2016/01/25 10:26:37 This looks incomplete wrt unpaired surrogates at t...
Yang 2016/01/25 11:46:37 I think this is correct. Unpaired lead surrogate a...
erikcorry 2016/01/25 11:57:47 I guess we are here making use of the fact that th...
Yang 2016/01/25 12:09:02 I added a comment about the fact that we rely on N...
50 if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0;
51 if (i + 1 < length) {
52 uc16 c1t = substring1[i + 1];
53 uc16 c2t = substring2[i + 1];
54 if (unibrow::Utf16::IsTrailSurrogate(c1t) &&
55 unibrow::Utf16::IsTrailSurrogate(c2t)) {
56 c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t);
57 c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t);
58 i++;
59 }
60 }
61 }
62 c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT);
63 c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT);
64 if (c1 != c2) return 0;
65 }
66 return 1;
67 }
68 #endif // V8_I18N_SUPPORT
erikcorry 2016/01/25 10:26:37 Should this be a #else ?
Yang 2016/01/25 11:46:37 No. If we have ICU, we decide on whether to take t...
69 DCHECK_NOT_NULL(isolate);
70 for (size_t i = 0; i < length; i++) {
71 unibrow::uchar c1 = substring1[i];
72 unibrow::uchar c2 = substring2[i];
73 if (c1 != c2) {
74 unibrow::uchar s1[1] = { c1 };
75 canonicalize->get(c1, '\0', s1);
76 if (s1[0] != c2) {
77 unibrow::uchar s2[1] = { c2 };
78 canonicalize->get(c2, '\0', s2);
79 if (s1[0] != s2[0]) {
80 return 0;
81 }
82 }
83 }
84 }
85 return 1;
86 }
87
88
26 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. 89 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
27 90
28 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate, 91 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate,
29 Zone* zone) 92 Zone* zone)
30 : RegExpMacroAssembler(isolate, zone) {} 93 : RegExpMacroAssembler(isolate, zone) {}
31 94
32 95
33 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { 96 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
34 } 97 }
35 98
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after
238 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 301 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
239 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 302 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
240 303
241 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 304 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
242 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 305 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
243 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 306 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
244 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 307 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
245 }; 308 };
246 309
247 310
248 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
249 Address byte_offset1,
250 Address byte_offset2,
251 size_t byte_length,
252 Isolate* isolate) {
253 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
254 isolate->regexp_macro_assembler_canonicalize();
255 // This function is not allowed to cause a garbage collection.
256 // A GC might move the calling generated code and invalidate the
257 // return address on the stack.
258 DCHECK(byte_length % 2 == 0);
259 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
260 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
261 size_t length = byte_length >> 1;
262
263 for (size_t i = 0; i < length; i++) {
264 unibrow::uchar c1 = substring1[i];
265 unibrow::uchar c2 = substring2[i];
266 if (c1 != c2) {
267 unibrow::uchar s1[1] = { c1 };
268 canonicalize->get(c1, '\0', s1);
269 if (s1[0] != c2) {
270 unibrow::uchar s2[1] = { c2 };
271 canonicalize->get(c2, '\0', s2);
272 if (s1[0] != s2[0]) {
273 return 0;
274 }
275 }
276 }
277 }
278 return 1;
279 }
280
281
282 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, 311 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
283 Address* stack_base, 312 Address* stack_base,
284 Isolate* isolate) { 313 Isolate* isolate) {
285 RegExpStack* regexp_stack = isolate->regexp_stack(); 314 RegExpStack* regexp_stack = isolate->regexp_stack();
286 size_t size = regexp_stack->stack_capacity(); 315 size_t size = regexp_stack->stack_capacity();
287 Address old_stack_base = regexp_stack->stack_base(); 316 Address old_stack_base = regexp_stack->stack_base();
288 DCHECK(old_stack_base == *stack_base); 317 DCHECK(old_stack_base == *stack_base);
289 DCHECK(stack_pointer <= old_stack_base); 318 DCHECK(stack_pointer <= old_stack_base);
290 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size); 319 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
291 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2); 320 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
292 if (new_stack_base == NULL) { 321 if (new_stack_base == NULL) {
293 return NULL; 322 return NULL;
294 } 323 }
295 *stack_base = new_stack_base; 324 *stack_base = new_stack_base;
296 intptr_t stack_content_size = old_stack_base - stack_pointer; 325 intptr_t stack_content_size = old_stack_base - stack_pointer;
297 return new_stack_base - stack_content_size; 326 return new_stack_base - stack_content_size;
298 } 327 }
299 328
300 #endif // V8_INTERPRETED_REGEXP 329 #endif // V8_INTERPRETED_REGEXP
301 330
302 } // namespace internal 331 } // namespace internal
303 } // namespace v8 332 } // namespace v8
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698