Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1035)

Side by Side Diff: src/regexp/regexp-macro-assembler.cc

Issue 1599303002: [regexp] implement case-insensitive unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@unicodeclass
Patch Set: fixes Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-macro-assembler.h" 5 #include "src/regexp/regexp-macro-assembler.h"
6 6
7 #include "src/assembler.h" 7 #include "src/assembler.h"
8 #include "src/isolate-inl.h" 8 #include "src/isolate-inl.h"
9 #include "src/regexp/regexp-stack.h" 9 #include "src/regexp/regexp-stack.h"
10 #include "src/simulator.h" 10 #include "src/simulator.h"
11 11
12 #ifdef V8_I18N_SUPPORT
13 #include "unicode/uchar.h"
14 #endif // V8_I18N_SUPPORT
15
12 namespace v8 { 16 namespace v8 {
13 namespace internal { 17 namespace internal {
14 18
// Constructor: starts with slow_safe_compiler_ set to false and global_mode_
// set to NOT_GLOBAL, and records the given isolate and zone in isolate_ and
// zone_.
15 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone) 19 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone)
16 : slow_safe_compiler_(false), 20 : slow_safe_compiler_(false),
17 global_mode_(NOT_GLOBAL), 21 global_mode_(NOT_GLOBAL),
18 isolate_(isolate), 22 isolate_(isolate),
19 zone_(zone) {} 23 zone_(zone) {}
20 24
21 25
// Destructor; the body is intentionally empty.
22 RegExpMacroAssembler::~RegExpMacroAssembler() { 26 RegExpMacroAssembler::~RegExpMacroAssembler() {
23 } 27 }
24 28
25 29
30 int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
31 Address byte_offset2,
32 size_t byte_length,
33 Isolate* isolate) {
34 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
35 isolate->regexp_macro_assembler_canonicalize();
36 // This function is not allowed to cause a garbage collection.
37 // A GC might move the calling generated code and invalidate the
38 // return address on the stack.
39 DCHECK(byte_length % 2 == 0);
40 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
41 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
42 size_t length = byte_length >> 1;
43
44 #ifdef V8_I18N_SUPPORT
45 if (isolate == nullptr) {
46 for (size_t i = 0; i < length; i++) {
47 uc32 c1 = substring1[i];
48 uc32 c2 = substring2[i];
49 if (unibrow::Utf16::IsLeadSurrogate(c1)) {
50 // Non-BMP characters do not have case-equivalents in the BMP.
51 // Both have to be non-BMP for them to be able to match.
52 if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0;
53 if (i + 1 < length) {
54 uc16 c1t = substring1[i + 1];
55 uc16 c2t = substring2[i + 1];
56 if (unibrow::Utf16::IsTrailSurrogate(c1t) &&
57 unibrow::Utf16::IsTrailSurrogate(c2t)) {
58 c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t);
59 c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t);
60 i++;
61 }
62 }
63 }
64 c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT);
65 c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT);
66 if (c1 != c2) return 0;
67 }
68 return 1;
69 }
70 #endif // V8_I18N_SUPPORT
71 DCHECK_NOT_NULL(isolate);
72 for (size_t i = 0; i < length; i++) {
73 unibrow::uchar c1 = substring1[i];
74 unibrow::uchar c2 = substring2[i];
75 if (c1 != c2) {
76 unibrow::uchar s1[1] = {c1};
77 canonicalize->get(c1, '\0', s1);
78 if (s1[0] != c2) {
79 unibrow::uchar s2[1] = {c2};
80 canonicalize->get(c2, '\0', s2);
81 if (s1[0] != s2[0]) {
82 return 0;
83 }
84 }
85 }
86 }
87 return 1;
88 }
89
90
26 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. 91 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
27 92
// Constructor: passes isolate and zone straight through to the
// RegExpMacroAssembler constructor and does nothing else.
28 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate, 93 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate,
29 Zone* zone) 94 Zone* zone)
30 : RegExpMacroAssembler(isolate, zone) {} 95 : RegExpMacroAssembler(isolate, zone) {}
31 96
32 97
// Destructor; the body is intentionally empty.
33 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { 98 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
34 } 99 }
35 100
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after
238 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 303 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
239 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 304 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
240 305
241 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 306 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
242 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 307 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
243 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 308 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
244 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 309 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
245 }; 310 };
246 311
247 312
// OLD-side only: this definition appears in the left column of the diff and
// has no counterpart on the NEW side at this position.
//
// Compares two UC16 substrings of equal length for equality, ignoring case.
// byte_offset1 / byte_offset2 are reinterpreted as uc16 pointers and
// byte_length is the substring length in bytes (DCHECKed to be even).
// Returns 1 when every code unit pair matches directly or after
// canonicalization, and 0 at the first pair that does not.
248 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
249 Address byte_offset1,
250 Address byte_offset2,
251 size_t byte_length,
252 Isolate* isolate) {
253 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
254 isolate->regexp_macro_assembler_canonicalize();
255 // This function is not allowed to cause a garbage collection.
256 // A GC might move the calling generated code and invalidate the
257 // return address on the stack.
258 DCHECK(byte_length % 2 == 0);
259 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
260 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
// Convert the byte count into a count of 16-bit code units.
261 size_t length = byte_length >> 1;
262
263 for (size_t i = 0; i < length; i++) {
264 unibrow::uchar c1 = substring1[i];
265 unibrow::uchar c2 = substring2[i];
266 if (c1 != c2) {
// s1 is pre-seeded with c1; presumably get() overwrites it only when a
// canonical form exists (TODO confirm against unibrow::Mapping).
267 unibrow::uchar s1[1] = { c1 };
268 canonicalize->get(c1, '\0', s1);
// Only canonicalize c2 if canonicalizing c1 alone did not produce a match.
269 if (s1[0] != c2) {
270 unibrow::uchar s2[1] = { c2 };
271 canonicalize->get(c2, '\0', s2);
272 if (s1[0] != s2[0]) {
273 return 0;
274 }
275 }
276 }
277 }
278 return 1;
279 }
280
281
// Grows the isolate's regexp stack to twice its current capacity.
//
// stack_pointer is the current position within the stack and *stack_base is
// the caller's copy of the stack base (DCHECKed to equal the stack's own
// base). On success, *stack_base is updated to the new base and the return
// value is the new base minus the amount of stack in use
// (old base - stack_pointer). Returns NULL, leaving *stack_base untouched,
// if EnsureCapacity returns NULL.
282 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, 313 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
283 Address* stack_base, 314 Address* stack_base,
284 Isolate* isolate) { 315 Isolate* isolate) {
285 RegExpStack* regexp_stack = isolate->regexp_stack(); 316 RegExpStack* regexp_stack = isolate->regexp_stack();
286 size_t size = regexp_stack->stack_capacity(); 317 size_t size = regexp_stack->stack_capacity();
287 Address old_stack_base = regexp_stack->stack_base(); 318 Address old_stack_base = regexp_stack->stack_base();
288 DCHECK(old_stack_base == *stack_base); 319 DCHECK(old_stack_base == *stack_base);
289 DCHECK(stack_pointer <= old_stack_base); 320 DCHECK(stack_pointer <= old_stack_base);
290 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size); 321 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
// Request double the current capacity.
291 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2); 322 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
292 if (new_stack_base == NULL) { 323 if (new_stack_base == NULL) {
293 return NULL; 324 return NULL;
294 } 325 }
295 *stack_base = new_stack_base; 326 *stack_base = new_stack_base;
// Keep the stack pointer at the same distance below the (new) base.
296 intptr_t stack_content_size = old_stack_base - stack_pointer; 327 intptr_t stack_content_size = old_stack_base - stack_pointer;
297 return new_stack_base - stack_content_size; 328 return new_stack_base - stack_content_size;
298 } 329 }
299 330
300 #endif // V8_INTERPRETED_REGEXP 331 #endif // V8_INTERPRETED_REGEXP
301 332
302 } // namespace internal 333 } // namespace internal
303 } // namespace v8 334 } // namespace v8
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698