Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(267)

Side by Side Diff: test/mjsunit/harmony/unicode-character-ranges.js

Issue 1578253005: [regexp] implement character classes for unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: addressed comments Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « test/cctest/test-regexp.cc ('k') | test/mjsunit/harmony/unicode-escapes-in-regexps.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Flags: --harmony-unicode-regexps --harmony-regexp-lookbehind
6
/**
 * Runs `regexp` against `subject` and asserts that the result of exec()
 * equals `expectation`.
 *
 * @param {?Array<string>} expectation Expected exec() result: the match array,
 *     or null when no match is expected.
 * @param {RegExp|string|String} regexp Regexp to execute. A pattern given as a
 *     string is compiled with the "u" flag first.
 * @param {string} subject Text to match against.
 */
function testl(expectation, regexp, subject) {
  // `instanceof String` is false for primitive strings, so check typeof too;
  // otherwise a plain pattern string would reach .exec() and throw.
  const isPatternString =
      typeof regexp === "string" || regexp instanceof String;
  const compiled = isPatternString ? new RegExp(regexp, "u") : regexp;
  assertEquals(expectation, compiled.exec(subject));
}
11
/**
 * Compiles `source` as a unicode-mode ("u" flag) regexp and checks it against
 * `subject` through testl.
 *
 * @param {?Array<string>} expectation Expected exec() result, or null when no
 *     match is expected.
 * @param {string} source Regexp pattern source text.
 * @param {string} subject Text to match against.
 */
// Review (erikcorry, 2016/01/20): string -> source or regexp_source.
// Reply (Yang, 2016/01/20): Done.
function tests(expectation, source, subject) {
  testl(expectation, new RegExp(source, "u"), subject);
}
15
16 // Character ranges.
// Each case runs twice: testl with a regexp literal, tests with the same
// pattern compiled from a string using the "u" flag. In the string form the
// \u escapes are resolved by the string literal, so the RegExp constructor
// receives the raw characters rather than escape sequences.
17 testl(["A"], /[A-D]/u, "A");
18 tests(["A"], "[A-D]", "A");
19 testl(["ABCD"], /[A-D]+/u, "ZABCDEF");
20 tests(["ABCD"], "[A-D]+", "ZABCDEF");
21
// Upper endpoint U+12345 is inside the range, so the negated class fails.
22 testl(["\u{12345}"], /[\u1234-\u{12345}]/u, "\u{12345}");
23 tests(["\u{12345}"], "[\u1234-\u{12345}]", "\u{12345}");
24 testl(null, /[^\u1234-\u{12345}]/u, "\u{12345}");
25 tests(null, "[^\u1234-\u{12345}]", "\u{12345}");
26
// Lower endpoint U+1234 is inside the range as well.
27 testl(["\u{1234}"], /[\u1234-\u{12345}]/u, "\u{1234}");
28 tests(["\u{1234}"], "[\u1234-\u{12345}]", "\u{1234}");
29 testl(null, /[^\u1234-\u{12345}]/u, "\u{1234}");
30 tests(null, "[^\u1234-\u{12345}]", "\u{1234}");
31
// U+1233 sits just below the range: only the negated class matches.
32 testl(null, /[\u1234-\u{12345}]/u, "\u{1233}");
33 tests(null, "[\u1234-\u{12345}]", "\u{1233}");
34 testl(["\u{1233}"], /[^\u1234-\u{12345}]/u, "\u{1233}");
35 tests(["\u{1233}"], "[^\u1234-\u{12345}]", "\u{1233}");
36
// U+12346 sits just above the range: only the negated class matches.
37 testl(["\u{12346}"], /[^\u1234-\u{12345}]/u, "\u{12346}");
38 tests(["\u{12346}"], "[^\u1234-\u{12345}]", "\u{12346}");
39 testl(null, /[\u1234-\u{12345}]/u, "\u{12346}");
40 tests(null, "[\u1234-\u{12345}]", "\u{12346}");
41
// Both range endpoints are above U+FFFF.
42 testl(["\u{12342}"], /[\u{12340}-\u{12345}]/u, "\u{12342}");
43 tests(["\u{12342}"], "[\u{12340}-\u{12345}]", "\u{12342}");
44 testl(null, /[^\u{12340}-\u{12345}]/u, "\u{12342}");
45 tests(null, "[^\u{12340}-\u{12345}]", "\u{12342}");
46
// Range starts below U+FFFF and ends above it; U+FFFF itself lies inside.
47 testl(["\u{ffff}"], /[\u{ff80}-\u{12345}]/u, "\u{ffff}");
48 tests(["\u{ffff}"], "[\u{ff80}-\u{12345}]", "\u{ffff}");
49 testl(null, /[^\u{ff80}-\u{12345}]/u, "\u{ffff}");
50 tests(null, "[^\u{ff80}-\u{12345}]", "\u{ffff}");
51
52 // Lone surrogate
// An unpaired surrogate in the subject is matched as a character of its own.
// A surrogate that forms a pair with its neighbour is not matched separately:
// in the last two cases the in-range \udc02 and \ud802 are skipped because
// each belongs to a surrogate pair.
53 testl(["\ud800"], /[^\u{ff80}-\u{12345}]/u, "\uff99\u{d800}A");
54 tests(["\udc00"], "[^\u{ff80}-\u{12345}]", "\uff99\u{dc00}A");
55 testl(["\udc01"], /[\u0100-\u{10ffff}]/u, "A\udc01");
56 testl(["\udc03"], /[\udc01-\udc03]/u, "\ud801\udc02\udc03");
57 testl(["\ud801"], /[\ud801-\ud803]/u, "\ud802\udc01\ud801");
58
59 // Paired surrogate.
// \u{d800}\u{dc00} in the subject forms the single code point U+10000, which
// lies inside the range, so the negated class has nothing to match.
60 testl(null, /[^\u{ff80}-\u{12345}]/u, "\u{d800}\u{dc00}");
61 tests(null, "[^\u{ff80}-\u{12345}]", "\u{d800}\u{dc00}");
62 testl(["\ud800\udc00"], /[\u{ff80}-\u{12345}]/u, "\u{d800}\u{dc00}");
63 tests(["\ud800\udc00"], "[\u{ff80}-\u{12345}]", "\u{d800}\u{dc00}");
// A pair written as two \uXXXX escapes in the pattern matches the code point
// U+10E6D that the pair encodes.
64 testl(["foo\u{10e6d}bar"], /foo\ud803\ude6dbar/u, "foo\u{10e6d}bar");
65
66 // Lone surrogates
// A surrogate escape in the pattern only matches unpaired occurrences: the
// leading \ud801\udc01 (and, in the second case, the \ud801\udc01 pair in the
// middle of the subject) is treated as one code point and skipped.
67 testl(["\ud801\ud801"], /\ud801+/u, "\ud801\udc01\ud801\ud801");
68 testl(["\udc01\udc01"], /\udc01+/u, "\ud801\ud801\udc01\udc01\udc01");
69
// The first "A" is preceded by a single code point (the pair), so two-character
// patterns can only match at the two lone surrogates further on.
70 testl(["\udc02\udc03A"], /\W\WA/u, "\ud801\udc01A\udc02\udc03A");
71 testl(["\ud801\ud802"], /\ud801./u, "\ud801\udc01\ud801\ud802");
72 testl(["\udc02\udc03A"], /[\ud800-\udfff][\ud800-\udfff]A/u,
73 "\ud801\udc01A\udc02\udc03A");
74
75 // Character classes
// The cases without the u flag are contrast cases: there the pair is two
// separate code units, so \W and [^\w] match only the lead surrogate. With the
// u flag the whole pair is matched as one character.
76 testl(null, /\w/u, "\ud801\udc01");
77 testl(["\ud801"], /[^\w]/, "\ud801\udc01");
78 testl(["\ud801\udc01"], /[^\w]/u, "\ud801\udc01");
79 testl(["\ud801"], /\W/, "\ud801\udc01");
80 testl(["\ud801\udc01"], /\W/u, "\ud801\udc01");
81
// "." matches a lone surrogate; the negative lookbehinds reject the position
// where the character before X is \ud800, leaving "aX" as the match.
82 testl(["\ud800X"], /.X/u, "\ud800XaX");
83 testl(["aX"], /.(?<!\ud800)X/u, "\ud800XaX");
84 testl(["aX"], /.(?<![\ud800-\ud900])X/u, "\ud800XaX");
85
// The empty class [] never matches; [^] matches any single character,
// including one above U+FFFF.
86 testl(null, /[]/u, "\u1234");
87 testl(["0abc"], /[^]abc/u, "0abc");
88 testl(["\u1234abc"], /[^]abc/u, "\u1234abc");
89 testl(["\u{12345}abc"], /[^]abc/u, "\u{12345}abc");
90
91 // Backward matches of lone surrogates.
// Each pattern uses a lookbehind (?<=...) containing a capture group, so the
// expected arrays are [full match, captured group]. The subjects mix surrogate
// pairs with lone surrogates; the expected capture is always built from a lone
// surrogate, never from half of a pair.
92 testl(["B", "\ud803A"], /(?<=([\ud800-\ud900]A))B/u,
93 "\ud801\udc00AB\udc00AB\ud802\ud803AB");
erikcorry 2016/01/20 13:48:51 This is saying that if we try to match lone-surrog
Yang 2016/01/20 14:04:14 This is just (a random test) to check that charact
94 testl(["B", "\udc11A"], /(?<=([\udc00-\udd00]A))B/u,
95 "\ud801\udc00AB\udc11AB\ud802\ud803AB");
96 testl(["X", "\ud800C"], /(?<=(\ud800\w))X/u,
97 "\ud800\udc00AX\udc11BX\ud800\ud800CX");
98 testl(["C", "\ud800\ud800"], /(?<=(\ud800.))\w/u,
99 "\ud800\udc00AX\udc11BX\ud800\ud800CX");
100 testl(["X", "\udc01C"], /(?<=(\udc01\w))X/u,
101 "\ud800\udc01AX\udc11BX\udc01\udc01CX");
102 testl(["C", "\udc01\udc01"], /(?<=(\udc01.))./u,
103 "\ud800\udc01AX\udc11BX\udc01\udc01CX");
OLDNEW
« no previous file with comments | « test/cctest/test-regexp.cc ('k') | test/mjsunit/harmony/unicode-escapes-in-regexps.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698