OLD | NEW |
| (Empty) |
1 // Copyright 2014 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // ES6 extends the \uxxxx escape and also allows \u{xxxxx}. | |
6 | |
7 // Flags: --harmony-unicode-regexps | |
8 | |
// Shared expectation for the escape tests below: the regexp `r` must accept
// "foo" and "boo" and must reject "moo".
function testRegexpHelper(r) {
  for (const accepted of ["foo", "boo"]) {
    assertTrue(r.test(accepted));
  }
  assertFalse(r.test("moo"));
}
14 | |
15 | |
// "f" is U+0066 and "b" is U+0062; every spelling of the alternation below
// must behave like /(f|b)oo/.
(function TestUnicodeEscapes() {
  const checkAll = (patterns) => {
    for (const pattern of patterns) {
      testRegexpHelper(pattern);
    }
  };

  // Regexp literals.
  checkAll([
    /(\u0066|\u0062)oo/,
    /(\u0066|\u0062)oo/u,
    /(\u{0066}|\u{0062})oo/u,
    /(\u{66}|\u{000062})oo/u,
  ]);

  // The backslash is doubled so that the escape reaches the regexp parser
  // instead of being consumed as a string-literal escape.
  checkAll([
    new RegExp("(\\u0066|\\u0062)oo"),
    new RegExp("(\\u0066|\\u0062)oo", "u"),
    new RegExp("(\\u{0066}|\\u{0062})oo", "u"),
    new RegExp("(\\u{66}|\\u{000062})oo", "u"),
  ]);

  // Escapes resolved by the string literal itself should work too.
  checkAll([
    new RegExp("(\u0066|\u0062)oo"),
    new RegExp("(\u0066|\u0062)oo", "u"),
    new RegExp("(\u{0066}|\u{0062})oo", "u"),
    new RegExp("(\u{66}|\u{000062})oo", "u"),
  ]);
})();
35 | |
36 | |
// Same as TestUnicodeEscapes, but the escapes are the end points of a
// character-class range running from "b" (U+0062) to "f" (U+0066).
(function TestUnicodeEscapesInCharacterClasses() {
  const checkAll = (patterns) => {
    for (const pattern of patterns) {
      testRegexpHelper(pattern);
    }
  };

  // Regexp literals.
  checkAll([
    /[\u0062-\u0066]oo/,
    /[\u0062-\u0066]oo/u,
    /[\u{0062}-\u{0066}]oo/u,
    /[\u{62}-\u{00000066}]oo/u,
  ]);

  // The backslash is doubled so that the escape reaches the regexp parser
  // instead of being consumed as a string-literal escape.
  checkAll([
    new RegExp("[\\u0062-\\u0066]oo"),
    new RegExp("[\\u0062-\\u0066]oo", "u"),
    new RegExp("[\\u{0062}-\\u{0066}]oo", "u"),
    new RegExp("[\\u{62}-\\u{00000066}]oo", "u"),
  ]);

  // Escapes resolved by the string literal itself should work too.
  checkAll([
    new RegExp("[\u0062-\u0066]oo"),
    new RegExp("[\u0062-\u0066]oo", "u"),
    new RegExp("[\u{0062}-\u{0066}]oo", "u"),
    new RegExp("[\u{62}-\u{00000066}]oo", "u"),
  ]);
})();
56 | |
57 | |
(function TestBraceEscapesWithoutUnicodeFlag() {
  // Without the u flag, \u followed by something that is not a valid escape
  // is parsed as a plain "u", and a following {x} is then the repetition
  // count for that "u".

  // Runs every [subject, shouldMatch] pair against each regexp in turn.
  const expectResults = (regexps, cases) => {
    for (const r of regexps) {
      for (const [subject, shouldMatch] of cases) {
        (shouldMatch ? assertTrue : assertFalse)(r.test(subject));
      }
    }
  };

  // u{2}: exactly two "u"s.
  expectResults(
    [/f\u{2}bar/, new RegExp("f\\u{2}bar")],
    [
      ["fbar", false],
      ["fubar", false],
      ["fuubar", true],
      ["fuuubar", false],
    ]);

  // u{1,2}: one or two "u"s.
  expectResults(
    [/f\u{1,2}bar/, new RegExp("f\\u{1,2}bar")],
    [
      ["fbar", false],
      ["fubar", true],
      ["fuubar", true],
      ["fuuubar", false],
    ]);

  // Inside a class there is no repetition: "u", "{", "2" and "}" are simply
  // the members of the class.
  expectResults(
    [/[\u{2}]/, new RegExp("[\\u{2}]")],
    [
      ["u", true],
      ["{", true],
      ["2", true],
      ["}", true],
      ["q", false],
      ["(", false],
      [")", false],
    ]);
})();
92 | |
93 | |
(function TestInvalidEscapes() {
  // Without the u flag, malformed unicode escapes and other unknown escapes
  // are treated as identity escapes.
  const sequences = [
    /first\u\x\z\8\9second/,
    new RegExp("first\\u\\x\\z\\8\\9second"),
  ];
  for (const r of sequences) {
    assertTrue(r.test("firstuxz89second"));
  }

  const classes = [
    /[\u\x\z\8\9]/,
    new RegExp("[\\u\\x\\z\\8\\9]"),
  ];
  for (const r of classes) {
    for (const member of ["u", "x", "z", "8", "9"]) {
      assertTrue(r.test(member));
    }
    for (const outsider of ["q", "7"]) {
      assertFalse(r.test(outsider));
    }
  }

  // With the u flag, however, the same escapes are invalid. Each entry is
  // the escape as it appears in the pattern: one backslash plus its tail.
  const invalidEscapes = [
    "\\u", "\\u12", "\\ufoo", "\\x", "\\xfoo", "\\z", "\\8", "\\9",
  ];

  // Source text of a regexp literal, e.g. /\u/u.
  for (const badEscape of invalidEscapes) {
    assertThrows("/" + badEscape + "/u", SyntaxError);
  }

  // Source text of a constructor call, e.g. new RegExp('\\u', 'u'); the
  // extra backslash keeps the escape intact through the string literal.
  for (const badEscape of invalidEscapes) {
    assertThrows("new RegExp('\\" + badEscape + "', 'u')", SyntaxError);
  }
})();
134 | |
135 | |
(function TestTooBigHexEscape() {
  // The hex number inside \u{} has a maximum value of 10ffff. The two bare
  // expressions only have to evaluate without throwing. Every statement is
  // terminated explicitly: a line that starts with "/" must never depend on
  // automatic semicolon insertion.
  /\u{10ffff}/u;
  new RegExp("\\u{10ffff}", "u");
  assertThrows("/\\u{110000}/u", SyntaxError);
  assertThrows("new RegExp('\\\\u{110000}', 'u')", SyntaxError);

  // Without the u flag, they're of course fine ({x} is the count).
  /\u{110000}/;
  new RegExp("\\u{110000}");
})();
147 | |
148 | |
(function TestSyntaxEscapes() {
  // An escaped syntax character (here "\[") means the same thing with and
  // without the u flag, in a literal as well as via the constructor.
  const candidates = [
    /foo\[bar/,
    new RegExp("foo\\[bar"),
    /foo\[bar/u,
    new RegExp("foo\\[bar", "u"),
  ];
  for (const r of candidates) {
    assertTrue(r.test("foo[bar"));
    assertFalse(r.test("foo]bar"));
  }
})();
160 | |
161 | |
(function TestUnicodeSurrogates() {
  // The surrogate pair [U+D803, U+DE6D] encodes U+10E6D, so a pattern that
  // spells the pair with two \uXXXX escapes must match that code point.
  const subject = "foo\u{10e6d}bar";
  const patterns = [
    /foo\ud803\ude6dbar/u,
    new RegExp("foo\\ud803\\ude6dbar", "u"),
  ];
  patterns.forEach((r) => {
    assertTrue(r.test(subject));
  });
})();
170 | |
171 | |
(function AllFlags() {
  // Passing g, i, m, y and u together must switch on every matching flag
  // property; passing no flags must leave all of them off.
  const flagProperties = [
    "global", "ignoreCase", "multiline", "sticky", "unicode",
  ];

  for (const r of [/foo/gimyu, new RegExp("foo", "gimyu")]) {
    for (const property of flagProperties) {
      assertTrue(r[property]);
    }
  }

  for (const r of [/foo/, new RegExp("foo")]) {
    for (const property of flagProperties) {
      assertFalse(r[property]);
    }
  }
})();
196 | |
197 | |
(function DuplicatedFlags() {
  // Test that duplicating the u flag is not allowed. The expected error type
  // is named explicitly so that an unrelated exception (for example a
  // ReferenceError from a typo in the evaluated source) cannot satisfy the
  // assertion.
  assertThrows("/foo/ugu", SyntaxError);
  assertThrows("new RegExp('foo', 'ugu')", SyntaxError);
})();
203 | |
204 | |
(function ToString() {
  // Test that the u flag is included in the string representation of regexps.
  function helper(r) {
    // assertEquals takes (expected, found); keeping that order makes a
    // failure report label the two values correctly.
    assertEquals("/foo/u", r.toString());
  }
  helper(/foo/u);
  helper(new RegExp("foo", "u"));
})();
213 | |
// Non-BMP patterns.
// Single character atom: U+12345 must match whether the subject spells it as
// \u{12345} or as the surrogate pair \ud808\udf45, and must not match the
// trail surrogate \udf45 on its own.
(function TestNonBmpSingleCharacterAtom() {
  const viaConstructor = () => new RegExp("\u{12345}", "u");
  const viaLiteral = () => /\u{12345}/u;

  for (const subject of ["\u{12345}", "\ud808\udf45"]) {
    assertTrue(viaConstructor().test(subject));
    assertTrue(viaLiteral().test(subject));
  }

  const loneTrail = "\udf45";
  assertFalse(viaConstructor().test(loneTrail));
  assertFalse(viaLiteral().test(loneTrail));
})();
222 | |
// Multi-character atom: two consecutive non-BMP characters.
(function TestNonBmpMultiCharacterAtom() {
  const viaConstructor = () => new RegExp("\u{12345}\u{23456}", "u");
  const viaLiteral = () => /\u{12345}\u{23456}/u;

  assertTrue(viaConstructor().test("a\u{12345}\u{23456}b"));
  assertTrue(viaLiteral().test("b\u{12345}\u{23456}c"));

  // A lone trail surrogate in place of the first character must not match.
  assertFalse(viaConstructor().test("a\udf45\u{23456}b"));
  assertFalse(viaLiteral().test("b\udf45\u{23456}c"));
})();
228 | |
// Disjunction: the second non-BMP character sits inside a non-capturing
// group.
(function TestNonBmpDisjunction() {
  const viaConstructor = () => new RegExp("\u{12345}(?:\u{23456})", "u");
  const viaLiteral = () => /\u{12345}(?:\u{23456})/u;

  assertTrue(viaConstructor().test("a\u{12345}\u{23456}b"));
  assertTrue(viaLiteral().test("b\u{12345}\u{23456}c"));

  // A lone trail surrogate in place of the first character must not match.
  assertFalse(viaConstructor().test("a\udf45\u{23456}b"));
  assertFalse(viaLiteral().test("b\udf45\u{23456}c"));
})();
236 | |
// Alternative: either of two non-BMP characters.
(function TestNonBmpAlternative() {
  const viaConstructor = () => new RegExp("\u{12345}|\u{23456}", "u");
  const viaLiteral = () => /\u{12345}|\u{23456}/u;

  assertTrue(viaConstructor().test("a\u{12345}b"));
  assertTrue(viaLiteral().test("b\u{23456}c"));

  // Subjects that contain only unpaired surrogates must not match.
  assertFalse(viaConstructor().test("a\udf45\ud84db"));
  assertFalse(viaLiteral().test("b\udf45\ud808c"));
})();
242 | |
// Capture: a back reference to a captured non-BMP character must match the
// same character again and must not match the other alternative.
(function TestNonBmpCapture() {
  const viaConstructor = () => new RegExp("(\u{12345}|\u{23456}).\\1", "u");
  const viaLiteral = () => /(\u{12345}|\u{23456}).\1/u;

  const repeated = "\u{12345}b\u{12345}";
  assertTrue(viaConstructor().test(repeated));
  assertTrue(viaLiteral().test(repeated));

  const mismatched = "\u{12345}b\u{23456}";
  assertFalse(viaConstructor().test(mismatched));
  assertFalse(viaLiteral().test(mismatched));
})();
250 | |
// Quantifier: with the u flag {3} repeats the whole non-BMP character;
// without it, only the trailing \udf45 code unit is repeated.
(function TestNonBmpQuantifier() {
  const threeCharacters = "\u{12345}\u{12345}\u{12345}";
  const oneCharacterTwoTrails = "\u{12345}\udf45\udf45";

  assertTrue(new RegExp("\u{12345}{3}", "u").test(threeCharacters));
  assertTrue(/\u{12345}{3}/u.test(threeCharacters));

  // No u flag here: the quantifier binds to the trail surrogate alone.
  assertTrue(new RegExp("\u{12345}{3}").test(oneCharacterTwoTrails));

  // The pair written as two \uXXXX escapes is still one character under u.
  assertFalse(/\ud808\udf45{3}/u.test(oneCharacterTwoTrails));
  assertTrue(/\ud808\udf45{3}/u.test(threeCharacters));

  assertFalse(new RegExp("\u{12345}{3}", "u").test(oneCharacterTwoTrails));
  assertFalse(/\u{12345}{3}/u.test(oneCharacterTwoTrails));
})();
259 | |
// Literal surrogates: a surrogate pair in the pattern, written literally or
// as two \uXXXX escapes, is treated as a single character, both as a
// quantified atom and as the end point of a class range.
(function TestLiteralSurrogates() {
  const repeatedSubject = "\u{10000}\u{10000}";
  const quantifiedSources = [
    "\ud800\udc00+",
    "\\ud800\\udc00+",
  ];
  for (const source of quantifiedSources) {
    assertEquals([repeatedSubject],
                 new RegExp(source, "u").exec(repeatedSubject));
  }

  const rangeSubject = "\u{10003}\u{50001}";
  const rangeSources = [
    "[\\ud800\\udc03-\\ud900\\udc01\]+",
    "[\ud800\udc03-\u{50001}\]+",
  ];
  for (const source of rangeSources) {
    assertEquals([rangeSubject],
                 new RegExp(source, "u").exec(rangeSubject));
  }
})();
272 | |
// Unicode escape sequences to represent a non-BMP character cannot have
// mixed notation, and must follow the rules for RegExpUnicodeEscapeSequence.
// Both ways of mixing are covered for the class range: escaped lead with
// literal trail, and literal lead with escaped trail.
assertThrows(() => new RegExp("[\\ud800\udc03-\ud900\\udc01\]+", "u"),
             SyntaxError);
assertThrows(() => new RegExp("[\ud800\\udc03-\\ud900\udc01\]+", "u"),
             SyntaxError);
assertNull(new RegExp("\\ud800\udc00+", "u").exec("\u{10000}\u{10000}"));
assertNull(new RegExp("\ud800\\udc00+", "u").exec("\u{10000}\u{10000}"));

// Inside a class, an escaped half (\uXXXX or \u{X...}) next to a literal half
// must not match the character the two halves would form as a pair.
assertNull(new RegExp("[\\ud800\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\\u{d800}\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\ud800\\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\ud800\\u{dc00}]", "u").exec("\u{10000}"));

// A \u{X...} escape never pairs up with a neighbouring escape.
assertNull(/\u{d800}\u{dc00}+/u.exec("\ud800\udc00\udc00"));
assertNull(/\ud800\u{dc00}+/u.exec("\ud800\udc00\udc00"));
assertNull(/\u{d800}\udc00+/u.exec("\ud800\udc00\udc00"));
OLD | NEW |