| OLD | NEW |
| (Empty) |
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.

// Flags: --harmony-unicode-regexps
| 8 | |
// Asserts that the regexp |r| matches "foo" and "boo" but does not match
// "moo". Callers pass patterns that spell an f-or-b alternative followed by
// "oo" using different escape notations.
function testRegexpHelper(r) {
  assertTrue(r.test("foo"));
  assertTrue(r.test("boo"));
  assertFalse(r.test("moo"));
}
| 14 | |
| 15 | |
// Checks \uXXXX escapes with and without the u flag, and braced \u{...}
// escapes (including extra leading zeros) with the u flag, outside character
// classes. Each pattern is tried as a literal, as a RegExp built from a string
// that still contains the backslash escape, and as a RegExp built from a
// string in which the escape was already resolved by the string literal.
(function TestUnicodeEscapes() {
  testRegexpHelper(/(\u0066|\u0062)oo/);
  testRegexpHelper(/(\u0066|\u0062)oo/u);
  testRegexpHelper(/(\u{0066}|\u{0062})oo/u);
  testRegexpHelper(/(\u{66}|\u{000062})oo/u);

  // Note that we need \\ inside a string, otherwise it's interpreted as a
  // unicode escape inside a string.
  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo"));
  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo", "u"));
  testRegexpHelper(new RegExp("(\\u{0066}|\\u{0062})oo", "u"));
  testRegexpHelper(new RegExp("(\\u{66}|\\u{000062})oo", "u"));

  // Though, unicode escapes via strings should work too.
  testRegexpHelper(new RegExp("(\u0066|\u0062)oo"));
  testRegexpHelper(new RegExp("(\u0066|\u0062)oo", "u"));
  testRegexpHelper(new RegExp("(\u{0066}|\u{0062})oo", "u"));
  testRegexpHelper(new RegExp("(\u{66}|\u{000062})oo", "u"));
})();
| 35 | |
| 36 | |
// Same coverage as the plain-escape test, but with the escapes used as the
// endpoints of a character class range (b through f).
(function TestUnicodeEscapesInCharacterClasses() {
  testRegexpHelper(/[\u0062-\u0066]oo/);
  testRegexpHelper(/[\u0062-\u0066]oo/u);
  testRegexpHelper(/[\u{0062}-\u{0066}]oo/u);
  testRegexpHelper(/[\u{62}-\u{00000066}]oo/u);

  // Note that we need \\ inside a string, otherwise it's interpreted as a
  // unicode escape inside a string.
  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo"));
  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo", "u"));
  testRegexpHelper(new RegExp("[\\u{0062}-\\u{0066}]oo", "u"));
  testRegexpHelper(new RegExp("[\\u{62}-\\u{00000066}]oo", "u"));

  // Though, unicode escapes via strings should work too.
  testRegexpHelper(new RegExp("[\u0062-\u0066]oo"));
  testRegexpHelper(new RegExp("[\u0062-\u0066]oo", "u"));
  testRegexpHelper(new RegExp("[\u{0062}-\u{0066}]oo", "u"));
  testRegexpHelper(new RegExp("[\u{62}-\u{00000066}]oo", "u"));
})();
| 56 | |
| 57 | |
// Without the u flag, \u{...} is not a code point escape: the \u degrades to
// a literal "u" and the braces are read as a quantifier (or, inside a
// character class, as literal characters).
(function TestBraceEscapesWithoutUnicodeFlag() {
  // \u followed by illegal escape will be parsed as u. {x} will be the
  // character count.
  function helper1(r) {
    assertFalse(r.test("fbar"));
    assertFalse(r.test("fubar"));
    assertTrue(r.test("fuubar"));
    assertFalse(r.test("fuuubar"));
  }
  helper1(/f\u{2}bar/);
  helper1(new RegExp("f\\u{2}bar"));

  // {1,2} is a range quantifier: one or two "u" characters are accepted.
  function helper2(r) {
    assertFalse(r.test("fbar"));
    assertTrue(r.test("fubar"));
    assertTrue(r.test("fuubar"));
    assertFalse(r.test("fuuubar"));
  }

  helper2(/f\u{1,2}bar/);
  helper2(new RegExp("f\\u{1,2}bar"));

  // Inside a character class there are no quantifiers, so each of "u", "{",
  // "2" and "}" is simply a member of the class.
  function helper3(r) {
    assertTrue(r.test("u"));
    assertTrue(r.test("{"));
    assertTrue(r.test("2"));
    assertTrue(r.test("}"));
    assertFalse(r.test("q"));
    assertFalse(r.test("("));
    assertFalse(r.test(")"));
  }
  helper3(/[\u{2}]/);
  helper3(new RegExp("[\\u{2}]"));
})();
| 92 | |
| 93 | |
// Checks how malformed escapes are handled: tolerated as identity escapes
// without the u flag, rejected with a SyntaxError when the u flag is present.
(function TestInvalidEscapes() {
  // Without the u flag, invalid unicode escapes and other invalid escapes are
  // treated as identity escapes.
  function helper1(r) {
    assertTrue(r.test("firstuxz89second"));
  }
  helper1(/first\u\x\z\8\9second/);
  helper1(new RegExp("first\\u\\x\\z\\8\\9second"));

  // The same identity-escape behavior inside a character class.
  function helper2(r) {
    assertTrue(r.test("u"));
    assertTrue(r.test("x"));
    assertTrue(r.test("z"));
    assertTrue(r.test("8"));
    assertTrue(r.test("9"));
    assertFalse(r.test("q"));
    assertFalse(r.test("7"));
  }
  helper2(/[\u\x\z\8\9]/);
  helper2(new RegExp("[\\u\\x\\z\\8\\9]"));

  // However, with the u flag, these are treated as invalid escapes.
  // The source is passed as a string so that the literal's early SyntaxError
  // is raised when the string is evaluated, not when this file is parsed.
  assertThrows("/\\u/u", SyntaxError);
  assertThrows("/\\u12/u", SyntaxError);
  assertThrows("/\\ufoo/u", SyntaxError);
  assertThrows("/\\x/u", SyntaxError);
  assertThrows("/\\xfoo/u", SyntaxError);
  assertThrows("/\\z/u", SyntaxError);
  assertThrows("/\\8/u", SyntaxError);
  assertThrows("/\\9/u", SyntaxError);

  assertThrows("new RegExp('\\\\u', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\u12', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\ufoo', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\x', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\xfoo', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\z', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\8', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\9', 'u')", SyntaxError);
})();
| 134 | |
| 135 | |
// Checks the upper bound of \u{...}: 10ffff is accepted with the u flag and
// 110000 is rejected. The bare regexp expressions below exist only to show
// that constructing them does not throw; each is explicitly terminated so the
// code does not depend on automatic semicolon insertion between a regexp
// literal and the `new` expression on the following line.
(function TestTooBigHexEscape() {
  // The hex number inside \u{} has a maximum value.
  /\u{10ffff}/u;
  new RegExp("\\u{10ffff}", "u");
  assertThrows("/\\u{110000}/u", SyntaxError);
  assertThrows("new RegExp('\\\\u{110000}', 'u')", SyntaxError);

  // Without the u flag, they're of course fine ({x} is the count).
  /\u{110000}/;
  new RegExp("\\u{110000}");
})();
| 147 | |
| 148 | |
// Checks that escaping a syntax character (here "[") yields a literal match
// regardless of the u flag, for both literals and constructed regexps.
(function TestSyntaxEscapes() {
  // Syntax escapes work the same with or without the u flag.
  function helper(r) {
    assertTrue(r.test("foo[bar"));
    assertFalse(r.test("foo]bar"));
  }
  helper(/foo\[bar/);
  helper(new RegExp("foo\\[bar"));
  helper(/foo\[bar/u);
  helper(new RegExp("foo\\[bar", "u"));
})();
| 160 | |
| 161 | |
// Checks that, with the u flag, two consecutive \uXXXX escapes forming a
// lead/trail surrogate pair match the corresponding non-BMP code point.
(function TestUnicodeSurrogates() {
  // U+10E6D corresponds to the surrogate pair [U+D803, U+DE6D].
  function helper(r) {
    assertTrue(r.test("foo\u{10e6d}bar"));
  }
  helper(/foo\ud803\ude6dbar/u);
  helper(new RegExp("foo\\ud803\\ude6dbar", "u"));
})();
| 170 | |
| 171 | |
// Checks the flag accessor properties of regexps created with every flag
// ("gimyu") and with no flags at all, via both literal and constructor.
(function AllFlags() {
  // Test that we can pass all possible regexp flags and they work properly.
  function helper1(r) {
    assertTrue(r.global);
    assertTrue(r.ignoreCase);
    assertTrue(r.multiline);
    assertTrue(r.sticky);
    assertTrue(r.unicode);
  }

  helper1(/foo/gimyu);
  helper1(new RegExp("foo", "gimyu"));

  // With no flags given, every flag property must read false.
  function helper2(r) {
    assertFalse(r.global);
    assertFalse(r.ignoreCase);
    assertFalse(r.multiline);
    assertFalse(r.sticky);
    assertFalse(r.unicode);
  }

  helper2(/foo/);
  helper2(new RegExp("foo"));
})();
| 196 | |
| 197 | |
// Checks that repeating the u flag is rejected for both literals and the
// RegExp constructor. The expected error type is pinned to SyntaxError, like
// the other string-based assertThrows checks in this file, so that an
// unrelated exception cannot satisfy the assertion.
(function DuplicatedFlags() {
  // Test that duplicating the u flag is not allowed.
  assertThrows("/foo/ugu", SyntaxError);
  assertThrows("new RegExp('foo', 'ugu')", SyntaxError);
})();
| 203 | |
| 204 | |
// Checks that toString() of a unicode regexp includes the u flag. The
// expected value is passed first, matching the (expected, found) argument
// order used by the other assertEquals calls in this file.
(function ToString() {
  // Test that the u flag is included in the string representation of regexps.
  function helper(r) {
    assertEquals("/foo/u", r.toString());
  }
  helper(/foo/u);
  helper(new RegExp("foo", "u"));
})();
| 213 | |
// Non-BMP patterns.
// U+12345 is the surrogate pair [U+D808, U+DF45]; with the u flag it must
// match as one code point and never as an isolated trail surrogate.
// Single character atom.
assertTrue(new RegExp("\u{12345}", "u").test("\u{12345}"));
assertTrue(/\u{12345}/u.test("\u{12345}"));
assertTrue(new RegExp("\u{12345}", "u").test("\ud808\udf45"));
assertTrue(/\u{12345}/u.test("\ud808\udf45"));
assertFalse(new RegExp("\u{12345}", "u").test("\udf45"));
assertFalse(/\u{12345}/u.test("\udf45"));

// Multi-character atom.
assertTrue(new RegExp("\u{12345}\u{23456}", "u").test("a\u{12345}\u{23456}b"));
assertTrue(/\u{12345}\u{23456}/u.test("b\u{12345}\u{23456}c"));
assertFalse(new RegExp("\u{12345}\u{23456}", "u").test("a\udf45\u{23456}b"));
assertFalse(/\u{12345}\u{23456}/u.test("b\udf45\u{23456}c"));

// Disjunction.
assertTrue(new RegExp("\u{12345}(?:\u{23456})", "u").test(
    "a\u{12345}\u{23456}b"));
assertTrue(/\u{12345}(?:\u{23456})/u.test("b\u{12345}\u{23456}c"));
assertFalse(new RegExp("\u{12345}(?:\u{23456})", "u").test(
    "a\udf45\u{23456}b"));
assertFalse(/\u{12345}(?:\u{23456})/u.test("b\udf45\u{23456}c"));

// Alternative.
assertTrue(new RegExp("\u{12345}|\u{23456}", "u").test("a\u{12345}b"));
assertTrue(/\u{12345}|\u{23456}/u.test("b\u{23456}c"));
assertFalse(new RegExp("\u{12345}|\u{23456}", "u").test("a\udf45\ud84db"));
assertFalse(/\u{12345}|\u{23456}/u.test("b\udf45\ud808c"));

// Capture.
assertTrue(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
    "\u{12345}b\u{12345}"));
assertTrue(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{12345}"));
assertFalse(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
    "\u{12345}b\u{23456}"));
assertFalse(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{23456}"));

// Quantifier.
// With the u flag {3} applies to the whole code point; without it, only to
// the trailing surrogate code unit.
assertTrue(new RegExp("\u{12345}{3}", "u").test("\u{12345}\u{12345}\u{12345}"));
assertTrue(/\u{12345}{3}/u.test("\u{12345}\u{12345}\u{12345}"));
assertTrue(new RegExp("\u{12345}{3}").test("\u{12345}\udf45\udf45"));
assertFalse(/\ud808\udf45{3}/u.test("\u{12345}\udf45\udf45"));
assertTrue(/\ud808\udf45{3}/u.test("\u{12345}\u{12345}\u{12345}"));
assertFalse(new RegExp("\u{12345}{3}", "u").test("\u{12345}\udf45\udf45"));
assertFalse(/\u{12345}{3}/u.test("\u{12345}\udf45\udf45"));

// Literal surrogates.
assertEquals(["\u{10000}\u{10000}"],
             new RegExp("\ud800\udc00+", "u").exec("\u{10000}\u{10000}"));
assertEquals(["\u{10000}\u{10000}"],
             new RegExp("\\ud800\\udc00+", "u").exec("\u{10000}\u{10000}"));

assertEquals(["\u{10003}\u{50001}"],
             new RegExp("[\\ud800\\udc03-\\ud900\\udc01\]+", "u").exec(
                 "\u{10003}\u{50001}"));
assertEquals(["\u{10003}\u{50001}"],
             new RegExp("[\ud800\udc03-\u{50001}\]+", "u").exec(
                 "\u{10003}\u{50001}"));

// Unicode escape sequences to represent a non-BMP character cannot have
// mixed notation, and must follow the rules for RegExpUnicodeEscapeSequence.
// In both mixes the halves stay separate code points, so the class range
// runs from a trail surrogate down to a lead surrogate and is out of order.
assertThrows(() => new RegExp("[\\ud800\udc03-\ud900\\udc01\]+", "u"));
// Same check with the escaped and literal halves swapped.
assertThrows(() => new RegExp("[\ud800\\udc03-\\ud900\udc01\]+", "u"));
assertNull(new RegExp("\\ud800\udc00+", "u").exec("\u{10000}\u{10000}"));
assertNull(new RegExp("\ud800\\udc00+", "u").exec("\u{10000}\u{10000}"));

// Mixed notation inside a character class, with both the \uXXXX and the
// braced \u{...} escape forms.
assertNull(new RegExp("[\\ud800\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\\u{d800}\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\ud800\\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\ud800\\u{dc00}]", "u").exec("\u{10000}"));

// A braced escape never combines with a neighbouring escape into a pair.
assertNull(/\u{d800}\u{dc00}+/u.exec("\ud800\udc00\udc00"));
assertNull(/\ud800\u{dc00}+/u.exec("\ud800\udc00\udc00"));
assertNull(/\u{d800}\udc00+/u.exec("\ud800\udc00\udc00"));
| OLD | NEW |