| OLD | NEW |
| (Empty) |
| 1 // Copyright 2006 The RE2 Authors. All Rights Reserved. | |
| 2 // Use of this source code is governed by a BSD-style | |
| 3 // license that can be found in the LICENSE file. | |
| 4 | |
| 5 // Test parse.cc, dump.cc, and tostring.cc. | |
| 6 | |
| 7 #include <string> | |
| 8 #include <vector> | |
| 9 #include "util/test.h" | |
| 10 #include "re2/regexp.h" | |
| 11 | |
| 12 namespace re2 { | |
| 13 | |
| 14 static const Regexp::ParseFlags TestZeroFlags = Regexp::ParseFlags(1<<30); | |
| 15 | |
| 16 struct Test { | |
| 17 const char* regexp; | |
| 18 const char* parse; | |
| 19 Regexp::ParseFlags flags; | |
| 20 }; | |
| 21 | |
| 22 static Regexp::ParseFlags kTestFlags = Regexp::MatchNL | | |
| 23 Regexp::PerlX | | |
| 24 Regexp::PerlClasses | | |
| 25 Regexp::UnicodeGroups; | |
| 26 | |
| 27 static Test tests[] = { | |
| 28 // Base cases | |
| 29 { "a", "lit{a}" }, | |
| 30 { "a.", "cat{lit{a}dot{}}" }, | |
| 31 { "a.b", "cat{lit{a}dot{}lit{b}}" }, | |
| 32 { "ab", "str{ab}" }, | |
| 33 { "a.b.c", "cat{lit{a}dot{}lit{b}dot{}lit{c}}" }, | |
| 34 { "abc", "str{abc}" }, | |
| 35 { "a|^", "alt{lit{a}bol{}}" }, | |
| 36 { "a|b", "cc{0x61-0x62}" }, | |
| 37 { "(a)", "cap{lit{a}}" }, | |
| 38 { "(a)|b", "alt{cap{lit{a}}lit{b}}" }, | |
| 39 { "a*", "star{lit{a}}" }, | |
| 40 { "a+", "plus{lit{a}}" }, | |
| 41 { "a?", "que{lit{a}}" }, | |
| 42 { "a{2}", "rep{2,2 lit{a}}" }, | |
| 43 { "a{2,3}", "rep{2,3 lit{a}}" }, | |
| 44 { "a{2,}", "rep{2,-1 lit{a}}" }, | |
| 45 { "a*?", "nstar{lit{a}}" }, | |
| 46 { "a+?", "nplus{lit{a}}" }, | |
| 47 { "a??", "nque{lit{a}}" }, | |
| 48 { "a{2}?", "nrep{2,2 lit{a}}" }, | |
| 49 { "a{2,3}?", "nrep{2,3 lit{a}}" }, | |
| 50 { "a{2,}?", "nrep{2,-1 lit{a}}" }, | |
| 51 { "", "emp{}" }, | |
| 52 { "|", "emp{}" }, // alt{emp{}emp{}} but got factored | |
| 53 { "|x|", "alt{emp{}lit{x}emp{}}" }, | |
| 54 { ".", "dot{}" }, | |
| 55 { "^", "bol{}" }, | |
| 56 { "$", "eol{}" }, | |
| 57 { "\\|", "lit{|}" }, | |
| 58 { "\\(", "lit{(}" }, | |
| 59 { "\\)", "lit{)}" }, | |
| 60 { "\\*", "lit{*}" }, | |
| 61 { "\\+", "lit{+}" }, | |
| 62 { "\\?", "lit{?}" }, | |
| 63 { "{", "lit{{}" }, | |
| 64 { "}", "lit{}}" }, | |
| 65 { "\\.", "lit{.}" }, | |
| 66 { "\\^", "lit{^}" }, | |
| 67 { "\\$", "lit{$}" }, | |
| 68 { "\\\\", "lit{\\}" }, | |
| 69 { "[ace]", "cc{0x61 0x63 0x65}" }, | |
| 70 { "[abc]", "cc{0x61-0x63}" }, | |
| 71 { "[a-z]", "cc{0x61-0x7a}" }, | |
| 72 { "[a]", "lit{a}" }, | |
| 73 { "\\-", "lit{-}" }, | |
| 74 { "-", "lit{-}" }, | |
| 75 { "\\_", "lit{_}" }, | |
| 76 | |
| 77 // Posix and Perl extensions | |
| 78 { "[[:lower:]]", "cc{0x61-0x7a}" }, | |
| 79 { "[a-z]", "cc{0x61-0x7a}" }, | |
| 80 { "[^[:lower:]]", "cc{0-0x60 0x7b-0x10ffff}" }, | |
| 81 { "[[:^lower:]]", "cc{0-0x60 0x7b-0x10ffff}" }, | |
| 82 { "(?i)[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, | |
| 83 { "(?i)[a-z]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, | |
| 84 { "(?i)[^[:lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10
ffff}" }, | |
| 85 { "(?i)[[:^lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10
ffff}" }, | |
| 86 { "\\d", "cc{0x30-0x39}" }, | |
| 87 { "\\D", "cc{0-0x2f 0x3a-0x10ffff}" }, | |
| 88 { "\\s", "cc{0x9-0xa 0xc-0xd 0x20}" }, | |
| 89 { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}" }, | |
| 90 { "\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}" }, | |
| 91 { "\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}" }, | |
| 92 { "(?i)\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}" }, | |
| 93 { "(?i)\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212
b-0x10ffff}" }, | |
| 94 { "[^\\\\]", "cc{0-0x5b 0x5d-0x10ffff}" }, | |
| 95 { "\\C", "byte{}" }, | |
| 96 | |
| 97 // Unicode, negatives, and a double negative. | |
| 98 { "\\p{Braille}", "cc{0x2800-0x28ff}" }, | |
| 99 { "\\P{Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" }, | |
| 100 { "\\p{^Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" }, | |
| 101 { "\\P{^Braille}", "cc{0x2800-0x28ff}" }, | |
| 102 | |
| 103 // More interesting regular expressions. | |
| 104 { "a{,2}", "str{a{,2}}" }, | |
| 105 { "\\.\\^\\$\\\\", "str{.^$\\}" }, | |
| 106 { "[a-zABC]", "cc{0x41-0x43 0x61-0x7a}" }, | |
| 107 { "[^a]", "cc{0-0x60 0x62-0x10ffff}" }, | |
| 108 { "[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}" }, // utf-8 | |
| 109 { "a*{", "cat{star{lit{a}}lit{{}}" }, | |
| 110 | |
| 111 // Test precedences | |
| 112 { "(?:ab)*", "star{str{ab}}" }, | |
| 113 { "(ab)*", "star{cap{str{ab}}}" }, | |
| 114 { "ab|cd", "alt{str{ab}str{cd}}" }, | |
| 115 { "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" }, | |
| 116 | |
| 117 // Test flattening. | |
| 118 { "(?:a)", "lit{a}" }, | |
| 119 { "(?:ab)(?:cd)", "str{abcd}" }, | |
| 120 { "(?:a|b)|(?:c|d)", "cc{0x61-0x64}" }, | |
| 121 { "a|c", "cc{0x61 0x63}" }, | |
| 122 { "a|[cd]", "cc{0x61 0x63-0x64}" }, | |
| 123 { "a|.", "dot{}" }, | |
| 124 { "[ab]|c", "cc{0x61-0x63}" }, | |
| 125 { "[ab]|[cd]", "cc{0x61-0x64}" }, | |
| 126 { "[ab]|.", "dot{}" }, | |
| 127 { ".|c", "dot{}" }, | |
| 128 { ".|[cd]", "dot{}" }, | |
| 129 { ".|.", "dot{}" }, | |
| 130 | |
| 131 // Test Perl quoted literals | |
| 132 { "\\Q+|*?{[\\E", "str{+|*?{[}" }, | |
| 133 { "\\Q+\\E+", "plus{lit{+}}" }, | |
| 134 { "\\Q\\\\E", "lit{\\}" }, | |
| 135 { "\\Q\\\\\\E", "str{\\\\}" }, | |
| 136 { "\\Qa\\E*", "star{lit{a}}" }, | |
| 137 { "\\Qab\\E*", "cat{lit{a}star{lit{b}}}" }, | |
| 138 { "\\Qabc\\E*", "cat{str{ab}star{lit{c}}}" }, | |
| 139 | |
| 140 // Test Perl \A and \z | |
| 141 { "(?m)^", "bol{}" }, | |
| 142 { "(?m)$", "eol{}" }, | |
| 143 { "(?-m)^", "bot{}" }, | |
| 144 { "(?-m)$", "eot{}" }, | |
| 145 { "(?m)\\A", "bot{}" }, | |
| 146 { "(?m)\\z", "eot{\\z}" }, | |
| 147 { "(?-m)\\A", "bot{}" }, | |
| 148 { "(?-m)\\z", "eot{\\z}" }, | |
| 149 | |
| 150 // Test named captures | |
| 151 { "(?P<name>a)", "cap{name:lit{a}}" }, | |
| 152 | |
| 153 // Case-folded literals | |
| 154 { "[Aa]", "litfold{a}" }, | |
| 155 | |
| 156 // Strings | |
| 157 { "abcde", "str{abcde}" }, | |
| 158 { "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" }, | |
| 159 | |
| 160 // Reported bug involving \n leaking in despite use of NeverNL. | |
| 161 { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags }, | |
| 162 { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, | |
| 163 { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
| 164 { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCa
se }, | |
| 165 { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", TestZeroFlags }, | |
| 166 { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, | |
| 167 { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
| 168 { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::
FoldCase }, | |
| 169 { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", TestZeroFlags }, | |
| 170 { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, | |
| 171 { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
| 172 { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Rege
xp::FoldCase }, | |
| 173 { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", TestZeroFlags }, | |
| 174 { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, | |
| 175 { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
| 176 { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::Fold
Case }, | |
| 177 { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags }, | |
| 178 { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, | |
| 179 { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
| 180 { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::Fold
Case }, | |
| 181 { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
| 182 { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::
FoldCase }, | |
| 183 { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
| 184 { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp
::FoldCase }, | |
| 185 { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
| 186 { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Rege
xp::FoldCase }, | |
| 187 { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, | |
| 188 { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Re
gexp::FoldCase }, | |
| 189 { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
| 190 Regexp::PerlClasses }, | |
| 191 { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
| 192 Regexp::PerlClasses | Regexp::FoldCase }, | |
| 193 { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
| 194 Regexp::PerlClasses | Regexp::NeverNL }, | |
| 195 { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
| 196 Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase }, | |
| 197 { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
| 198 Regexp::PerlClasses }, | |
| 199 { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
| 200 Regexp::PerlClasses | Regexp::FoldCase }, | |
| 201 { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
| 202 Regexp::PerlClasses | Regexp::NeverNL }, | |
| 203 { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", | |
| 204 Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase }, | |
| 205 }; | |
| 206 | |
| 207 bool RegexpEqualTestingOnly(Regexp* a, Regexp* b) { | |
| 208 return Regexp::Equal(a, b); | |
| 209 } | |
| 210 | |
| 211 void TestParse(const Test* tests, int ntests, Regexp::ParseFlags flags, | |
| 212 const string& title) { | |
| 213 Regexp** re = new Regexp*[ntests]; | |
| 214 for (int i = 0; i < ntests; i++) { | |
| 215 RegexpStatus status; | |
| 216 Regexp::ParseFlags f = flags; | |
| 217 if (tests[i].flags != 0) { | |
| 218 f = tests[i].flags & ~TestZeroFlags; | |
| 219 } | |
| 220 re[i] = Regexp::Parse(tests[i].regexp, f, &status); | |
| 221 CHECK(re[i] != NULL) << " " << tests[i].regexp << " " | |
| 222 << status.Text(); | |
| 223 string s = re[i]->Dump(); | |
| 224 EXPECT_EQ(string(tests[i].parse), s) << "Regexp: " << tests[i].regexp | |
| 225 << "\nparse: " << string(tests[i].parse) << " s: " << s << " flag=" << f; | |
| 226 } | |
| 227 | |
| 228 for (int i = 0; i < ntests; i++) { | |
| 229 for (int j = 0; j < ntests; j++) { | |
| 230 EXPECT_EQ(string(tests[i].parse) == string(tests[j].parse), | |
| 231 RegexpEqualTestingOnly(re[i], re[j])) | |
| 232 << "Regexp: " << tests[i].regexp << " " << tests[j].regexp; | |
| 233 } | |
| 234 } | |
| 235 | |
| 236 for (int i = 0; i < ntests; i++) | |
| 237 re[i]->Decref(); | |
| 238 delete[] re; | |
| 239 } | |
| 240 | |
| 241 // Test that regexps parse to expected structures. | |
| 242 TEST(TestParse, SimpleRegexps) { | |
| 243 TestParse(tests, arraysize(tests), kTestFlags, "simple"); | |
| 244 } | |
| 245 | |
| 246 Test foldcase_tests[] = { | |
| 247 { "AbCdE", "strfold{abcde}" }, | |
| 248 { "[Aa]", "litfold{a}" }, | |
| 249 { "a", "litfold{a}" }, | |
| 250 | |
| 251 // 0x17F is an old English long s (looks like an f) and folds to s. | |
| 252 // 0x212A is the Kelvin symbol and folds to k. | |
| 253 { "A[F-g]", "cat{litfold{a}cc{0x41-0x7a 0x17f 0x212a}}" }, // [Aa][A-z...] | |
| 254 { "[[:upper:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, | |
| 255 { "[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, | |
| 256 }; | |
| 257 | |
| 258 // Test that parsing with FoldCase works. | |
| 259 TEST(TestParse, FoldCase) { | |
| 260 TestParse(foldcase_tests, arraysize(foldcase_tests), Regexp::FoldCase, "foldca
se"); | |
| 261 } | |
| 262 | |
| 263 Test literal_tests[] = { | |
| 264 { "(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}" }, | |
| 265 }; | |
| 266 | |
| 267 // Test that parsing with Literal works. | |
| 268 TEST(TestParse, Literal) { | |
| 269 TestParse(literal_tests, arraysize(literal_tests), Regexp::Literal, "literal")
; | |
| 270 } | |
| 271 | |
| 272 Test matchnl_tests[] = { | |
| 273 { ".", "dot{}" }, | |
| 274 { "\n", "lit{\n}" }, | |
| 275 { "[^a]", "cc{0-0x60 0x62-0x10ffff}" }, | |
| 276 { "[a\\n]", "cc{0xa 0x61}" }, | |
| 277 }; | |
| 278 | |
| 279 // Test that parsing with MatchNL works. | |
| 280 // (Also tested above during simple cases.) | |
| 281 TEST(TestParse, MatchNL) { | |
| 282 TestParse(matchnl_tests, arraysize(matchnl_tests), Regexp::MatchNL, "with Matc
hNL"); | |
| 283 } | |
| 284 | |
| 285 Test nomatchnl_tests[] = { | |
| 286 { ".", "cc{0-0x9 0xb-0x10ffff}" }, | |
| 287 { "\n", "lit{\n}" }, | |
| 288 { "[^a]", "cc{0-0x9 0xb-0x60 0x62-0x10ffff}" }, | |
| 289 { "[a\\n]", "cc{0xa 0x61}" }, | |
| 290 }; | |
| 291 | |
| 292 // Test that parsing without MatchNL works. | |
| 293 TEST(TestParse, NoMatchNL) { | |
| 294 TestParse(nomatchnl_tests, arraysize(nomatchnl_tests), Regexp::NoParseFlags, "
without MatchNL"); | |
| 295 } | |
| 296 | |
| 297 Test prefix_tests[] = { | |
| 298 { "abc|abd", "cat{str{ab}cc{0x63-0x64}}" }, | |
| 299 { "a(?:b)c|abd", "cat{str{ab}cc{0x63-0x64}}" }, | |
| 300 { "abc|abd|aef|bcx|bcy", | |
| 301 "alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}" | |
| 302 "cat{str{bc}cc{0x78-0x79}}}" }, | |
| 303 { "abc|x|abd", "alt{str{abc}lit{x}str{abd}}" }, | |
| 304 { "(?i)abc|ABD", "cat{strfold{ab}cc{0x43-0x44 0x63-0x64}}" }, | |
| 305 { "[ab]c|[ab]d", "cat{cc{0x61-0x62}cc{0x63-0x64}}" }, | |
| 306 { "(?:xx|yy)c|(?:xx|yy)d", | |
| 307 "cat{alt{str{xx}str{yy}}cc{0x63-0x64}}" }, | |
| 308 { "x{2}|x{2}[0-9]", | |
| 309 "cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}" }, | |
| 310 { "x{2}y|x{2}[0-9]y", | |
| 311 "cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}" }, | |
| 312 { "n|r|rs", | |
| 313 "alt{lit{n}cat{lit{r}alt{emp{}lit{s}}}}" }, | |
| 314 { "n|rs|r", | |
| 315 "alt{lit{n}cat{lit{r}alt{lit{s}emp{}}}}" }, | |
| 316 { "r|rs|n", | |
| 317 "alt{cat{lit{r}alt{emp{}lit{s}}}lit{n}}" }, | |
| 318 { "rs|r|n", | |
| 319 "alt{cat{lit{r}alt{lit{s}emp{}}}lit{n}}" }, | |
| 320 }; | |
| 321 | |
| 322 // Test that prefix factoring works. | |
| 323 TEST(TestParse, Prefix) { | |
| 324 TestParse(prefix_tests, arraysize(prefix_tests), Regexp::PerlX, "prefix"); | |
| 325 } | |
| 326 | |
| 327 Test nested_tests[] = { | |
| 328 { "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))", | |
| 329 "cap{cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2
cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 lit{x}}}}}}}}}}}}}}}}}}}}" }, | |
| 330 { "((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})", | |
| 331 "cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap
{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{1,1 lit{x}}}}}}}}}}}}}}}}}}}}}" }, | |
| 332 { "((((((((((x{0}){2}){2}){2}){2}){2}){2}){2}){2}){2})", | |
| 333 "cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap
{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{0,0 lit{x}}}}}}}}}}}}}}}}}}}}}" }, | |
| 334 { "((((((x{2}){2}){2}){5}){5}){5})", | |
| 335 "cap{rep{5,5 cap{rep{5,5 cap{rep{5,5 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 lit
{x}}}}}}}}}}}}}" }, | |
| 336 }; | |
| 337 | |
| 338 // Test that nested repetition works. | |
| 339 TEST(TestParse, Nested) { | |
| 340 TestParse(nested_tests, arraysize(nested_tests), Regexp::PerlX, "nested"); | |
| 341 } | |
| 342 | |
// Invalid regular expressions: expected to be rejected both with
// Regexp::PerlX and with Regexp::NoParseFlags.
const char* badtests[] = {
  "(",
  ")",
  "(a",
  "(a|b|",
  "(a|b",
  "[a-z",
  "([a-z)",
  "x{1001}",
  "\xff",  // Invalid UTF-8
  "[\xff]",
  "[\\\xff]",
  "\\\xff",
  "(?P<name>a",
  "(?P<name>",
  "(?P<name",
  "(?P<x y>a)",
  "(?P<>a)",
  "[a-Z]",
  "(?i)[a-Z]",
  "a{100000}",
  "a{100000,}",
  "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
  "(((x{7}){11}){13})",
  "\\Q\\E*",
};

// Valid in Perl, bad in POSIX: accepted with Regexp::PerlX, rejected with
// Regexp::NoParseFlags.
const char* only_perl[] = {
  "[a-b-c]",
  "\\Qabc\\E",
  "\\Q*+?{[\\E",
  "\\Q\\\\E",
  "\\Q\\\\\\E",
  "\\Q\\\\\\\\E",
  "\\Q\\\\\\\\\\E",
  "(?:a)",
  "(?P<name>a)",
};

// Valid in POSIX, bad in Perl: accepted with Regexp::NoParseFlags, rejected
// with Regexp::PerlX.
const char* only_posix[] = {
  "a++",
  "a**",
  "a?*",
  "a+*",
  "a{1}*",
};
| 392 | |
| 393 // Test that parser rejects bad regexps. | |
| 394 TEST(TestParse, InvalidRegexps) { | |
| 395 for (int i = 0; i < arraysize(badtests); i++) { | |
| 396 CHECK(Regexp::Parse(badtests[i], Regexp::PerlX, NULL) == NULL) | |
| 397 << " " << badtests[i]; | |
| 398 CHECK(Regexp::Parse(badtests[i], Regexp::NoParseFlags, NULL) == NULL) | |
| 399 << " " << badtests[i]; | |
| 400 } | |
| 401 for (int i = 0; i < arraysize(only_posix); i++) { | |
| 402 CHECK(Regexp::Parse(only_posix[i], Regexp::PerlX, NULL) == NULL) | |
| 403 << " " << only_posix[i]; | |
| 404 Regexp* re = Regexp::Parse(only_posix[i], Regexp::NoParseFlags, NULL); | |
| 405 CHECK(re) << " " << only_posix[i]; | |
| 406 re->Decref(); | |
| 407 } | |
| 408 for (int i = 0; i < arraysize(only_perl); i++) { | |
| 409 CHECK(Regexp::Parse(only_perl[i], Regexp::NoParseFlags, NULL) == NULL) | |
| 410 << " " << only_perl[i]; | |
| 411 Regexp* re = Regexp::Parse(only_perl[i], Regexp::PerlX, NULL); | |
| 412 CHECK(re) << " " << only_perl[i]; | |
| 413 re->Decref(); | |
| 414 } | |
| 415 } | |
| 416 | |
| 417 // Test that ToString produces original regexp or equivalent one. | |
| 418 TEST(TestToString, EquivalentParse) { | |
| 419 for (int i = 0; i < arraysize(tests); i++) { | |
| 420 RegexpStatus status; | |
| 421 Regexp::ParseFlags f = kTestFlags; | |
| 422 if (tests[i].flags != 0) { | |
| 423 f = tests[i].flags & ~TestZeroFlags; | |
| 424 } | |
| 425 Regexp* re = Regexp::Parse(tests[i].regexp, f, &status); | |
| 426 CHECK(re != NULL) << " " << tests[i].regexp << " " << status.Text(); | |
| 427 string s = re->Dump(); | |
| 428 EXPECT_EQ(string(tests[i].parse), s) << " " << tests[i].regexp << " " << str
ing(tests[i].parse) << " " << s; | |
| 429 string t = re->ToString(); | |
| 430 if (t != tests[i].regexp) { | |
| 431 // If ToString didn't return the original regexp, | |
| 432 // it must have found one with fewer parens. | |
| 433 // Unfortunately we can't check the length here, because | |
| 434 // ToString produces "\\{" for a literal brace, | |
| 435 // but "{" is a shorter equivalent. | |
| 436 // CHECK_LT(t.size(), strlen(tests[i].regexp)) | |
| 437 // << " t=" << t << " regexp=" << tests[i].regexp; | |
| 438 | |
| 439 // Test that if we parse the new regexp we get the same structure. | |
| 440 Regexp* nre = Regexp::Parse(t, Regexp::MatchNL | Regexp::PerlX, &status); | |
| 441 CHECK(nre != NULL) << " reparse " << t << " " << status.Text(); | |
| 442 string ss = nre->Dump(); | |
| 443 string tt = nre->ToString(); | |
| 444 if (s != ss || t != tt) | |
| 445 LOG(INFO) << "ToString(" << tests[i].regexp << ") = " << t; | |
| 446 EXPECT_EQ(s, ss); | |
| 447 EXPECT_EQ(t, tt); | |
| 448 nre->Decref(); | |
| 449 } | |
| 450 re->Decref(); | |
| 451 } | |
| 452 } | |
| 453 | |
| 454 // Test that capture error args are correct. | |
| 455 TEST(NamedCaptures, ErrorArgs) { | |
| 456 RegexpStatus status; | |
| 457 Regexp* re; | |
| 458 | |
| 459 re = Regexp::Parse("test(?P<name", Regexp::LikePerl, &status); | |
| 460 EXPECT_TRUE(re == NULL); | |
| 461 EXPECT_EQ(status.code(), kRegexpBadNamedCapture); | |
| 462 EXPECT_EQ(status.error_arg(), "(?P<name"); | |
| 463 | |
| 464 re = Regexp::Parse("test(?P<space bar>z)", Regexp::LikePerl, &status); | |
| 465 EXPECT_TRUE(re == NULL); | |
| 466 EXPECT_EQ(status.code(), kRegexpBadNamedCapture); | |
| 467 EXPECT_EQ(status.error_arg(), "(?P<space bar>"); | |
| 468 } | |
| 469 | |
| 470 } // namespace re2 | |
| OLD | NEW |