| Index: third_party/re2/re2/testing/simplify_test.cc
|
| diff --git a/third_party/re2/re2/testing/simplify_test.cc b/third_party/re2/re2/testing/simplify_test.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..d54837c950723c4f0b16ba9420e3b126f10a00d6
|
| --- /dev/null
|
| +++ b/third_party/re2/re2/testing/simplify_test.cc
|
| @@ -0,0 +1,167 @@
|
| +// Copyright 2006 The RE2 Authors. All Rights Reserved.
|
| +// Use of this source code is governed by a BSD-style
|
| +// license that can be found in the LICENSE file.
|
| +
|
| +// Test simplify.cc.
|
| +
|
| +#include <string>
|
| +#include <vector>
|
| +#include "util/test.h"
|
| +#include "re2/regexp.h"
|
| +
|
| +namespace re2 {
|
| +
|
| +struct Test {  // One table entry: a pattern and its expected simplified form.
|
| + const char* regexp;  // Pattern text handed to Regexp::Parse by the test.
|
| + const char* simplified;  // Expected ToString() of the simplified regexp.
|
| +};
|
| +
|
| +static Test tests[] = {  // { regexp, expected ToString() after Simplify() }
|
| + // Already-simple constructs
|
| + { "a", "a" },
|
| + { "ab", "ab" },
|
| + { "a|b", "[a-b]" },
|
| + { "ab|cd", "ab|cd" },
|
| + { "(ab)*", "(ab)*" },
|
| + { "(ab)+", "(ab)+" },
|
| + { "(ab)?", "(ab)?" },
|
| + { ".", "." },
|
| + { "^", "^" },
|
| + { "$", "$" },
|
| + { "[ac]", "[ac]" },
|
| + { "[^ac]", "[^ac]" },
|
| +
|
| + // Posix character classes
|
| + { "[[:alnum:]]", "[0-9A-Za-z]" },
|
| + { "[[:alpha:]]", "[A-Za-z]" },
|
| + { "[[:blank:]]", "[\\t ]" },
|
| + { "[[:cntrl:]]", "[\\x00-\\x1f\\x7f]" },
|
| + { "[[:digit:]]", "[0-9]" },
|
| + { "[[:graph:]]", "[!-~]" },
|
| + { "[[:lower:]]", "[a-z]" },
|
| + { "[[:print:]]", "[ -~]" },
|
| + { "[[:punct:]]", "[!-/:-@\\[-`{-~]" },
|
| + { "[[:space:]]" , "[\\t-\\r ]" },
|
| + { "[[:upper:]]", "[A-Z]" },
|
| + { "[[:xdigit:]]", "[0-9A-Fa-f]" },
|
| +
|
| + // Perl character classes
|
| + { "\\d", "[0-9]" },
|
| + { "\\s", "[\\t-\\n\\f-\\r ]" },
|
| + { "\\w", "[0-9A-Z_a-z]" },
|
| + { "\\D", "[^0-9]" },
|
| + { "\\S", "[^\\t-\\n\\f-\\r ]" },
|
| + { "\\W", "[^0-9A-Z_a-z]" },
|
| + { "[\\d]", "[0-9]" },
|
| + { "[\\s]", "[\\t-\\n\\f-\\r ]" },
|
| + { "[\\w]", "[0-9A-Z_a-z]" },
|
| + { "[\\D]", "[^0-9]" },
|
| + { "[\\S]", "[^\\t-\\n\\f-\\r ]" },
|
| + { "[\\W]", "[^0-9A-Z_a-z]" },
|
| +
|
| + // Posix repetitions
|
| + { "a{1}", "a" },
|
| + { "a{2}", "aa" },
|
| + { "a{5}", "aaaaa" },
|
| + { "a{0,1}", "a?" },
|
| + // The next three are illegible because Simplify inserts (?:)
|
| + // parens instead of () parens to avoid creating extra
|
| + // captured subexpressions. The comments show a version with fewer parens.
|
| + { "(a){0,2}", "(?:(a)(a)?)?" }, // (aa?)?
|
| + { "(a){0,4}", "(?:(a)(?:(a)(?:(a)(a)?)?)?)?" }, // (a(a(aa?)?)?)?
|
| + { "(a){2,6}", "(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?" }, // aa(a(a(aa?)?)?)?
|
| + { "a{0,2}", "(?:aa?)?" }, // (aa?)?
|
| + { "a{0,4}", "(?:a(?:a(?:aa?)?)?)?" }, // (a(a(aa?)?)?)?
|
| + { "a{2,6}", "aa(?:a(?:a(?:aa?)?)?)?" }, // aa(a(a(aa?)?)?)?
|
| + { "a{0,}", "a*" },
|
| + { "a{1,}", "a+" },
|
| + { "a{2,}", "aa+" },
|
| + { "a{5,}", "aaaaa+" },
|
| +
|
| + // Test that operators simplify their arguments.
|
| + // (Simplify used to not simplify arguments to a {} repeat.)
|
| + { "(?:a{1,}){1,}", "a+" },
|
| + { "(a{1,}b{1,})", "(a+b+)" },
|
| + { "a{1,}|b{1,}", "a+|b+" },
|
| + { "(?:a{1,})*", "(?:a+)*" },
|
| + { "(?:a{1,})+", "a+" },
|
| + { "(?:a{1,})?", "(?:a+)?" },
|
| + { "a{0}", "" },
|
| +
|
| + // Character class simplification
|
| + { "[ab]", "[a-b]" },
|
| + { "[a-za-za-z]", "[a-z]" },
|
| + { "[A-Za-zA-Za-z]", "[A-Za-z]" },
|
| + { "[ABCDEFGH]", "[A-H]" },
|
| + { "[AB-CD-EF-GH]", "[A-H]" },
|
| + { "[W-ZP-XE-R]", "[E-Z]" },
|
| + { "[a-ee-gg-m]", "[a-m]" },
|
| + { "[a-ea-ha-m]", "[a-m]" },
|
| + { "[a-ma-ha-e]", "[a-m]" },
|
| + { "[a-zA-Z0-9 -~]", "[ -~]" },
|
| +
|
| + // Empty character classes
|
| + { "[^[:cntrl:][:^cntrl:]]", "[^\\x00-\\x{10ffff}]" },
|
| +
|
| + // Full character classes
|
| + { "[[:cntrl:][:^cntrl:]]", "." },
|
| +
|
| + // Unicode case folding.
|
| + { "(?i)A", "[Aa]" },
|
| + { "(?i)a", "[Aa]" },
|
| + { "(?i)K", "[Kk\\x{212a}]" },
|
| + { "(?i)k", "[Kk\\x{212a}]" },
|
| + { "(?i)\\x{212a}", "[Kk\\x{212a}]" },
|
| + { "(?i)[a-z]", "[A-Za-z\\x{17f}\\x{212a}]" },
|
| + { "(?i)[\\x00-\\x{FFFD}]", "[\\x00-\\x{fffd}]" },
|
| + { "(?i)[\\x00-\\x{10ffff}]", "." },
|
| +
|
| + // Empty string as a regular expression.
|
| + // Empty string must be preserved inside parens in order
|
| + // to make submatches work right, so these are less
|
| + // interesting than they used to be. ToString inserts
|
| + // explicit (?:) in place of non-parenthesized empty strings,
|
| + // to make them easier to spot for other parsers.
|
| + { "(a|b|)", "([a-b]|(?:))" },
|
| + { "(|)", "()" },
|
| + { "a()", "a()" },
|
| + { "(()|())", "(()|())" },
|
| + { "(a|)", "(a|(?:))" },
|
| + { "ab()cd()", "ab()cd()" },
|
| + { "()", "()" },
|
| + { "()*", "()*" },
|
| + { "()+", "()+" },
|
| + { "()?" , "()?" },
|
| + { "(){0}", "" },
|
| + { "(){1}", "()" },
|
| + { "(){1,}", "()+" },
|
| + { "(){0,2}", "(?:()()?)?" },
|
| +};
|
| +
|
| +// Parses each table entry, simplifies it, and checks the printed result.
|
| +TEST(TestSimplify, SimpleRegexps) {
|
| +  // size_t index: avoids a signed/unsigned comparison against arraysize().
|
| +  for (size_t i = 0; i < arraysize(tests); i++) {
|
| +    const Test& t = tests[i];
|
| +    RegexpStatus status;
|
| +    VLOG(1) << "Testing " << t.regexp;
|
| +    Regexp* re = Regexp::Parse(
|
| +        t.regexp, Regexp::MatchNL | (Regexp::LikePerl & ~Regexp::OneLine),
|
| +        &status);
|
| +    CHECK(re != NULL) << " " << t.regexp << " " << status.Text();
|
| +    Regexp* sre = re->Simplify();
|
| +    CHECK(sre != NULL);
|
| +    // Check that already-simple regexps don't allocate new ones.
|
| +    if (strcmp(t.regexp, t.simplified) == 0) {
|
| +      CHECK(re == sre) << " " << t.regexp
|
| +                       << " " << re->ToString() << " " << sre->ToString();
|
| +    }
|
| +    // The simplified form must print exactly as the table expects.
|
| +    EXPECT_EQ(t.simplified, sre->ToString())
|
| +        << " " << t.regexp << " " << sre->Dump();
|
| +    re->Decref();
|
| +    sre->Decref();
|
| +  }
|
| +}
|
| +
|
| +} // namespace re2
|
|
|