| Index: third_party/re2/re2/testing/simplify_test.cc
|
| diff --git a/third_party/re2/re2/testing/simplify_test.cc b/third_party/re2/re2/testing/simplify_test.cc
|
| index d54837c950723c4f0b16ba9420e3b126f10a00d6..9db41eebc418f58290a6aaf1ced8913808a9df34 100644
|
| --- a/third_party/re2/re2/testing/simplify_test.cc
|
| +++ b/third_party/re2/re2/testing/simplify_test.cc
|
| @@ -136,6 +136,99 @@ static Test tests[] = {
|
| { "(){1}", "()" },
|
| { "(){1,}", "()+" },
|
| { "(){0,2}", "(?:()()?)?" },
|
| +
|
| + // Test that coalescing occurs and that the resulting repeats are simplified.
|
| + // Two-op combinations of *, +, ?, {n}, {n,} and {n,m} with a literal:
|
| + { "a*a*", "a*" },
|
| + { "a*a+", "a+" },
|
| + { "a*a?", "a*" },
|
| + { "a*a{2}", "aa+" },
|
| + { "a*a{2,}", "aa+" },
|
| + { "a*a{2,3}", "aa+" },
|
| + { "a+a*", "a+" },
|
| + { "a+a+", "aa+" },
|
| + { "a+a?", "a+" },
|
| + { "a+a{2}", "aaa+" },
|
| + { "a+a{2,}", "aaa+" },
|
| + { "a+a{2,3}", "aaa+" },
|
| + { "a?a*", "a*" },
|
| + { "a?a+", "a+" },
|
| + { "a?a?", "(?:aa?)?" },
|
| + { "a?a{2}", "aaa?" },
|
| + { "a?a{2,}", "aa+" },
|
| + { "a?a{2,3}", "aa(?:aa?)?" },
|
| + { "a{2}a*", "aa+" },
|
| + { "a{2}a+", "aaa+" },
|
| + { "a{2}a?", "aaa?" },
|
| + { "a{2}a{2}", "aaaa" },
|
| + { "a{2}a{2,}", "aaaa+" },
|
| + { "a{2}a{2,3}", "aaaaa?" },
|
| + { "a{2,}a*", "aa+" },
|
| + { "a{2,}a+", "aaa+" },
|
| + { "a{2,}a?", "aa+" },
|
| + { "a{2,}a{2}", "aaaa+" },
|
| + { "a{2,}a{2,}", "aaaa+" },
|
| + { "a{2,}a{2,3}", "aaaa+" },
|
| + { "a{2,3}a*", "aa+" },
|
| + { "a{2,3}a+", "aaa+" },
|
| + { "a{2,3}a?", "aa(?:aa?)?" },
|
| + { "a{2,3}a{2}", "aaaaa?" },
|
| + { "a{2,3}a{2,}", "aaaa+" },
|
| + { "a{2,3}a{2,3}", "aaaa(?:aa?)?" },
|
| + // With a char class, any char and any byte:
|
| + { "\\d*\\d*", "[0-9]*" },
|
| + { ".*.*", ".*" },
|
| + { "\\C*\\C*", "\\C*" },
|
| + // FoldCase works, but only when both elements have the same setting:
|
| + { "(?i)A*a*", "[Aa]*" },
|
| + { "(?i)a+A+", "[Aa][Aa]+" },
|
| + { "(?i)A*(?-i)a*", "[Aa]*a*" },
|
| + { "(?i)a+(?-i)A+", "[Aa]+A+" },
|
| + // NonGreedy works, but only when both repeats have the same greediness:
|
| + { "a*?a*?", "a*?" },
|
| + { "a+?a+?", "aa+?" },
|
| + { "a*?a*", "a*?a*" },
|
| + { "a+a+?", "a+a+?" },
|
| + // The second element is the literal, char class, any char or any byte:
|
| + { "a*a", "a+" },
|
| + { "\\d*\\d", "[0-9]+" },
|
| + { ".*.", ".+" },
|
| + { "\\C*\\C", "\\C+" },
|
| + // FoldCase works, but only when both elements have the same setting:
|
| + { "(?i)A*a", "[Aa]+" },
|
| + { "(?i)a+A", "[Aa][Aa]+" },
|
| + { "(?i)A*(?-i)a", "[Aa]*a" },
|
| + { "(?i)a+(?-i)A", "[Aa]+A" },
|
| + // The second element is a literal string that begins with the literal:
|
| + { "a*aa", "aa+" },
|
| + { "a*aab", "aa+b" },
|
| + // FoldCase works, but only when both elements have the same setting:
|
| + { "(?i)a*aa", "[Aa][Aa]+" },
|
| + { "(?i)a*aab", "[Aa][Aa]+[Bb]" },
|
| + { "(?i)a*(?-i)aa", "[Aa]*aa" },
|
| + { "(?i)a*(?-i)aab", "[Aa]*aab" },
|
| + // Negative tests with mismatching operands:
|
| + { "a*b*", "a*b*" },
|
| + { "\\d*\\D*", "[0-9]*[^0-9]*" },
|
| + { "a+b", "a+b" },
|
| + { "\\d+\\D", "[0-9]+[^0-9]" },
|
| + { "a?bb", "a?bb" },
|
| + // Negative tests with capturing groups:
|
| + { "(a*)a*", "(a*)a*" },
|
| + { "a+(a)", "a+(a)" },
|
| + { "(a?)(aa)", "(a?)(aa)" },
|
| + // Just for fun:
|
| + { "aa*aa+aa?aa{2}aaa{2,}aaa{2,3}a", "aaaaaaaaaaaaaaaa+" },
|
| +
|
| + // During coalescing, the child of the repeat changes, so we build a new
|
| + // repeat. The new repeat must have the min and max of the old repeat.
|
| + // Failure to copy them results in min=0 and max=0 -> empty match.
|
| + { "(?:a*aab){2}", "aa+baa+b" },
|
| +
|
| + // During coalescing, the child of the capture changes, so we build a new
|
| + // capture. The new capture must have the cap of the old capture.
|
| + // Failure to copy it results in cap=0 -> ToString() logs a fatal error.
|
| + { "(a*aab)", "(aa+b)" },
|
| };
|
|
|
| TEST(TestSimplify, SimpleRegexps) {
|
|
|