Index: third_party/re2/re2/testing/simplify_test.cc |
diff --git a/third_party/re2/re2/testing/simplify_test.cc b/third_party/re2/re2/testing/simplify_test.cc |
index d54837c950723c4f0b16ba9420e3b126f10a00d6..9db41eebc418f58290a6aaf1ced8913808a9df34 100644 |
--- a/third_party/re2/re2/testing/simplify_test.cc |
+++ b/third_party/re2/re2/testing/simplify_test.cc |
@@ -136,6 +136,99 @@ static Test tests[] = { |
{ "(){1}", "()" }, |
{ "(){1,}", "()+" }, |
{ "(){0,2}", "(?:()()?)?" }, |
+ |
+ // Test that coalescing occurs and that the resulting repeats are simplified. |
+ // Two-op combinations of *, +, ?, {n}, {n,} and {n,m} with a literal: |
+ { "a*a*", "a*" }, |
+ { "a*a+", "a+" }, |
+ { "a*a?", "a*" }, |
+ { "a*a{2}", "aa+" }, |
+ { "a*a{2,}", "aa+" }, |
+ { "a*a{2,3}", "aa+" }, |
+ { "a+a*", "a+" }, |
+ { "a+a+", "aa+" }, |
+ { "a+a?", "a+" }, |
+ { "a+a{2}", "aaa+" }, |
+ { "a+a{2,}", "aaa+" }, |
+ { "a+a{2,3}", "aaa+" }, |
+ { "a?a*", "a*" }, |
+ { "a?a+", "a+" }, |
+ { "a?a?", "(?:aa?)?" }, |
+ { "a?a{2}", "aaa?" }, |
+ { "a?a{2,}", "aa+" }, |
+ { "a?a{2,3}", "aa(?:aa?)?" }, |
+ { "a{2}a*", "aa+" }, |
+ { "a{2}a+", "aaa+" }, |
+ { "a{2}a?", "aaa?" }, |
+ { "a{2}a{2}", "aaaa" }, |
+ { "a{2}a{2,}", "aaaa+" }, |
+ { "a{2}a{2,3}", "aaaaa?" }, |
+ { "a{2,}a*", "aa+" }, |
+ { "a{2,}a+", "aaa+" }, |
+ { "a{2,}a?", "aa+" }, |
+ { "a{2,}a{2}", "aaaa+" }, |
+ { "a{2,}a{2,}", "aaaa+" }, |
+ { "a{2,}a{2,3}", "aaaa+" }, |
+ { "a{2,3}a*", "aa+" }, |
+ { "a{2,3}a+", "aaa+" }, |
+ { "a{2,3}a?", "aa(?:aa?)?" }, |
+ { "a{2,3}a{2}", "aaaaa?" }, |
+ { "a{2,3}a{2,}", "aaaa+" }, |
+ { "a{2,3}a{2,3}", "aaaa(?:aa?)?" }, |
+ // With a char class, any char and any byte: |
+ { "\\d*\\d*", "[0-9]*" }, |
+ { ".*.*", ".*" }, |
+ { "\\C*\\C*", "\\C*" }, |
+ // FoldCase works, but must be consistent: |
+ { "(?i)A*a*", "[Aa]*" }, |
+ { "(?i)a+A+", "[Aa][Aa]+" }, |
+ { "(?i)A*(?-i)a*", "[Aa]*a*" }, |
+ { "(?i)a+(?-i)A+", "[Aa]+A+" }, |
+ // NonGreedy works, but must be consistent: |
+ { "a*?a*?", "a*?" }, |
+ { "a+?a+?", "aa+?" }, |
+ { "a*?a*", "a*?a*" }, |
+ { "a+a+?", "a+a+?" }, |
+ // The second element is the literal, char class, any char or any byte: |
+ { "a*a", "a+" }, |
+ { "\\d*\\d", "[0-9]+" }, |
+ { ".*.", ".+" }, |
+ { "\\C*\\C", "\\C+" }, |
+ // FoldCase works, but must be consistent: |
+ { "(?i)A*a", "[Aa]+" }, |
+ { "(?i)a+A", "[Aa][Aa]+" }, |
+ { "(?i)A*(?-i)a", "[Aa]*a" }, |
+ { "(?i)a+(?-i)A", "[Aa]+A" }, |
+ // The second element is a literal string that begins with the literal: |
+ { "a*aa", "aa+" }, |
+ { "a*aab", "aa+b" }, |
+ // FoldCase works, but must be consistent: |
+ { "(?i)a*aa", "[Aa][Aa]+" }, |
+ { "(?i)a*aab", "[Aa][Aa]+[Bb]" }, |
+ { "(?i)a*(?-i)aa", "[Aa]*aa" }, |
+ { "(?i)a*(?-i)aab", "[Aa]*aab" }, |
+ // Negative tests with mismatching ops: |
+ { "a*b*", "a*b*" }, |
+ { "\\d*\\D*", "[0-9]*[^0-9]*" }, |
+ { "a+b", "a+b" }, |
+ { "\\d+\\D", "[0-9]+[^0-9]" }, |
+ { "a?bb", "a?bb" }, |
+ // Negative tests with capturing groups: |
+ { "(a*)a*", "(a*)a*" }, |
+ { "a+(a)", "a+(a)" }, |
+ { "(a?)(aa)", "(a?)(aa)" }, |
+ // Just for fun: |
+ { "aa*aa+aa?aa{2}aaa{2,}aaa{2,3}a", "aaaaaaaaaaaaaaaa+" }, |
+ |
+ // During coalescing, the child of the repeat changes, so we build a new |
+ // repeat. The new repeat must have the min and max of the old repeat. |
+ // Failure to copy them results in min=0 and max=0 -> empty match. |
+ { "(?:a*aab){2}", "aa+baa+b" }, |
+ |
+ // During coalescing, the child of the capture changes, so we build a new |
+ // capture. The new capture must have the cap of the old capture. |
+ // Failure to copy it results in cap=0 -> ToString() logs a fatal error. |
+ { "(a*aab)", "(aa+b)" }, |
}; |
TEST(TestSimplify, SimpleRegexps) { |