| OLD | NEW |
| (Empty) |
| 1 // Copyright 2008 The RE2 Authors. All Rights Reserved. | |
| 2 // Use of this source code is governed by a BSD-style | |
| 3 // license that can be found in the LICENSE file. | |
| 4 | |
| 5 // Exhaustive testing of regular expression matching. | |
| 6 | |
| 7 #include "util/test.h" | |
| 8 #include "re2/re2.h" | |
| 9 #include "re2/testing/exhaustive_tester.h" | |
| 10 | |
| 11 DECLARE_string(regexp_engines); | |
| 12 | |
| 13 namespace re2 { | |
| 14 | |
| 15 // Test empty string matches (aka "(?:)"): runs ExhaustiveTest over the regexp fragments produced by Split(" ", "(?:) a"), combined using RegexpGenerator::EgrepOps(), with input characters taken from Split("", "ab"). NOTE(review): the numeric arguments (2, 2, 5) presumably bound the number of atoms, the number of operators and the input string length -- confirm against exhaustive_tester.h. | |
| 16 TEST(EmptyString, Exhaustive) { | |
| 17 ExhaustiveTest(2, 2, Split(" ", "(?:) a"), | |
| 18 RegexpGenerator::EgrepOps(), | |
| 19 5, Split("", "ab"), "", ""); | |
| 20 } | |
| 21 | |
| 22 // Test escaped versions of regexp syntax: each punctuation character is handed to ExhaustiveTest twice, backslash-escaped in the regexp-fragment slot and unescaped in the input-character slot. | |
| 23 TEST(Punctuation, Literals) { | |
| 24 vector<string> punct = Explode("()*+?{}[]\\^$."); | |
| 25 vector<string> quoted; | |
| 26 for (size_t k = 0; k != punct.size(); ++k) | |
| 27 quoted.push_back("\\" + punct[k]); | |
| 28 ExhaustiveTest(1, 1, quoted, RegexpGenerator::EgrepOps(), 2, punct, | |
| 29 "", ""); | |
| 30 } | |
| 31 | |
| 32 // Test ^ $ . \A \z (together with the literals a and \n) in presence of line endings: the input characters come from Explode("ab\n"). | |
| 33 // The empty-width assertions have to be wrapped in (?:) so that a repetition | |
| 34 // operator can be applied to them -- PCRE rejects ^* but allows (?:^)*. NOTE(review): the numeric arguments (2, 2, 4) presumably bound atoms, operators and input string length -- confirm against exhaustive_tester.h. | |
| 35 TEST(LineEnds, Exhaustive) { | |
| 36 ExhaustiveTest(2, 2, Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)"), | |
| 37 RegexpGenerator::EgrepOps(), | |
| 38 4, Explode("ab\n"), "", ""); | |
| 39 } | |
| 40 | |
| 41 // Test what does and does not match \n. | |
| 42 // This would be a good test, except that PCRE seems to have a bug: | |
| 43 // in single-byte character set mode (the default), | |
| 44 // [^a] matches \n, but in UTF-8 mode it does not. | |
| 45 // So when we run the test, the tester complains that | |
| 46 // we don't agree with PCRE, but it's PCRE that is at fault. | |
| 47 // For what it's worth, Perl gets this right (matches | |
| 48 // regardless of whether UTF-8 input is selected): | |
| 49 // | |
| 50 // #!/usr/bin/perl | |
| 51 // use POSIX qw(locale_h); | |
| 52 // print "matches in latin1\n" if "\n" =~ /[^a]/; | |
| 53 // setlocale(LC_ALL, "en_US.utf8"); | |
| 54 // print "matches in utf8\n" if "\n" =~ /[^a]/; | |
| 55 // | |
| 56 // The rule chosen for RE2 is that by default, like Perl, | |
| 57 // dot does not match \n but negated character classes [^a] do. | |
| 58 // (?s) will allow dot to match \n; there is no way in RE2 | |
| 59 // to stop [^a] from matching \n, though the underlying library | |
| 60 // provides a mechanism, and RE2 could add new syntax if needed. | |
| 61 // | |
| 62 // TEST(Newlines, Exhaustive) { | |
| 63 // vector<string> empty_vector; | |
| 64 // ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"), | |
| 65 // RegexpGenerator::EgrepOps(), | |
| 66 // 4, Explode("a\n"), ""); | |
| 67 // } | |
| 68 | |
| 69 } // namespace re2 | |
| 70 | |
| OLD | NEW |