OLD | NEW |
| (Empty) |
1 // Copyright 2008 The RE2 Authors. All Rights Reserved. | |
2 // Use of this source code is governed by a BSD-style | |
3 // license that can be found in the LICENSE file. | |
4 | |
5 // Exhaustive testing of regular expression matching. | |
6 | |
7 #include "util/test.h" | |
8 #include "re2/re2.h" | |
9 #include "re2/testing/exhaustive_tester.h" | |
10 | |
11 DECLARE_string(regexp_engines); | |
12 | |
13 namespace re2 { | |
14 | |
15 // Test empty string matches (aka "(?:)") | |
16 TEST(EmptyString, Exhaustive) { | |
17 ExhaustiveTest(2, 2, Split(" ", "(?:) a"), | |
18 RegexpGenerator::EgrepOps(), | |
19 5, Split("", "ab"), "", ""); | |
20 } | |
21 | |
22 // Test escaped versions of regexp syntax. | |
23 TEST(Punctuation, Literals) { | |
24 vector<string> alphabet = Explode("()*+?{}[]\\^$."); | |
25 vector<string> escaped = alphabet; | |
26 for (size_t i = 0; i < escaped.size(); i++) | |
27 escaped[i] = "\\" + escaped[i]; | |
28 ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(), | |
29 2, alphabet, "", ""); | |
30 } | |
31 | |
32 // Test ^ $ . \A \z in presence of line endings. | |
33 // Have to wrap the empty-width ones in (?:) so that | |
34 // they can be repeated -- PCRE rejects ^* but allows (?:^)* | |
35 TEST(LineEnds, Exhaustive) { | |
36 ExhaustiveTest(2, 2, Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)"), | |
37 RegexpGenerator::EgrepOps(), | |
38 4, Explode("ab\n"), "", ""); | |
39 } | |
40 | |
41 // Test what does and does not match \n. | |
42 // This would be a good test, except that PCRE seems to have a bug: | |
43 // in single-byte character set mode (the default), | |
44 // [^a] matches \n, but in UTF-8 mode it does not. | |
45 // So when we run the test, the tester complains that | |
46 // we don't agree with PCRE, but it's PCRE that is at fault. | |
47 // For what it's worth, Perl gets this right (matches | |
48 // regardless of whether UTF-8 input is selected): | |
49 // | |
50 // #!/usr/bin/perl | |
51 // use POSIX qw(locale_h); | |
52 // print "matches in latin1\n" if "\n" =~ /[^a]/; | |
53 // setlocale(LC_ALL, "en_US.utf8"); | |
54 // print "matches in utf8\n" if "\n" =~ /[^a]/; | |
55 // | |
56 // The rule chosen for RE2 is that by default, like Perl, | |
57 // dot does not match \n but negated character classes [^a] do. | |
58 // (?s) will allow dot to match \n; there is no way in RE2 | |
59 // to stop [^a] from matching \n, though the underlying library | |
60 // provides a mechanism, and RE2 could add new syntax if needed. | |
61 // | |
62 // TEST(Newlines, Exhaustive) { | |
63 // vector<string> empty_vector; | |
64 // ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"), | |
65 // RegexpGenerator::EgrepOps(), | |
66 // 4, Explode("a\n"), ""); | |
67 // } | |
68 | |
69 } // namespace re2 | |
70 | |
OLD | NEW |