OLD | NEW |
| (Empty) |
1 // Copyright 2008 The RE2 Authors. All Rights Reserved. | |
2 // Use of this source code is governed by a BSD-style | |
3 // license that can be found in the LICENSE file. | |
4 | |
5 // Exhaustive testing of regular expression matching. | |
6 | |
7 #include "util/test.h" | |
8 #include "re2/testing/exhaustive_tester.h" | |
9 | |
10 namespace re2 { | |
11 | |
12 // Test simple character classes by themselves. | |
13 TEST(CharacterClasses, Exhaustive) { | |
14 vector<string> atoms = Split(" ", | |
15 "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b ."); | |
16 ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(), | |
17 5, Explode("ab"), "", ""); | |
18 } | |
19 | |
20 // Test simple character classes inside a___b (for example, a[a]b). | |
21 TEST(CharacterClasses, ExhaustiveAB) { | |
22 vector<string> atoms = Split(" ", | |
23 "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b ."); | |
24 ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(), | |
25 5, Explode("ab"), "a%sb", ""); | |
26 } | |
27 | |
28 // Returns UTF8 for Rune r | |
29 static string UTF8(Rune r) { | |
30 char buf[UTFmax+1]; | |
31 buf[runetochar(buf, &r)] = 0; | |
32 return string(buf); | |
33 } | |
34 | |
35 // Returns a vector of "interesting" UTF8 characters. | |
36 // Unicode is now too big to just return all of them, | |
37 // so UTF8Characters return a set likely to be good test cases. | |
38 static const vector<string>& InterestingUTF8() { | |
39 static bool init; | |
40 static vector<string> v; | |
41 | |
42 if (init) | |
43 return v; | |
44 | |
45 init = true; | |
46 // All the Latin1 equivalents are interesting. | |
47 for (int i = 1; i < 256; i++) | |
48 v.push_back(UTF8(i)); | |
49 | |
50 // After that, the codes near bit boundaries are | |
51 // interesting, because they span byte sequence lengths. | |
52 for (int j = 0; j < 8; j++) | |
53 v.push_back(UTF8(256 + j)); | |
54 for (int i = 512; i < Runemax; i <<= 1) | |
55 for (int j = -8; j < 8; j++) | |
56 v.push_back(UTF8(i + j)); | |
57 | |
58 // The codes near Runemax, including Runemax itself, are interesting. | |
59 for (int j = -8; j <= 0; j++) | |
60 v.push_back(UTF8(Runemax + j)); | |
61 | |
62 return v; | |
63 } | |
64 | |
65 // Test interesting UTF-8 characters against character classes. | |
66 TEST(InterestingUTF8, SingleOps) { | |
67 vector<string> atoms = Split(" ", | |
68 ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B " | |
69 "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] " | |
70 "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] " | |
71 "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]"); | |
72 vector<string> ops; // no ops | |
73 ExhaustiveTest(1, 0, atoms, ops, | |
74 1, InterestingUTF8(), "", ""); | |
75 } | |
76 | |
77 // Test interesting UTF-8 characters against character classes, | |
78 // but wrap everything inside AB. | |
79 TEST(InterestingUTF8, AB) { | |
80 vector<string> atoms = Split(" ", | |
81 ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B " | |
82 "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] " | |
83 "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] " | |
84 "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]"); | |
85 vector<string> ops; // no ops | |
86 vector<string> alpha = InterestingUTF8(); | |
87 for (size_t i = 0; i < alpha.size(); i++) | |
88 alpha[i] = "a" + alpha[i] + "b"; | |
89 ExhaustiveTest(1, 0, atoms, ops, | |
90 1, alpha, "a%sb", ""); | |
91 } | |
92 | |
93 } // namespace re2 | |
94 | |
OLD | NEW |