| OLD | NEW |
| (Empty) |
| 1 // Copyright 2006-2008 The RE2 Authors. All Rights Reserved. | |
| 2 // Use of this source code is governed by a BSD-style | |
| 3 // license that can be found in the LICENSE file. | |
| 4 | |
| 5 // Benchmarks for regular expression implementations. | |
| 6 | |
| 7 #include "util/test.h" | |
| 8 #include "re2/prog.h" | |
| 9 #include "re2/re2.h" | |
| 10 #include "re2/regexp.h" | |
| 11 #include "util/pcre.h" | |
| 12 #include "util/benchmark.h" | |
| 13 | |
| 14 namespace re2 { | |
| 15 void Test(); | |
| 16 void MemoryUsage(); | |
| 17 } // namespace re2 | |
| 18 | |
| 19 typedef testing::MallocCounter MallocCounter; | |
| 20 | |
| 21 namespace re2 { | |
| 22 | |
| 23 void Test() { | |
| 24 Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL); | |
| 25 CHECK(re); | |
| 26 Prog* prog = re->CompileToProg(0); | |
| 27 CHECK(prog); | |
| 28 CHECK(prog->IsOnePass()); | |
| 29 const char* text = "650-253-0001"; | |
| 30 StringPiece sp[4]; | |
| 31 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4
)); | |
| 32 CHECK_EQ(sp[0], "650-253-0001"); | |
| 33 CHECK_EQ(sp[1], "650"); | |
| 34 CHECK_EQ(sp[2], "253"); | |
| 35 CHECK_EQ(sp[3], "0001"); | |
| 36 delete prog; | |
| 37 re->Decref(); | |
| 38 LOG(INFO) << "test passed\n"; | |
| 39 } | |
| 40 | |
| 41 void MemoryUsage() { | |
| 42 const char* regexp = "(\\d+)-(\\d+)-(\\d+)"; | |
| 43 const char* text = "650-253-0001"; | |
| 44 { | |
| 45 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); | |
| 46 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 47 CHECK(re); | |
| 48 // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly, | |
| 49 // because LOG(INFO) might do a big allocation before they get evaluated. | |
| 50 fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
| 51 mc.Reset(); | |
| 52 | |
| 53 Prog* prog = re->CompileToProg(0); | |
| 54 CHECK(prog); | |
| 55 CHECK(prog->IsOnePass()); | |
| 56 fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
| 57 mc.Reset(); | |
| 58 | |
| 59 StringPiece sp[4]; | |
| 60 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp,
4)); | |
| 61 fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
| 62 delete prog; | |
| 63 re->Decref(); | |
| 64 } | |
| 65 | |
| 66 { | |
| 67 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); | |
| 68 | |
| 69 PCRE re(regexp, PCRE::UTF8); | |
| 70 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
| 71 PCRE::FullMatch(text, re); | |
| 72 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
| 73 } | |
| 74 | |
| 75 { | |
| 76 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); | |
| 77 | |
| 78 PCRE* re = new PCRE(regexp, PCRE::UTF8); | |
| 79 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
| 80 PCRE::FullMatch(text, *re); | |
| 81 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
| 82 delete re; | |
| 83 } | |
| 84 | |
| 85 { | |
| 86 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); | |
| 87 | |
| 88 RE2 re(regexp); | |
| 89 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
| 90 RE2::FullMatch(text, re); | |
| 91 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
| 92 } | |
| 93 | |
| 94 fprintf(stderr, "sizeof: PCRE=%d RE2=%d Prog=%d Inst=%d\n", | |
| 95 static_cast<int>(sizeof(PCRE)), | |
| 96 static_cast<int>(sizeof(RE2)), | |
| 97 static_cast<int>(sizeof(Prog)), | |
| 98 static_cast<int>(sizeof(Prog::Inst))); | |
| 99 } | |
| 100 | |
| 101 // Regular expression implementation wrappers. | |
| 102 // Defined at bottom of file, but they are repetitive | |
| 103 // and not interesting. | |
| 104 | |
| 105 typedef void SearchImpl(int iters, const char* regexp, const StringPiece& text, | |
| 106 Prog::Anchor anchor, bool expect_match); | |
| 107 | |
| 108 SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState, | |
| 109 SearchPCRE, SearchRE2, | |
| 110 SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass, SearchCachedBi
tState, | |
| 111 SearchCachedPCRE, SearchCachedRE2; | |
| 112 | |
| 113 typedef void ParseImpl(int iters, const char* regexp, const StringPiece& text); | |
| 114 | |
| 115 ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState, | |
| 116 Parse1PCRE, Parse1RE2, | |
| 117 Parse1Backtrack, | |
| 118 Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState, | |
| 119 Parse1CachedPCRE, Parse1CachedRE2, | |
| 120 Parse1CachedBacktrack; | |
| 121 | |
| 122 ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState, | |
| 123 Parse3PCRE, Parse3RE2, | |
| 124 Parse3Backtrack, | |
| 125 Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState, | |
| 126 Parse3CachedPCRE, Parse3CachedRE2, | |
| 127 Parse3CachedBacktrack; | |
| 128 | |
| 129 ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2; | |
| 130 | |
| 131 ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2; | |
| 132 | |
| 133 // Benchmark: failed search for regexp in random text. | |
| 134 | |
// Fills *text with nbytes pseudo-random bytes, each in the range 0x20..0x7F.
// The generator is re-seeded with a fixed seed on every call, so the same
// nbytes always yields the same text. The benchmarks rely on this text not
// containing the string being searched for, to exercise worst-case search
// behavior.
void MakeText(string* text, int nbytes) {
  srand(1);
  text->resize(nbytes);
  for (string::iterator out = text->begin(); out != text->end(); ++out) {
    // Keep seven bits, then lift control characters (e.g. '\n') up to 0x20.
    // The clipping biases the distribution, but uniformity is not needed.
    const int candidate = rand() & 0x7F;
    *out = (candidate < 0x20) ? 0x20 : candidate;
  }
}
| 149 | |
| 150 // Makes text of size nbytes, then calls run to search | |
| 151 // the text for regexp iters times. | |
| 152 void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) { | |
| 153 StopBenchmarkTiming(); | |
| 154 string s; | |
| 155 MakeText(&s, nbytes); | |
| 156 BenchmarkMemoryUsage(); | |
| 157 StartBenchmarkTiming(); | |
| 158 search(iters, regexp, s, Prog::kUnanchored, false); | |
| 159 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); | |
| 160 } | |
| 161 | |
// Regexps for the failed-search benchmarks, in the original author's order
// of increasing difficulty.

// Both EASY patterns begin with a literal 'A', which gives the search loop
// something to memchr for.
#define EASY0 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
#define EASY1 "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"

// Slightly harder: the leading character class can't be memchr'ed. One
// could look for ABC and work backward, but no one does that.
#define MEDIUM "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"

// A fair amount harder because of the leading [ -~]*: a bad backtracking
// implementation will take O(text^2) time to figure out there's no match.
#define HARD "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"

// Stresses engines that are trying to track parentheses.
#define PARENS "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
               "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
| 180 | |
| 181 void Search_Easy0_CachedDFA(int i, int n) { Search(i, n, EASY0, SearchCached
DFA); } | |
| 182 void Search_Easy0_CachedNFA(int i, int n) { Search(i, n, EASY0, SearchCached
NFA); } | |
| 183 void Search_Easy0_CachedPCRE(int i, int n) { Search(i, n, EASY0, SearchCached
PCRE); } | |
| 184 void Search_Easy0_CachedRE2(int i, int n) { Search(i, n, EASY0, SearchCached
RE2); } | |
| 185 | |
| 186 BENCHMARK_RANGE(Search_Easy0_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()
); | |
| 187 BENCHMARK_RANGE(Search_Easy0_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs(
)); | |
| 188 #ifdef USEPCRE | |
| 189 BENCHMARK_RANGE(Search_Easy0_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()
); | |
| 190 #endif | |
| 191 BENCHMARK_RANGE(Search_Easy0_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()
); | |
| 192 | |
| 193 void Search_Easy1_CachedDFA(int i, int n) { Search(i, n, EASY1, SearchCached
DFA); } | |
| 194 void Search_Easy1_CachedNFA(int i, int n) { Search(i, n, EASY1, SearchCached
NFA); } | |
| 195 void Search_Easy1_CachedPCRE(int i, int n) { Search(i, n, EASY1, SearchCached
PCRE); } | |
| 196 void Search_Easy1_CachedRE2(int i, int n) { Search(i, n, EASY1, SearchCached
RE2); } | |
| 197 | |
| 198 BENCHMARK_RANGE(Search_Easy1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()
); | |
| 199 BENCHMARK_RANGE(Search_Easy1_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs(
)); | |
| 200 #ifdef USEPCRE | |
| 201 BENCHMARK_RANGE(Search_Easy1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()
); | |
| 202 #endif | |
| 203 BENCHMARK_RANGE(Search_Easy1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()
); | |
| 204 | |
| 205 void Search_Medium_CachedDFA(int i, int n) { Search(i, n, MEDIUM, SearchCach
edDFA); } | |
| 206 void Search_Medium_CachedNFA(int i, int n) { Search(i, n, MEDIUM, SearchCach
edNFA); } | |
| 207 void Search_Medium_CachedPCRE(int i, int n) { Search(i, n, MEDIUM, SearchCach
edPCRE); } | |
| 208 void Search_Medium_CachedRE2(int i, int n) { Search(i, n, MEDIUM, SearchCach
edRE2); } | |
| 209 | |
| 210 BENCHMARK_RANGE(Search_Medium_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs(
)); | |
| 211 BENCHMARK_RANGE(Search_Medium_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs
()); | |
| 212 #ifdef USEPCRE | |
| 213 BENCHMARK_RANGE(Search_Medium_CachedPCRE, 8, 256<<10)->ThreadRange(1, NumCPUs
()); | |
| 214 #endif | |
| 215 BENCHMARK_RANGE(Search_Medium_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs(
)); | |
| 216 | |
| 217 void Search_Hard_CachedDFA(int i, int n) { Search(i, n, HARD, SearchCachedDF
A); } | |
| 218 void Search_Hard_CachedNFA(int i, int n) { Search(i, n, HARD, SearchCachedNF
A); } | |
| 219 void Search_Hard_CachedPCRE(int i, int n) { Search(i, n, HARD, SearchCachedPC
RE); } | |
| 220 void Search_Hard_CachedRE2(int i, int n) { Search(i, n, HARD, SearchCachedRE
2); } | |
| 221 | |
| 222 BENCHMARK_RANGE(Search_Hard_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs())
; | |
| 223 BENCHMARK_RANGE(Search_Hard_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()
); | |
| 224 #ifdef USEPCRE | |
| 225 BENCHMARK_RANGE(Search_Hard_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs()); | |
| 226 #endif | |
| 227 BENCHMARK_RANGE(Search_Hard_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs())
; | |
| 228 | |
| 229 void Search_Parens_CachedDFA(int i, int n) { Search(i, n, PARENS, SearchCach
edDFA); } | |
| 230 void Search_Parens_CachedNFA(int i, int n) { Search(i, n, PARENS, SearchCach
edNFA); } | |
| 231 void Search_Parens_CachedPCRE(int i, int n) { Search(i, n, PARENS, SearchCach
edPCRE); } | |
| 232 void Search_Parens_CachedRE2(int i, int n) { Search(i, n, PARENS, SearchCach
edRE2); } | |
| 233 | |
| 234 BENCHMARK_RANGE(Search_Parens_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs(
)); | |
| 235 BENCHMARK_RANGE(Search_Parens_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs
()); | |
| 236 #ifdef USEPCRE | |
| 237 BENCHMARK_RANGE(Search_Parens_CachedPCRE, 8, 8)->ThreadRange(1, NumCPUs()); | |
| 238 #endif | |
| 239 BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs(
)); | |
| 240 | |
| 241 void SearchBigFixed(int iters, int nbytes, SearchImpl* search) { | |
| 242 StopBenchmarkTiming(); | |
| 243 string s; | |
| 244 s.append(nbytes/2, 'x'); | |
| 245 string regexp = "^" + s + ".*$"; | |
| 246 string t; | |
| 247 MakeText(&t, nbytes/2); | |
| 248 s += t; | |
| 249 BenchmarkMemoryUsage(); | |
| 250 StartBenchmarkTiming(); | |
| 251 search(iters, regexp.c_str(), s, Prog::kUnanchored, true); | |
| 252 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); | |
| 253 } | |
| 254 | |
| 255 void Search_BigFixed_CachedDFA(int i, int n) { SearchBigFixed(i, n, SearchCa
chedDFA); } | |
| 256 void Search_BigFixed_CachedNFA(int i, int n) { SearchBigFixed(i, n, SearchCa
chedNFA); } | |
| 257 void Search_BigFixed_CachedPCRE(int i, int n) { SearchBigFixed(i, n, SearchCa
chedPCRE); } | |
| 258 void Search_BigFixed_CachedRE2(int i, int n) { SearchBigFixed(i, n, SearchCa
chedRE2); } | |
| 259 | |
| 260 BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs
()); | |
| 261 BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPU
s()); | |
| 262 #ifdef USEPCRE | |
| 263 BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPU
s()); | |
| 264 #endif | |
| 265 BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs
()); | |
| 266 | |
| 267 // Benchmark: FindAndConsume | |
| 268 | |
| 269 void FindAndConsume(int iters, int nbytes) { | |
| 270 StopBenchmarkTiming(); | |
| 271 string s; | |
| 272 MakeText(&s, nbytes); | |
| 273 s.append("Hello World"); | |
| 274 StartBenchmarkTiming(); | |
| 275 RE2 re("((Hello World))"); | |
| 276 for (int i = 0; i < iters; i++) { | |
| 277 StringPiece t = s; | |
| 278 StringPiece u; | |
| 279 CHECK(RE2::FindAndConsume(&t, re, &u)); | |
| 280 CHECK_EQ(u, "Hello World"); | |
| 281 } | |
| 282 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); | |
| 283 } | |
| 284 | |
| 285 BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
| 286 | |
| 287 // Benchmark: successful anchored search. | |
| 288 | |
| 289 void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search
) { | |
| 290 StopBenchmarkTiming(); | |
| 291 string s; | |
| 292 MakeText(&s, nbytes); | |
| 293 BenchmarkMemoryUsage(); | |
| 294 StartBenchmarkTiming(); | |
| 295 search(iters, regexp, s, Prog::kAnchored, true); | |
| 296 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); | |
| 297 } | |
| 298 | |
| 299 // Unambiguous search (RE2 can use OnePass). | |
| 300 | |
| 301 void Search_Success_DFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchDFA
); } | |
| 302 void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOne
Pass); } | |
| 303 void Search_Success_PCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchPCR
E); } | |
| 304 void Search_Success_RE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchRE2
); } | |
| 305 | |
| 306 BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
| 307 #ifdef USEPCRE | |
| 308 BENCHMARK_RANGE(Search_Success_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
| 309 #endif | |
| 310 BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
| 311 BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs()); | |
| 312 | |
| 313 void Search_Success_CachedDFA(int i, int n) { SearchSuccess(i, n, ".*$", Sea
rchCachedDFA); } | |
| 314 void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", Sea
rchCachedOnePass); } | |
| 315 void Search_Success_CachedPCRE(int i, int n) { SearchSuccess(i, n, ".*$", Sea
rchCachedPCRE); } | |
| 316 void Search_Success_CachedRE2(int i, int n) { SearchSuccess(i, n, ".*$", Sea
rchCachedRE2); } | |
| 317 | |
| 318 BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs
()); | |
| 319 #ifdef USEPCRE | |
| 320 BENCHMARK_RANGE(Search_Success_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs
()); | |
| 321 #endif | |
| 322 BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs
()); | |
| 323 BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs(
)); | |
| 324 | |
| 325 // Ambiguous search (RE2 cannot use OnePass). | |
| 326 | |
| 327 void Search_Success1_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchD
FA); } | |
| 328 void Search_Success1_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchP
CRE); } | |
| 329 void Search_Success1_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchR
E2); } | |
| 330 void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*.$", Se
archBitState); } | |
| 331 | |
| 332 BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
| 333 #ifdef USEPCRE | |
| 334 BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
| 335 #endif | |
| 336 BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
| 337 BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs()); | |
| 338 | |
| 339 void Search_Success1_Cached_DFA(int i, int n) { SearchSuccess(i, n, ".*.$",
SearchCachedDFA); } | |
| 340 void Search_Success1_Cached_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$",
SearchCachedPCRE); } | |
| 341 void Search_Success1_Cached_RE2(int i, int n) { SearchSuccess(i, n, ".*.$",
SearchCachedRE2); } | |
| 342 | |
| 343 BENCHMARK_RANGE(Search_Success1_Cached_DFA, 8, 16<<20)->ThreadRange(1, NumCP
Us()); | |
| 344 #ifdef USEPCRE | |
| 345 BENCHMARK_RANGE(Search_Success1_Cached_PCRE, 8, 16<<20)->ThreadRange(1, NumCP
Us()); | |
| 346 #endif | |
| 347 BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCP
Us()); | |
| 348 | |
| 349 // Benchmark: use regexp to find phone number. | |
| 350 | |
| 351 void SearchDigits(int iters, SearchImpl* search) { | |
| 352 StringPiece s("650-253-0001"); | |
| 353 BenchmarkMemoryUsage(); | |
| 354 search(iters, "([0-9]+)-([0-9]+)-([0-9]+)", s, Prog::kAnchored, true); | |
| 355 SetBenchmarkItemsProcessed(iters); | |
| 356 } | |
| 357 | |
| 358 void Search_Digits_DFA(int i) { SearchDigits(i, SearchDFA); } | |
| 359 void Search_Digits_NFA(int i) { SearchDigits(i, SearchNFA); } | |
| 360 void Search_Digits_OnePass(int i) { SearchDigits(i, SearchOnePass); } | |
| 361 void Search_Digits_PCRE(int i) { SearchDigits(i, SearchPCRE); } | |
| 362 void Search_Digits_RE2(int i) { SearchDigits(i, SearchRE2); } | |
| 363 void Search_Digits_BitState(int i) { SearchDigits(i, SearchBitState); } | |
| 364 | |
| 365 BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs()); | |
| 366 BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs()); | |
| 367 BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs()); | |
| 368 #ifdef USEPCRE | |
| 369 BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs()); | |
| 370 #endif | |
| 371 BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs()); | |
| 372 BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs()); | |
| 373 | |
| 374 // Benchmark: use regexp to parse digit fields in phone number. | |
| 375 | |
| 376 void Parse3Digits(int iters, | |
| 377 void (*parse3)(int, const char*, const StringPiece&)) { | |
| 378 BenchmarkMemoryUsage(); | |
| 379 parse3(iters, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001"); | |
| 380 SetBenchmarkItemsProcessed(iters); | |
| 381 } | |
| 382 | |
| 383 void Parse_Digits_NFA(int i) { Parse3Digits(i, Parse3NFA); } | |
| 384 void Parse_Digits_OnePass(int i) { Parse3Digits(i, Parse3OnePass); } | |
| 385 void Parse_Digits_PCRE(int i) { Parse3Digits(i, Parse3PCRE); } | |
| 386 void Parse_Digits_RE2(int i) { Parse3Digits(i, Parse3RE2); } | |
| 387 void Parse_Digits_Backtrack(int i) { Parse3Digits(i, Parse3Backtrack); } | |
| 388 void Parse_Digits_BitState(int i) { Parse3Digits(i, Parse3BitState); } | |
| 389 | |
| 390 BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs()); | |
| 391 BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs()); | |
| 392 #ifdef USEPCRE | |
| 393 BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs()); | |
| 394 #endif | |
| 395 BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs()); | |
| 396 BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs()); | |
| 397 BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs()); | |
| 398 | |
| 399 void Parse_CachedDigits_NFA(int i) { Parse3Digits(i, Parse3CachedNFA); } | |
| 400 void Parse_CachedDigits_OnePass(int i) { Parse3Digits(i, Parse3CachedOnePass
); } | |
| 401 void Parse_CachedDigits_PCRE(int i) { Parse3Digits(i, Parse3CachedPCRE);
} | |
| 402 void Parse_CachedDigits_RE2(int i) { Parse3Digits(i, Parse3CachedRE2); } | |
| 403 void Parse_CachedDigits_Backtrack(int i) { Parse3Digits(i, Parse3CachedBacktra
ck); } | |
| 404 void Parse_CachedDigits_BitState(int i) { Parse3Digits(i, Parse3CachedBitState
); } | |
| 405 | |
| 406 BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs()); | |
| 407 BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs()); | |
| 408 #ifdef USEPCRE | |
| 409 BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs()); | |
| 410 #endif | |
| 411 BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs()); | |
| 412 BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs()); | |
| 413 BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs()); | |
| 414 | |
| 415 void Parse3DigitDs(int iters, | |
| 416 void (*parse3)(int, const char*, const StringPiece&)) { | |
| 417 BenchmarkMemoryUsage(); | |
| 418 parse3(iters, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001"); | |
| 419 SetBenchmarkItemsProcessed(iters); | |
| 420 } | |
| 421 | |
| 422 void Parse_DigitDs_NFA(int i) { Parse3DigitDs(i, Parse3NFA); } | |
| 423 void Parse_DigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3OnePass); } | |
| 424 void Parse_DigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3PCRE); } | |
| 425 void Parse_DigitDs_RE2(int i) { Parse3DigitDs(i, Parse3RE2); } | |
| 426 void Parse_DigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBacktrack);
} | |
| 427 void Parse_DigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitState); } | |
| 428 | |
| 429 BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs()); | |
| 430 BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs()); | |
| 431 #ifdef USEPCRE | |
| 432 BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs()); | |
| 433 #endif | |
| 434 BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs()); | |
| 435 BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs()); | |
| 436 BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs()); | |
| 437 | |
| 438 void Parse_CachedDigitDs_NFA(int i) { Parse3DigitDs(i, Parse3CachedNFA);
} | |
| 439 void Parse_CachedDigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3CachedOnePa
ss); } | |
| 440 void Parse_CachedDigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3CachedPCRE)
; } | |
| 441 void Parse_CachedDigitDs_RE2(int i) { Parse3DigitDs(i, Parse3CachedRE2);
} | |
| 442 void Parse_CachedDigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBackt
rack); } | |
| 443 void Parse_CachedDigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitSta
te); } | |
| 444 | |
| 445 BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs()); | |
| 446 BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs()); | |
| 447 #ifdef USEPCRE | |
| 448 BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs()); | |
| 449 #endif | |
| 450 BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs()); | |
| 451 BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs()); | |
| 452 BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs()); | |
| 453 | |
| 454 // Benchmark: splitting off leading number field. | |
| 455 | |
| 456 void Parse1Split(int iters, | |
| 457 void (*parse1)(int, const char*, const StringPiece&)) { | |
| 458 BenchmarkMemoryUsage(); | |
| 459 parse1(iters, "[0-9]+-(.*)", "650-253-0001"); | |
| 460 SetBenchmarkItemsProcessed(iters); | |
| 461 } | |
| 462 | |
| 463 void Parse_Split_NFA(int i) { Parse1Split(i, Parse1NFA); } | |
| 464 void Parse_Split_OnePass(int i) { Parse1Split(i, Parse1OnePass); } | |
| 465 void Parse_Split_PCRE(int i) { Parse1Split(i, Parse1PCRE); } | |
| 466 void Parse_Split_RE2(int i) { Parse1Split(i, Parse1RE2); } | |
| 467 void Parse_Split_BitState(int i) { Parse1Split(i, Parse1BitState); } | |
| 468 | |
| 469 BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs()); | |
| 470 BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs()); | |
| 471 #ifdef USEPCRE | |
| 472 BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs()); | |
| 473 #endif | |
| 474 BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs()); | |
| 475 BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs()); | |
| 476 | |
| 477 void Parse_CachedSplit_NFA(int i) { Parse1Split(i, Parse1CachedNFA); } | |
| 478 void Parse_CachedSplit_OnePass(int i) { Parse1Split(i, Parse1CachedOnePass);
} | |
| 479 void Parse_CachedSplit_PCRE(int i) { Parse1Split(i, Parse1CachedPCRE); } | |
| 480 void Parse_CachedSplit_RE2(int i) { Parse1Split(i, Parse1CachedRE2); } | |
| 481 void Parse_CachedSplit_BitState(int i) { Parse1Split(i, Parse1CachedBitS
tate); } | |
| 482 | |
| 483 BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs()); | |
| 484 BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs()); | |
| 485 #ifdef USEPCRE | |
| 486 BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs()); | |
| 487 #endif | |
| 488 BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs()); | |
| 489 BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs()); | |
| 490 | |
| 491 // Benchmark: splitting off leading number field but harder (ambiguous regexp). | |
| 492 | |
| 493 void Parse1SplitHard(int iters, | |
| 494 void (*run)(int, const char*, const StringPiece&)) { | |
| 495 BenchmarkMemoryUsage(); | |
| 496 run(iters, "[0-9]+.(.*)", "650-253-0001"); | |
| 497 SetBenchmarkItemsProcessed(iters); | |
| 498 } | |
| 499 | |
| 500 void Parse_SplitHard_NFA(int i) { Parse1SplitHard(i, Parse1NFA); } | |
| 501 void Parse_SplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1PCRE); } | |
| 502 void Parse_SplitHard_RE2(int i) { Parse1SplitHard(i, Parse1RE2); } | |
| 503 void Parse_SplitHard_BitState(int i) { Parse1SplitHard(i, Parse1BitState
); } | |
| 504 | |
| 505 #ifdef USEPCRE | |
| 506 BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs()); | |
| 507 #endif | |
| 508 BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs()); | |
| 509 BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs()); | |
| 510 BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs()); | |
| 511 | |
| 512 void Parse_CachedSplitHard_NFA(int i) { Parse1SplitHard(i, Parse1CachedNFA
); } | |
| 513 void Parse_CachedSplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1CachedPCR
E); } | |
| 514 void Parse_CachedSplitHard_RE2(int i) { Parse1SplitHard(i, Parse1CachedRE2
); } | |
| 515 void Parse_CachedSplitHard_BitState(int i) { Parse1SplitHard(i, Parse1Cach
edBitState); } | |
| 516 void Parse_CachedSplitHard_Backtrack(int i) { Parse1SplitHard(i, Parse1Cac
hedBacktrack); } | |
| 517 | |
| 518 #ifdef USEPCRE | |
| 519 BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs()); | |
| 520 #endif | |
| 521 BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs()); | |
| 522 BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs()); | |
| 523 BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs()); | |
| 524 BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs()); | |
| 525 | |
| 526 // Benchmark: Parse1SplitHard, big text, small match. | |
| 527 | |
| 528 void Parse1SplitBig1(int iters, | |
| 529 void (*run)(int, const char*, const StringPiece&)) { | |
| 530 string s; | |
| 531 s.append(100000, 'x'); | |
| 532 s.append("650-253-0001"); | |
| 533 BenchmarkMemoryUsage(); | |
| 534 run(iters, "[0-9]+.(.*)", s); | |
| 535 SetBenchmarkItemsProcessed(iters); | |
| 536 } | |
| 537 | |
| 538 void Parse_CachedSplitBig1_PCRE(int i) { Parse1SplitBig1(i, SearchParse1Cac
hedPCRE); } | |
| 539 void Parse_CachedSplitBig1_RE2(int i) { Parse1SplitBig1(i, SearchParse1Cac
hedRE2); } | |
| 540 | |
| 541 #ifdef USEPCRE | |
| 542 BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs()); | |
| 543 #endif | |
| 544 BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs()); | |
| 545 | |
| 546 // Benchmark: Parse1SplitHard, big text, big match. | |
| 547 | |
| 548 void Parse1SplitBig2(int iters, | |
| 549 void (*run)(int, const char*, const StringPiece&)) { | |
| 550 string s; | |
| 551 s.append("650-253-"); | |
| 552 s.append(100000, '0'); | |
| 553 BenchmarkMemoryUsage(); | |
| 554 run(iters, "[0-9]+.(.*)", s); | |
| 555 SetBenchmarkItemsProcessed(iters); | |
| 556 } | |
| 557 | |
| 558 void Parse_CachedSplitBig2_PCRE(int i) { Parse1SplitBig2(i, SearchParse1Cac
hedPCRE); } | |
| 559 void Parse_CachedSplitBig2_RE2(int i) { Parse1SplitBig2(i, SearchParse1Cac
hedRE2); } | |
| 560 | |
| 561 #ifdef USEPCRE | |
| 562 BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs()); | |
| 563 #endif | |
| 564 BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs()); | |
| 565 | |
| 566 // Benchmark: measure time required to parse (but not execute) | |
| 567 // a simple regular expression. | |
| 568 | |
| 569 void ParseRegexp(int iters, const string& regexp) { | |
| 570 for (int i = 0; i < iters; i++) { | |
| 571 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 572 CHECK(re); | |
| 573 re->Decref(); | |
| 574 } | |
| 575 } | |
| 576 | |
| 577 void SimplifyRegexp(int iters, const string& regexp) { | |
| 578 for (int i = 0; i < iters; i++) { | |
| 579 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 580 CHECK(re); | |
| 581 Regexp* sre = re->Simplify(); | |
| 582 CHECK(sre); | |
| 583 sre->Decref(); | |
| 584 re->Decref(); | |
| 585 } | |
| 586 } | |
| 587 | |
| 588 void NullWalkRegexp(int iters, const string& regexp) { | |
| 589 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 590 CHECK(re); | |
| 591 for (int i = 0; i < iters; i++) { | |
| 592 re->NullWalk(); | |
| 593 } | |
| 594 re->Decref(); | |
| 595 } | |
| 596 | |
| 597 void SimplifyCompileRegexp(int iters, const string& regexp) { | |
| 598 for (int i = 0; i < iters; i++) { | |
| 599 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 600 CHECK(re); | |
| 601 Regexp* sre = re->Simplify(); | |
| 602 CHECK(sre); | |
| 603 Prog* prog = sre->CompileToProg(0); | |
| 604 CHECK(prog); | |
| 605 delete prog; | |
| 606 sre->Decref(); | |
| 607 re->Decref(); | |
| 608 } | |
| 609 } | |
| 610 | |
| 611 void CompileRegexp(int iters, const string& regexp) { | |
| 612 for (int i = 0; i < iters; i++) { | |
| 613 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 614 CHECK(re); | |
| 615 Prog* prog = re->CompileToProg(0); | |
| 616 CHECK(prog); | |
| 617 delete prog; | |
| 618 re->Decref(); | |
| 619 } | |
| 620 } | |
| 621 | |
| 622 void CompileToProg(int iters, const string& regexp) { | |
| 623 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 624 CHECK(re); | |
| 625 for (int i = 0; i < iters; i++) { | |
| 626 Prog* prog = re->CompileToProg(0); | |
| 627 CHECK(prog); | |
| 628 delete prog; | |
| 629 } | |
| 630 re->Decref(); | |
| 631 } | |
| 632 | |
| 633 void CompileByteMap(int iters, const string& regexp) { | |
| 634 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 635 CHECK(re); | |
| 636 Prog* prog = re->CompileToProg(0); | |
| 637 CHECK(prog); | |
| 638 for (int i = 0; i < iters; i++) { | |
| 639 prog->ComputeByteMap(); | |
| 640 } | |
| 641 delete prog; | |
| 642 re->Decref(); | |
| 643 } | |
| 644 | |
| 645 void CompilePCRE(int iters, const string& regexp) { | |
| 646 for (int i = 0; i < iters; i++) { | |
| 647 PCRE re(regexp, PCRE::UTF8); | |
| 648 CHECK_EQ(re.error(), ""); | |
| 649 } | |
| 650 } | |
| 651 | |
| 652 void CompileRE2(int iters, const string& regexp) { | |
| 653 for (int i = 0; i < iters; i++) { | |
| 654 RE2 re(regexp); | |
| 655 CHECK_EQ(re.error(), ""); | |
| 656 } | |
| 657 } | |
| 658 | |
| 659 void RunBuild(int iters, const string& regexp, void (*run)(int, const string&))
{ | |
| 660 run(iters, regexp); | |
| 661 SetBenchmarkItemsProcessed(iters); | |
| 662 } | |
| 663 | |
| 664 } // namespace re2 | |
| 665 | |
| 666 DEFINE_string(compile_regexp, "(.*)-(\\d+)-of-(\\d+)", "regexp for compile bench
marks"); | |
| 667 | |
| 668 namespace re2 { | |
| 669 | |
| 670 void BM_PCRE_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompilePCRE
); } | |
| 671 void BM_Regexp_Parse(int i) { RunBuild(i, FLAGS_compile_regexp, ParseRegexp
); } | |
| 672 void BM_Regexp_Simplify(int i) { RunBuild(i, FLAGS_compile_regexp, SimplifyReg
exp); } | |
| 673 void BM_CompileToProg(int i) { RunBuild(i, FLAGS_compile_regexp, CompileToPr
og); } | |
| 674 void BM_CompileByteMap(int i) { RunBuild(i, FLAGS_compile_regexp, CompileByt
eMap); } | |
| 675 void BM_Regexp_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRege
xp); } | |
| 676 void BM_Regexp_SimplifyCompile(int i) { RunBuild(i, FLAGS_compile_regexp, Simp
lifyCompileRegexp); } | |
| 677 void BM_Regexp_NullWalk(int i) { RunBuild(i, FLAGS_compile_regexp, NullWalkReg
exp); } | |
| 678 void BM_RE2_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRE2)
; } | |
| 679 | |
| 680 #ifdef USEPCRE | |
| 681 BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs()); | |
| 682 #endif | |
| 683 BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs()); | |
| 684 BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs()); | |
| 685 BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs()); | |
| 686 BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs()); | |
| 687 BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs()); | |
| 688 BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs()); | |
| 689 BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs()); | |
| 690 BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs()); | |
| 691 | |
| 692 // Makes text of size nbytes, then calls run to search | |
| 693 // the text for regexp iters times. | |
| 694 void SearchPhone(int iters, int nbytes, ParseImpl* search) { | |
| 695 StopBenchmarkTiming(); | |
| 696 string s; | |
| 697 MakeText(&s, nbytes); | |
| 698 s.append("(650) 253-0001"); | |
| 699 BenchmarkMemoryUsage(); | |
| 700 StartBenchmarkTiming(); | |
| 701 search(iters, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s); | |
| 702 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); | |
| 703 } | |
| 704 | |
| 705 void SearchPhone_CachedPCRE(int i, int n) { | |
| 706 SearchPhone(i, n, SearchParse2CachedPCRE); | |
| 707 } | |
| 708 void SearchPhone_CachedRE2(int i, int n) { | |
| 709 SearchPhone(i, n, SearchParse2CachedRE2); | |
| 710 } | |
| 711 | |
| 712 #ifdef USEPCRE | |
| 713 BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
| 714 #endif | |
| 715 BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
| 716 | |
| 717 /* | |
| 718 TODO(rsc): Make this work again. | |
| 719 | |
| 720 // Generates and returns a string over binary alphabet {0,1} that contains | |
| 721 // all possible binary sequences of length n as substrings. The obvious | |
| 722 // brute force method would generate a string of length n * 2^n, but this | |
| 723 // generates a string of length n + 2^n - 1 called a De Bruijn cycle. | |
| 724 // See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17. | |
| 725 static string DeBruijnString(int n) { | |
| 726 CHECK_LT(n, 8*sizeof(int)); | |
| 727 CHECK_GT(n, 0); | |
| 728 | |
| 729 vector<bool> did(1<<n); | |
| 730 for (int i = 0; i < 1<<n; i++) | |
| 731 did[i] = false; | |
| 732 | |
| 733 string s; | |
| 734 for (int i = 0; i < n-1; i++) | |
| 735 s.append("0"); | |
| 736 int bits = 0; | |
| 737 int mask = (1<<n) - 1; | |
| 738 for (int i = 0; i < (1<<n); i++) { | |
| 739 bits <<= 1; | |
| 740 bits &= mask; | |
| 741 if (!did[bits|1]) { | |
| 742 bits |= 1; | |
| 743 s.append("1"); | |
| 744 } else { | |
| 745 s.append("0"); | |
| 746 } | |
| 747 CHECK(!did[bits]); | |
| 748 did[bits] = true; | |
| 749 } | |
| 750 return s; | |
| 751 } | |
| 752 | |
| 753 void CacheFill(int iters, int n, SearchImpl *srch) { | |
| 754 string s = DeBruijnString(n+1); | |
| 755 string t; | |
| 756 for (int i = n+1; i < 20; i++) { | |
| 757 t = s + s; | |
| 758 swap(s, t); | |
| 759 } | |
| 760 srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s, | |
| 761 Prog::kUnanchored, true); | |
| 762 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*s.size()); | |
| 763 } | |
| 764 | |
| 765 void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); } | |
| 766 void CacheFillRE2(int i, int n) { CacheFill(i, n, SearchCachedRE2); } | |
| 767 void CacheFillNFA(int i, int n) { CacheFill(i, n, SearchCachedNFA); } | |
| 768 void CacheFillDFA(int i, int n) { CacheFill(i, n, SearchCachedDFA); } | |
| 769 | |
| 770 // BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers | |
| 771 // for the static BenchmarkRegisterer, which makes it unusable inside | |
| 772 // a macro like DO24 below. MY_BENCHMARK_WITH_ARG uses the argument a | |
| 773 // to make the identifiers distinct (only possible when 'a' is a simple | |
| 774 // expression like 2, not like 1+1). | |
| 775 #define MY_BENCHMARK_WITH_ARG(n, a) \ | |
| 776 bool __benchmark_ ## n ## a = \ | |
| 777 (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, Num
CPUs()); | |
| 778 | |
| 779 #define DO24(A, B) \ | |
| 780 A(B, 1); A(B, 2); A(B, 3); A(B, 4); A(B, 5); A(B, 6); \ | |
| 781 A(B, 7); A(B, 8); A(B, 9); A(B, 10); A(B, 11); A(B, 12); \ | |
| 782 A(B, 13); A(B, 14); A(B, 15); A(B, 16); A(B, 17); A(B, 18); \ | |
| 783 A(B, 19); A(B, 20); A(B, 21); A(B, 22); A(B, 23); A(B, 24); | |
| 784 | |
| 785 DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE) | |
| 786 DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA) | |
| 787 DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2) | |
| 788 DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA) | |
| 789 | |
| 790 #undef DO24 | |
| 791 #undef MY_BENCHMARK_WITH_ARG | |
| 792 */ | |
| 793 | |
| 794 //////////////////////////////////////////////////////////////////////// | |
| 795 // | |
| 796 // Implementation routines. Sad that there are so many, | |
| 797 // but all the interfaces are slightly different. | |
| 798 | |
| 799 // Runs implementation to search for regexp in text, iters times. | |
| 800 // Expect_match says whether the regexp should be found. | |
| 801 // Anchor says whether to run an anchored search. | |
| 802 | |
| 803 void SearchDFA(int iters, const char* regexp, const StringPiece& text, | |
| 804 Prog::Anchor anchor, bool expect_match) { | |
| 805 for (int i = 0; i < iters; i++) { | |
| 806 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 807 CHECK(re); | |
| 808 Prog* prog = re->CompileToProg(0); | |
| 809 CHECK(prog); | |
| 810 bool failed = false; | |
| 811 CHECK_EQ(prog->SearchDFA(text, NULL, anchor, Prog::kFirstMatch, | |
| 812 NULL, &failed, NULL), | |
| 813 expect_match); | |
| 814 CHECK(!failed); | |
| 815 delete prog; | |
| 816 re->Decref(); | |
| 817 } | |
| 818 } | |
| 819 | |
| 820 void SearchNFA(int iters, const char* regexp, const StringPiece& text, | |
| 821 Prog::Anchor anchor, bool expect_match) { | |
| 822 for (int i = 0; i < iters; i++) { | |
| 823 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 824 CHECK(re); | |
| 825 Prog* prog = re->CompileToProg(0); | |
| 826 CHECK(prog); | |
| 827 CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0), | |
| 828 expect_match); | |
| 829 delete prog; | |
| 830 re->Decref(); | |
| 831 } | |
| 832 } | |
| 833 | |
| 834 void SearchOnePass(int iters, const char* regexp, const StringPiece& text, | |
| 835 Prog::Anchor anchor, bool expect_match) { | |
| 836 for (int i = 0; i < iters; i++) { | |
| 837 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 838 CHECK(re); | |
| 839 Prog* prog = re->CompileToProg(0); | |
| 840 CHECK(prog); | |
| 841 CHECK(prog->IsOnePass()); | |
| 842 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0)
, | |
| 843 expect_match); | |
| 844 delete prog; | |
| 845 re->Decref(); | |
| 846 } | |
| 847 } | |
| 848 | |
| 849 void SearchBitState(int iters, const char* regexp, const StringPiece& text, | |
| 850 Prog::Anchor anchor, bool expect_match) { | |
| 851 for (int i = 0; i < iters; i++) { | |
| 852 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 853 CHECK(re); | |
| 854 Prog* prog = re->CompileToProg(0); | |
| 855 CHECK(prog); | |
| 856 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0
), | |
| 857 expect_match); | |
| 858 delete prog; | |
| 859 re->Decref(); | |
| 860 } | |
| 861 } | |
| 862 | |
| 863 void SearchPCRE(int iters, const char* regexp, const StringPiece& text, | |
| 864 Prog::Anchor anchor, bool expect_match) { | |
| 865 for (int i = 0; i < iters; i++) { | |
| 866 PCRE re(regexp, PCRE::UTF8); | |
| 867 CHECK_EQ(re.error(), ""); | |
| 868 if (anchor == Prog::kAnchored) | |
| 869 CHECK_EQ(PCRE::FullMatch(text, re), expect_match); | |
| 870 else | |
| 871 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match); | |
| 872 } | |
| 873 } | |
| 874 | |
| 875 void SearchRE2(int iters, const char* regexp, const StringPiece& text, | |
| 876 Prog::Anchor anchor, bool expect_match) { | |
| 877 for (int i = 0; i < iters; i++) { | |
| 878 RE2 re(regexp); | |
| 879 CHECK_EQ(re.error(), ""); | |
| 880 if (anchor == Prog::kAnchored) | |
| 881 CHECK_EQ(RE2::FullMatch(text, re), expect_match); | |
| 882 else | |
| 883 CHECK_EQ(RE2::PartialMatch(text, re), expect_match); | |
| 884 } | |
| 885 } | |
| 886 | |
| 887 // SearchCachedXXX is like SearchXXX but only does the | |
| 888 // regexp parsing and compiling once. This lets us measure | |
| 889 // search time without the per-regexp overhead. | |
| 890 | |
| 891 void SearchCachedDFA(int iters, const char* regexp, const StringPiece& text, | |
| 892 Prog::Anchor anchor, bool expect_match) { | |
| 893 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 894 CHECK(re); | |
| 895 Prog* prog = re->CompileToProg(1LL<<31); | |
| 896 CHECK(prog); | |
| 897 for (int i = 0; i < iters; i++) { | |
| 898 bool failed = false; | |
| 899 CHECK_EQ(prog->SearchDFA(text, NULL, anchor, | |
| 900 Prog::kFirstMatch, NULL, &failed, NULL), | |
| 901 expect_match); | |
| 902 CHECK(!failed); | |
| 903 } | |
| 904 delete prog; | |
| 905 re->Decref(); | |
| 906 } | |
| 907 | |
| 908 void SearchCachedNFA(int iters, const char* regexp, const StringPiece& text, | |
| 909 Prog::Anchor anchor, bool expect_match) { | |
| 910 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 911 CHECK(re); | |
| 912 Prog* prog = re->CompileToProg(0); | |
| 913 CHECK(prog); | |
| 914 for (int i = 0; i < iters; i++) { | |
| 915 CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0), | |
| 916 expect_match); | |
| 917 } | |
| 918 delete prog; | |
| 919 re->Decref(); | |
| 920 } | |
| 921 | |
| 922 void SearchCachedOnePass(int iters, const char* regexp, const StringPiece& text, | |
| 923 Prog::Anchor anchor, bool expect_match) { | |
| 924 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 925 CHECK(re); | |
| 926 Prog* prog = re->CompileToProg(0); | |
| 927 CHECK(prog); | |
| 928 CHECK(prog->IsOnePass()); | |
| 929 for (int i = 0; i < iters; i++) | |
| 930 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0)
, | |
| 931 expect_match); | |
| 932 delete prog; | |
| 933 re->Decref(); | |
| 934 } | |
| 935 | |
| 936 void SearchCachedBitState(int iters, const char* regexp, const StringPiece& text
, | |
| 937 Prog::Anchor anchor, bool expect_match) { | |
| 938 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 939 CHECK(re); | |
| 940 Prog* prog = re->CompileToProg(0); | |
| 941 CHECK(prog); | |
| 942 for (int i = 0; i < iters; i++) | |
| 943 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0
), | |
| 944 expect_match); | |
| 945 delete prog; | |
| 946 re->Decref(); | |
| 947 } | |
| 948 | |
| 949 void SearchCachedPCRE(int iters, const char* regexp, const StringPiece& text, | |
| 950 Prog::Anchor anchor, bool expect_match) { | |
| 951 PCRE re(regexp, PCRE::UTF8); | |
| 952 CHECK_EQ(re.error(), ""); | |
| 953 for (int i = 0; i < iters; i++) { | |
| 954 if (anchor == Prog::kAnchored) | |
| 955 CHECK_EQ(PCRE::FullMatch(text, re), expect_match); | |
| 956 else | |
| 957 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match); | |
| 958 } | |
| 959 } | |
| 960 | |
| 961 void SearchCachedRE2(int iters, const char* regexp, const StringPiece& text, | |
| 962 Prog::Anchor anchor, bool expect_match) { | |
| 963 RE2 re(regexp); | |
| 964 CHECK_EQ(re.error(), ""); | |
| 965 for (int i = 0; i < iters; i++) { | |
| 966 if (anchor == Prog::kAnchored) | |
| 967 CHECK_EQ(RE2::FullMatch(text, re), expect_match); | |
| 968 else | |
| 969 CHECK_EQ(RE2::PartialMatch(text, re), expect_match); | |
| 970 } | |
| 971 } | |
| 972 | |
| 973 | |
| 974 // Runs implementation to full match regexp against text, | |
| 975 // extracting three submatches. Expects match always. | |
| 976 | |
| 977 void Parse3NFA(int iters, const char* regexp, const StringPiece& text) { | |
| 978 for (int i = 0; i < iters; i++) { | |
| 979 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 980 CHECK(re); | |
| 981 Prog* prog = re->CompileToProg(0); | |
| 982 CHECK(prog); | |
| 983 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
| 984 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4))
; | |
| 985 delete prog; | |
| 986 re->Decref(); | |
| 987 } | |
| 988 } | |
| 989 | |
| 990 void Parse3OnePass(int iters, const char* regexp, const StringPiece& text) { | |
| 991 for (int i = 0; i < iters; i++) { | |
| 992 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 993 CHECK(re); | |
| 994 Prog* prog = re->CompileToProg(0); | |
| 995 CHECK(prog); | |
| 996 CHECK(prog->IsOnePass()); | |
| 997 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
| 998 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp,
4)); | |
| 999 delete prog; | |
| 1000 re->Decref(); | |
| 1001 } | |
| 1002 } | |
| 1003 | |
| 1004 void Parse3BitState(int iters, const char* regexp, const StringPiece& text) { | |
| 1005 for (int i = 0; i < iters; i++) { | |
| 1006 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 1007 CHECK(re); | |
| 1008 Prog* prog = re->CompileToProg(0); | |
| 1009 CHECK(prog); | |
| 1010 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
| 1011 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp
, 4)); | |
| 1012 delete prog; | |
| 1013 re->Decref(); | |
| 1014 } | |
| 1015 } | |
| 1016 | |
| 1017 void Parse3Backtrack(int iters, const char* regexp, const StringPiece& text) { | |
| 1018 for (int i = 0; i < iters; i++) { | |
| 1019 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 1020 CHECK(re); | |
| 1021 Prog* prog = re->CompileToProg(0); | |
| 1022 CHECK(prog); | |
| 1023 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
| 1024 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMa
tch, sp, 4)); | |
| 1025 delete prog; | |
| 1026 re->Decref(); | |
| 1027 } | |
| 1028 } | |
| 1029 | |
| 1030 void Parse3PCRE(int iters, const char* regexp, const StringPiece& text) { | |
| 1031 for (int i = 0; i < iters; i++) { | |
| 1032 PCRE re(regexp, PCRE::UTF8); | |
| 1033 CHECK_EQ(re.error(), ""); | |
| 1034 StringPiece sp1, sp2, sp3; | |
| 1035 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3)); | |
| 1036 } | |
| 1037 } | |
| 1038 | |
| 1039 void Parse3RE2(int iters, const char* regexp, const StringPiece& text) { | |
| 1040 for (int i = 0; i < iters; i++) { | |
| 1041 RE2 re(regexp); | |
| 1042 CHECK_EQ(re.error(), ""); | |
| 1043 StringPiece sp1, sp2, sp3; | |
| 1044 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3)); | |
| 1045 } | |
| 1046 } | |
| 1047 | |
| 1048 void Parse3CachedNFA(int iters, const char* regexp, const StringPiece& text) { | |
| 1049 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 1050 CHECK(re); | |
| 1051 Prog* prog = re->CompileToProg(0); | |
| 1052 CHECK(prog); | |
| 1053 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
| 1054 for (int i = 0; i < iters; i++) { | |
| 1055 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4))
; | |
| 1056 } | |
| 1057 delete prog; | |
| 1058 re->Decref(); | |
| 1059 } | |
| 1060 | |
| 1061 void Parse3CachedOnePass(int iters, const char* regexp, const StringPiece& text)
{ | |
| 1062 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 1063 CHECK(re); | |
| 1064 Prog* prog = re->CompileToProg(0); | |
| 1065 CHECK(prog); | |
| 1066 CHECK(prog->IsOnePass()); | |
| 1067 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
| 1068 for (int i = 0; i < iters; i++) | |
| 1069 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp,
4)); | |
| 1070 delete prog; | |
| 1071 re->Decref(); | |
| 1072 } | |
| 1073 | |
| 1074 void Parse3CachedBitState(int iters, const char* regexp, const StringPiece& text
) { | |
| 1075 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 1076 CHECK(re); | |
| 1077 Prog* prog = re->CompileToProg(0); | |
| 1078 CHECK(prog); | |
| 1079 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
| 1080 for (int i = 0; i < iters; i++) | |
| 1081 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp
, 4)); | |
| 1082 delete prog; | |
| 1083 re->Decref(); | |
| 1084 } | |
| 1085 | |
| 1086 void Parse3CachedBacktrack(int iters, const char* regexp, const StringPiece& tex
t) { | |
| 1087 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 1088 CHECK(re); | |
| 1089 Prog* prog = re->CompileToProg(0); | |
| 1090 CHECK(prog); | |
| 1091 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
| 1092 for (int i = 0; i < iters; i++) | |
| 1093 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMa
tch, sp, 4)); | |
| 1094 delete prog; | |
| 1095 re->Decref(); | |
| 1096 } | |
| 1097 | |
| 1098 void Parse3CachedPCRE(int iters, const char* regexp, const StringPiece& text) { | |
| 1099 PCRE re(regexp, PCRE::UTF8); | |
| 1100 CHECK_EQ(re.error(), ""); | |
| 1101 StringPiece sp1, sp2, sp3; | |
| 1102 for (int i = 0; i < iters; i++) { | |
| 1103 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3)); | |
| 1104 } | |
| 1105 } | |
| 1106 | |
| 1107 void Parse3CachedRE2(int iters, const char* regexp, const StringPiece& text) { | |
| 1108 RE2 re(regexp); | |
| 1109 CHECK_EQ(re.error(), ""); | |
| 1110 StringPiece sp1, sp2, sp3; | |
| 1111 for (int i = 0; i < iters; i++) { | |
| 1112 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3)); | |
| 1113 } | |
| 1114 } | |
| 1115 | |
| 1116 | |
| 1117 // Runs implementation to full match regexp against text, | |
| 1118 // extracting one submatch. Expects match always. | |
| 1119 | |
| 1120 void Parse1NFA(int iters, const char* regexp, const StringPiece& text) { | |
| 1121 for (int i = 0; i < iters; i++) { | |
| 1122 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 1123 CHECK(re); | |
| 1124 Prog* prog = re->CompileToProg(0); | |
| 1125 CHECK(prog); | |
| 1126 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
| 1127 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2))
; | |
| 1128 delete prog; | |
| 1129 re->Decref(); | |
| 1130 } | |
| 1131 } | |
| 1132 | |
| 1133 void Parse1OnePass(int iters, const char* regexp, const StringPiece& text) { | |
| 1134 for (int i = 0; i < iters; i++) { | |
| 1135 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 1136 CHECK(re); | |
| 1137 Prog* prog = re->CompileToProg(0); | |
| 1138 CHECK(prog); | |
| 1139 CHECK(prog->IsOnePass()); | |
| 1140 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
| 1141 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp,
2)); | |
| 1142 delete prog; | |
| 1143 re->Decref(); | |
| 1144 } | |
| 1145 } | |
| 1146 | |
| 1147 void Parse1BitState(int iters, const char* regexp, const StringPiece& text) { | |
| 1148 for (int i = 0; i < iters; i++) { | |
| 1149 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 1150 CHECK(re); | |
| 1151 Prog* prog = re->CompileToProg(0); | |
| 1152 CHECK(prog); | |
| 1153 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
| 1154 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp
, 2)); | |
| 1155 delete prog; | |
| 1156 re->Decref(); | |
| 1157 } | |
| 1158 } | |
| 1159 | |
| 1160 void Parse1PCRE(int iters, const char* regexp, const StringPiece& text) { | |
| 1161 for (int i = 0; i < iters; i++) { | |
| 1162 PCRE re(regexp, PCRE::UTF8); | |
| 1163 CHECK_EQ(re.error(), ""); | |
| 1164 StringPiece sp1; | |
| 1165 CHECK(PCRE::FullMatch(text, re, &sp1)); | |
| 1166 } | |
| 1167 } | |
| 1168 | |
| 1169 void Parse1RE2(int iters, const char* regexp, const StringPiece& text) { | |
| 1170 for (int i = 0; i < iters; i++) { | |
| 1171 RE2 re(regexp); | |
| 1172 CHECK_EQ(re.error(), ""); | |
| 1173 StringPiece sp1; | |
| 1174 CHECK(RE2::FullMatch(text, re, &sp1)); | |
| 1175 } | |
| 1176 } | |
| 1177 | |
| 1178 void Parse1CachedNFA(int iters, const char* regexp, const StringPiece& text) { | |
| 1179 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 1180 CHECK(re); | |
| 1181 Prog* prog = re->CompileToProg(0); | |
| 1182 CHECK(prog); | |
| 1183 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
| 1184 for (int i = 0; i < iters; i++) { | |
| 1185 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2))
; | |
| 1186 } | |
| 1187 delete prog; | |
| 1188 re->Decref(); | |
| 1189 } | |
| 1190 | |
| 1191 void Parse1CachedOnePass(int iters, const char* regexp, const StringPiece& text)
{ | |
| 1192 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 1193 CHECK(re); | |
| 1194 Prog* prog = re->CompileToProg(0); | |
| 1195 CHECK(prog); | |
| 1196 CHECK(prog->IsOnePass()); | |
| 1197 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
| 1198 for (int i = 0; i < iters; i++) | |
| 1199 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp,
2)); | |
| 1200 delete prog; | |
| 1201 re->Decref(); | |
| 1202 } | |
| 1203 | |
| 1204 void Parse1CachedBitState(int iters, const char* regexp, const StringPiece& text
) { | |
| 1205 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 1206 CHECK(re); | |
| 1207 Prog* prog = re->CompileToProg(0); | |
| 1208 CHECK(prog); | |
| 1209 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
| 1210 for (int i = 0; i < iters; i++) | |
| 1211 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp
, 2)); | |
| 1212 delete prog; | |
| 1213 re->Decref(); | |
| 1214 } | |
| 1215 | |
| 1216 void Parse1CachedBacktrack(int iters, const char* regexp, const StringPiece& tex
t) { | |
| 1217 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
| 1218 CHECK(re); | |
| 1219 Prog* prog = re->CompileToProg(0); | |
| 1220 CHECK(prog); | |
| 1221 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
| 1222 for (int i = 0; i < iters; i++) | |
| 1223 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMa
tch, sp, 2)); | |
| 1224 delete prog; | |
| 1225 re->Decref(); | |
| 1226 } | |
| 1227 | |
| 1228 void Parse1CachedPCRE(int iters, const char* regexp, const StringPiece& text) { | |
| 1229 PCRE re(regexp, PCRE::UTF8); | |
| 1230 CHECK_EQ(re.error(), ""); | |
| 1231 StringPiece sp1; | |
| 1232 for (int i = 0; i < iters; i++) { | |
| 1233 CHECK(PCRE::FullMatch(text, re, &sp1)); | |
| 1234 } | |
| 1235 } | |
| 1236 | |
| 1237 void Parse1CachedRE2(int iters, const char* regexp, const StringPiece& text) { | |
| 1238 RE2 re(regexp); | |
| 1239 CHECK_EQ(re.error(), ""); | |
| 1240 StringPiece sp1; | |
| 1241 for (int i = 0; i < iters; i++) { | |
| 1242 CHECK(RE2::FullMatch(text, re, &sp1)); | |
| 1243 } | |
| 1244 } | |
| 1245 | |
| 1246 void SearchParse2CachedPCRE(int iters, const char* regexp, | |
| 1247 const StringPiece& text) { | |
| 1248 PCRE re(regexp, PCRE::UTF8); | |
| 1249 CHECK_EQ(re.error(), ""); | |
| 1250 for (int i = 0; i < iters; i++) { | |
| 1251 StringPiece sp1, sp2; | |
| 1252 CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2)); | |
| 1253 } | |
| 1254 } | |
| 1255 | |
| 1256 void SearchParse2CachedRE2(int iters, const char* regexp, | |
| 1257 const StringPiece& text) { | |
| 1258 RE2 re(regexp); | |
| 1259 CHECK_EQ(re.error(), ""); | |
| 1260 for (int i = 0; i < iters; i++) { | |
| 1261 StringPiece sp1, sp2; | |
| 1262 CHECK(RE2::PartialMatch(text, re, &sp1, &sp2)); | |
| 1263 } | |
| 1264 } | |
| 1265 | |
| 1266 void SearchParse1CachedPCRE(int iters, const char* regexp, | |
| 1267 const StringPiece& text) { | |
| 1268 PCRE re(regexp, PCRE::UTF8); | |
| 1269 CHECK_EQ(re.error(), ""); | |
| 1270 for (int i = 0; i < iters; i++) { | |
| 1271 StringPiece sp1; | |
| 1272 CHECK(PCRE::PartialMatch(text, re, &sp1)); | |
| 1273 } | |
| 1274 } | |
| 1275 | |
| 1276 void SearchParse1CachedRE2(int iters, const char* regexp, | |
| 1277 const StringPiece& text) { | |
| 1278 RE2 re(regexp); | |
| 1279 CHECK_EQ(re.error(), ""); | |
| 1280 for (int i = 0; i < iters; i++) { | |
| 1281 StringPiece sp1; | |
| 1282 CHECK(RE2::PartialMatch(text, re, &sp1)); | |
| 1283 } | |
| 1284 } | |
| 1285 | |
| 1286 void EmptyPartialMatchPCRE(int n) { | |
| 1287 PCRE re(""); | |
| 1288 for (int i = 0; i < n; i++) { | |
| 1289 PCRE::PartialMatch("", re); | |
| 1290 } | |
| 1291 } | |
| 1292 | |
| 1293 void EmptyPartialMatchRE2(int n) { | |
| 1294 RE2 re(""); | |
| 1295 for (int i = 0; i < n; i++) { | |
| 1296 RE2::PartialMatch("", re); | |
| 1297 } | |
| 1298 } | |
| 1299 #ifdef USEPCRE | |
| 1300 BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs()); | |
| 1301 #endif | |
| 1302 BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs()); | |
| 1303 | |
| 1304 void SimplePartialMatchPCRE(int n) { | |
| 1305 PCRE re("abcdefg"); | |
| 1306 for (int i = 0; i < n; i++) { | |
| 1307 PCRE::PartialMatch("abcdefg", re); | |
| 1308 } | |
| 1309 } | |
| 1310 | |
| 1311 void SimplePartialMatchRE2(int n) { | |
| 1312 RE2 re("abcdefg"); | |
| 1313 for (int i = 0; i < n; i++) { | |
| 1314 RE2::PartialMatch("abcdefg", re); | |
| 1315 } | |
| 1316 } | |
| 1317 #ifdef USEPCRE | |
| 1318 BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs()); | |
| 1319 #endif | |
| 1320 BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs()); | |
| 1321 | |
| 1322 static string http_text = | |
| 1323 "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf" | |
| 1324 "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1"; | |
| 1325 | |
| 1326 void HTTPPartialMatchPCRE(int n) { | |
| 1327 StringPiece a; | |
| 1328 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); | |
| 1329 for (int i = 0; i < n; i++) { | |
| 1330 PCRE::PartialMatch(http_text, re, &a); | |
| 1331 } | |
| 1332 } | |
| 1333 | |
| 1334 void HTTPPartialMatchRE2(int n) { | |
| 1335 StringPiece a; | |
| 1336 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); | |
| 1337 for (int i = 0; i < n; i++) { | |
| 1338 RE2::PartialMatch(http_text, re, &a); | |
| 1339 } | |
| 1340 } | |
| 1341 | |
| 1342 #ifdef USEPCRE | |
| 1343 BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs()); | |
| 1344 #endif | |
| 1345 BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs()); | |
| 1346 | |
| 1347 static string http_smalltext = | |
| 1348 "GET /abc HTTP/1.1"; | |
| 1349 | |
| 1350 void SmallHTTPPartialMatchPCRE(int n) { | |
| 1351 StringPiece a; | |
| 1352 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); | |
| 1353 for (int i = 0; i < n; i++) { | |
| 1354 PCRE::PartialMatch(http_text, re, &a); | |
| 1355 } | |
| 1356 } | |
| 1357 | |
| 1358 void SmallHTTPPartialMatchRE2(int n) { | |
| 1359 StringPiece a; | |
| 1360 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); | |
| 1361 for (int i = 0; i < n; i++) { | |
| 1362 RE2::PartialMatch(http_text, re, &a); | |
| 1363 } | |
| 1364 } | |
| 1365 | |
| 1366 #ifdef USEPCRE | |
| 1367 BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs()); | |
| 1368 #endif | |
| 1369 BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs()); | |
| 1370 | |
| 1371 void DotMatchPCRE(int n) { | |
| 1372 StringPiece a; | |
| 1373 PCRE re("(?-s)^(.+)"); | |
| 1374 for (int i = 0; i < n; i++) { | |
| 1375 PCRE::PartialMatch(http_text, re, &a); | |
| 1376 } | |
| 1377 } | |
| 1378 | |
| 1379 void DotMatchRE2(int n) { | |
| 1380 StringPiece a; | |
| 1381 RE2 re("(?-s)^(.+)"); | |
| 1382 for (int i = 0; i < n; i++) { | |
| 1383 RE2::PartialMatch(http_text, re, &a); | |
| 1384 } | |
| 1385 } | |
| 1386 | |
| 1387 #ifdef USEPCRE | |
| 1388 BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs()); | |
| 1389 #endif | |
| 1390 BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs()); | |
| 1391 | |
| 1392 void ASCIIMatchPCRE(int n) { | |
| 1393 StringPiece a; | |
| 1394 PCRE re("(?-s)^([ -~]+)"); | |
| 1395 for (int i = 0; i < n; i++) { | |
| 1396 PCRE::PartialMatch(http_text, re, &a); | |
| 1397 } | |
| 1398 } | |
| 1399 | |
| 1400 void ASCIIMatchRE2(int n) { | |
| 1401 StringPiece a; | |
| 1402 RE2 re("(?-s)^([ -~]+)"); | |
| 1403 for (int i = 0; i < n; i++) { | |
| 1404 RE2::PartialMatch(http_text, re, &a); | |
| 1405 } | |
| 1406 } | |
| 1407 | |
| 1408 #ifdef USEPCRE | |
| 1409 BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs()); | |
| 1410 #endif | |
| 1411 BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs()); | |
| 1412 | |
| 1413 void FullMatchPCRE(int iter, int n, const char *regexp) { | |
| 1414 StopBenchmarkTiming(); | |
| 1415 string s; | |
| 1416 MakeText(&s, n); | |
| 1417 s += "ABCDEFGHIJ"; | |
| 1418 BenchmarkMemoryUsage(); | |
| 1419 PCRE re(regexp); | |
| 1420 StartBenchmarkTiming(); | |
| 1421 for (int i = 0; i < iter; i++) | |
| 1422 CHECK(PCRE::FullMatch(s, re)); | |
| 1423 SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n); | |
| 1424 } | |
| 1425 | |
| 1426 void FullMatchRE2(int iter, int n, const char *regexp) { | |
| 1427 StopBenchmarkTiming(); | |
| 1428 string s; | |
| 1429 MakeText(&s, n); | |
| 1430 s += "ABCDEFGHIJ"; | |
| 1431 BenchmarkMemoryUsage(); | |
| 1432 RE2 re(regexp, RE2::Latin1); | |
| 1433 StartBenchmarkTiming(); | |
| 1434 for (int i = 0; i < iter; i++) | |
| 1435 CHECK(RE2::FullMatch(s, re)); | |
| 1436 SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n); | |
| 1437 } | |
| 1438 | |
| 1439 void FullMatch_DotStar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*");
} | |
| 1440 void FullMatch_DotStar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s).*");
} | |
| 1441 | |
| 1442 void FullMatch_DotStarDollar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s
).*$"); } | |
| 1443 void FullMatch_DotStarDollar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s)
.*$"); } | |
| 1444 | |
| 1445 void FullMatch_DotStarCapture_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?
s)((.*)()()($))"); } | |
| 1446 void FullMatch_DotStarCapture_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s
)((.*)()()($))"); } | |
| 1447 | |
| 1448 #ifdef USEPCRE | |
| 1449 BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20); | |
| 1450 #endif | |
| 1451 BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 8, 2<<20); | |
| 1452 | |
| 1453 #ifdef USEPCRE | |
| 1454 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20); | |
| 1455 #endif | |
| 1456 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20); | |
| 1457 | |
| 1458 #ifdef USEPCRE | |
| 1459 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20); | |
| 1460 #endif | |
| 1461 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20); | |
| 1462 | |
| 1463 } // namespace re2 | |
| OLD | NEW |