OLD | NEW |
1 // Copyright 2006-2008 The RE2 Authors. All Rights Reserved. | 1 // Copyright 2006-2008 The RE2 Authors. All Rights Reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 // Benchmarks for regular expression implementations. | 5 // Benchmarks for regular expression implementations. |
6 | 6 |
7 #include "util/test.h" | 7 #include "util/test.h" |
8 #include "re2/prog.h" | 8 #include "re2/prog.h" |
9 #include "re2/re2.h" | 9 #include "re2/re2.h" |
10 #include "re2/regexp.h" | 10 #include "re2/regexp.h" |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
128 | 128 |
129 ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2; | 129 ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2; |
130 | 130 |
131 ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2; | 131 ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2; |
132 | 132 |
133 // Benchmark: failed search for regexp in random text. | 133 // Benchmark: failed search for regexp in random text. |
134 | 134 |
135 // Generate random text that won't contain the search string, | 135 // Generate random text that won't contain the search string, |
136 // to test worst-case search behavior. | 136 // to test worst-case search behavior. |
137 void MakeText(string* text, int nbytes) { | 137 void MakeText(string* text, int nbytes) { |
| 138 srand(1); |
138 text->resize(nbytes); | 139 text->resize(nbytes); |
139 srand(0); | |
140 for (int i = 0; i < nbytes; i++) { | 140 for (int i = 0; i < nbytes; i++) { |
141 if (!rand()%30) | 141 // Generate a one-byte rune that isn't a control character (e.g. '\n'). |
142 (*text)[i] = '\n'; | 142 // Clipping to 0x20 introduces some bias, but we don't need uniformity. |
143 else | 143 int byte = rand() & 0x7F; |
144 (*text)[i] = rand()%(0x7E + 1 - 0x20)+0x20; | 144 if (byte < 0x20) |
| 145 byte = 0x20; |
| 146 (*text)[i] = byte; |
145 } | 147 } |
146 } | 148 } |
147 | 149 |
148 // Makes text of size nbytes, then calls run to search | 150 // Makes text of size nbytes, then calls run to search |
149 // the text for regexp iters times. | 151 // the text for regexp iters times. |
150 void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) { | 152 void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) { |
151 StopBenchmarkTiming(); | 153 StopBenchmarkTiming(); |
152 string s; | 154 string s; |
153 MakeText(&s, nbytes); | 155 MakeText(&s, nbytes); |
154 BenchmarkMemoryUsage(); | 156 BenchmarkMemoryUsage(); |
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
256 void Search_BigFixed_CachedRE2(int i, int n) { SearchBigFixed(i, n, SearchCa
chedRE2); } | 258 void Search_BigFixed_CachedRE2(int i, int n) { SearchBigFixed(i, n, SearchCa
chedRE2); } |
257 | 259 |
258 BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs
()); | 260 BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs
()); |
259 BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPU
s()); | 261 BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPU
s()); |
260 #ifdef USEPCRE | 262 #ifdef USEPCRE |
261 BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPU
s()); | 263 BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPU
s()); |
262 #endif | 264 #endif |
263 BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs
()); | 265 BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs
()); |
264 | 266 |
265 // Benchmark: FindAndConsume | 267 // Benchmark: FindAndConsume |
| 268 |
266 void FindAndConsume(int iters, int nbytes) { | 269 void FindAndConsume(int iters, int nbytes) { |
267 StopBenchmarkTiming(); | 270 StopBenchmarkTiming(); |
268 string s; | 271 string s; |
269 MakeText(&s, nbytes); | 272 MakeText(&s, nbytes); |
270 s.append("Hello World"); | 273 s.append("Hello World"); |
271 StartBenchmarkTiming(); | 274 StartBenchmarkTiming(); |
272 RE2 re("((Hello World))"); | 275 RE2 re("((Hello World))"); |
273 for (int i = 0; i < iters; i++) { | 276 for (int i = 0; i < iters; i++) { |
274 StringPiece t = s; | 277 StringPiece t = s; |
275 StringPiece u; | 278 StringPiece u; |
276 CHECK(RE2::FindAndConsume(&t, re, &u)); | 279 CHECK(RE2::FindAndConsume(&t, re, &u)); |
277 CHECK_EQ(u, "Hello World"); | 280 CHECK_EQ(u, "Hello World"); |
278 } | 281 } |
279 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); | 282 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); |
280 } | 283 } |
281 | 284 |
282 BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs()); | 285 BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
283 | 286 |
284 // Benchmark: successful anchored search. | 287 // Benchmark: successful anchored search. |
285 | 288 |
286 void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search
) { | 289 void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search
) { |
| 290 StopBenchmarkTiming(); |
287 string s; | 291 string s; |
288 MakeText(&s, nbytes); | 292 MakeText(&s, nbytes); |
289 BenchmarkMemoryUsage(); | 293 BenchmarkMemoryUsage(); |
| 294 StartBenchmarkTiming(); |
290 search(iters, regexp, s, Prog::kAnchored, true); | 295 search(iters, regexp, s, Prog::kAnchored, true); |
291 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); | 296 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); |
292 } | 297 } |
293 | 298 |
294 // Unambiguous search (RE2 can use OnePass). | 299 // Unambiguous search (RE2 can use OnePass). |
295 | 300 |
296 void Search_Success_DFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchDFA
); } | 301 void Search_Success_DFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchDFA
); } |
297 void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOne
Pass); } | 302 void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOne
Pass); } |
298 void Search_Success_PCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchPCR
E); } | 303 void Search_Success_PCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchPCR
E); } |
299 void Search_Success_RE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchRE2
); } | 304 void Search_Success_RE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchRE2
); } |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
337 | 342 |
338 BENCHMARK_RANGE(Search_Success1_Cached_DFA, 8, 16<<20)->ThreadRange(1, NumCP
Us()); | 343 BENCHMARK_RANGE(Search_Success1_Cached_DFA, 8, 16<<20)->ThreadRange(1, NumCP
Us()); |
339 #ifdef USEPCRE | 344 #ifdef USEPCRE |
340 BENCHMARK_RANGE(Search_Success1_Cached_PCRE, 8, 16<<20)->ThreadRange(1, NumCP
Us()); | 345 BENCHMARK_RANGE(Search_Success1_Cached_PCRE, 8, 16<<20)->ThreadRange(1, NumCP
Us()); |
341 #endif | 346 #endif |
342 BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCP
Us()); | 347 BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCP
Us()); |
343 | 348 |
344 // Benchmark: use regexp to find phone number. | 349 // Benchmark: use regexp to find phone number. |
345 | 350 |
346 void SearchDigits(int iters, SearchImpl* search) { | 351 void SearchDigits(int iters, SearchImpl* search) { |
347 const char *text = "650-253-0001"; | 352 StringPiece s("650-253-0001"); |
348 int len = strlen(text); | |
349 BenchmarkMemoryUsage(); | 353 BenchmarkMemoryUsage(); |
350 search(iters, "([0-9]+)-([0-9]+)-([0-9]+)", | 354 search(iters, "([0-9]+)-([0-9]+)-([0-9]+)", s, Prog::kAnchored, true); |
351 StringPiece(text, len), Prog::kAnchored, true); | |
352 SetBenchmarkItemsProcessed(iters); | 355 SetBenchmarkItemsProcessed(iters); |
353 } | 356 } |
354 | 357 |
355 void Search_Digits_DFA(int i) { SearchDigits(i, SearchDFA); } | 358 void Search_Digits_DFA(int i) { SearchDigits(i, SearchDFA); } |
356 void Search_Digits_NFA(int i) { SearchDigits(i, SearchNFA); } | 359 void Search_Digits_NFA(int i) { SearchDigits(i, SearchNFA); } |
357 void Search_Digits_OnePass(int i) { SearchDigits(i, SearchOnePass); } | 360 void Search_Digits_OnePass(int i) { SearchDigits(i, SearchOnePass); } |
358 void Search_Digits_PCRE(int i) { SearchDigits(i, SearchPCRE); } | 361 void Search_Digits_PCRE(int i) { SearchDigits(i, SearchPCRE); } |
359 void Search_Digits_RE2(int i) { SearchDigits(i, SearchRE2); } | 362 void Search_Digits_RE2(int i) { SearchDigits(i, SearchRE2); } |
360 void Search_Digits_BitState(int i) { SearchDigits(i, SearchBitState); } | 363 void Search_Digits_BitState(int i) { SearchDigits(i, SearchBitState); } |
361 | 364 |
(...skipping 317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
679 #endif | 682 #endif |
680 BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs()); | 683 BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs()); |
681 BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs()); | 684 BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs()); |
682 BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs()); | 685 BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs()); |
683 BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs()); | 686 BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs()); |
684 BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs()); | 687 BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs()); |
685 BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs()); | 688 BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs()); |
686 BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs()); | 689 BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs()); |
687 BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs()); | 690 BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs()); |
688 | 691 |
689 | |
690 // Makes text of size nbytes, then calls run to search | 692 // Makes text of size nbytes, then calls run to search |
691 // the text for regexp iters times. | 693 // the text for regexp iters times. |
692 void SearchPhone(int iters, int nbytes, ParseImpl* search) { | 694 void SearchPhone(int iters, int nbytes, ParseImpl* search) { |
693 StopBenchmarkTiming(); | 695 StopBenchmarkTiming(); |
694 string s; | 696 string s; |
695 MakeText(&s, nbytes); | 697 MakeText(&s, nbytes); |
696 s.append("(650) 253-0001"); | 698 s.append("(650) 253-0001"); |
697 BenchmarkMemoryUsage(); | 699 BenchmarkMemoryUsage(); |
698 StartBenchmarkTiming(); | 700 StartBenchmarkTiming(); |
699 search(iters, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s); | 701 search(iters, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s); |
(...skipping 752 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1452 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20); | 1454 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20); |
1453 #endif | 1455 #endif |
1454 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20); | 1456 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20); |
1455 | 1457 |
1456 #ifdef USEPCRE | 1458 #ifdef USEPCRE |
1457 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20); | 1459 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20); |
1458 #endif | 1460 #endif |
1459 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20); | 1461 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20); |
1460 | 1462 |
1461 } // namespace re2 | 1463 } // namespace re2 |
OLD | NEW |