OLD | NEW |
| (Empty) |
1 // Copyright 2006-2008 The RE2 Authors. All Rights Reserved. | |
2 // Use of this source code is governed by a BSD-style | |
3 // license that can be found in the LICENSE file. | |
4 | |
5 // Benchmarks for regular expression implementations. | |
6 | |
7 #include "util/test.h" | |
8 #include "re2/prog.h" | |
9 #include "re2/re2.h" | |
10 #include "re2/regexp.h" | |
11 #include "util/pcre.h" | |
12 #include "util/benchmark.h" | |
13 | |
14 namespace re2 { | |
15 void Test(); | |
16 void MemoryUsage(); | |
17 } // namespace re2 | |
18 | |
19 typedef testing::MallocCounter MallocCounter; | |
20 | |
21 namespace re2 { | |
22 | |
23 void Test() { | |
24 Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL); | |
25 CHECK(re); | |
26 Prog* prog = re->CompileToProg(0); | |
27 CHECK(prog); | |
28 CHECK(prog->IsOnePass()); | |
29 const char* text = "650-253-0001"; | |
30 StringPiece sp[4]; | |
31 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4
)); | |
32 CHECK_EQ(sp[0], "650-253-0001"); | |
33 CHECK_EQ(sp[1], "650"); | |
34 CHECK_EQ(sp[2], "253"); | |
35 CHECK_EQ(sp[3], "0001"); | |
36 delete prog; | |
37 re->Decref(); | |
38 LOG(INFO) << "test passed\n"; | |
39 } | |
40 | |
41 void MemoryUsage() { | |
42 const char* regexp = "(\\d+)-(\\d+)-(\\d+)"; | |
43 const char* text = "650-253-0001"; | |
44 { | |
45 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); | |
46 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
47 CHECK(re); | |
48 // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly, | |
49 // because LOG(INFO) might do a big allocation before they get evaluated. | |
50 fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
51 mc.Reset(); | |
52 | |
53 Prog* prog = re->CompileToProg(0); | |
54 CHECK(prog); | |
55 CHECK(prog->IsOnePass()); | |
56 fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
57 mc.Reset(); | |
58 | |
59 StringPiece sp[4]; | |
60 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp,
4)); | |
61 fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
62 delete prog; | |
63 re->Decref(); | |
64 } | |
65 | |
66 { | |
67 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); | |
68 | |
69 PCRE re(regexp, PCRE::UTF8); | |
70 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
71 PCRE::FullMatch(text, re); | |
72 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
73 } | |
74 | |
75 { | |
76 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); | |
77 | |
78 PCRE* re = new PCRE(regexp, PCRE::UTF8); | |
79 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
80 PCRE::FullMatch(text, *re); | |
81 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
82 delete re; | |
83 } | |
84 | |
85 { | |
86 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); | |
87 | |
88 RE2 re(regexp); | |
89 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
90 RE2::FullMatch(text, re); | |
91 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.Pea
kHeapGrowth()); | |
92 } | |
93 | |
94 fprintf(stderr, "sizeof: PCRE=%d RE2=%d Prog=%d Inst=%d\n", | |
95 static_cast<int>(sizeof(PCRE)), | |
96 static_cast<int>(sizeof(RE2)), | |
97 static_cast<int>(sizeof(Prog)), | |
98 static_cast<int>(sizeof(Prog::Inst))); | |
99 } | |
100 | |
101 // Regular expression implementation wrappers. | |
102 // Defined at bottom of file, but they are repetitive | |
103 // and not interesting. | |
104 | |
105 typedef void SearchImpl(int iters, const char* regexp, const StringPiece& text, | |
106 Prog::Anchor anchor, bool expect_match); | |
107 | |
108 SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState, | |
109 SearchPCRE, SearchRE2, | |
110 SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass, SearchCachedBi
tState, | |
111 SearchCachedPCRE, SearchCachedRE2; | |
112 | |
113 typedef void ParseImpl(int iters, const char* regexp, const StringPiece& text); | |
114 | |
115 ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState, | |
116 Parse1PCRE, Parse1RE2, | |
117 Parse1Backtrack, | |
118 Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState, | |
119 Parse1CachedPCRE, Parse1CachedRE2, | |
120 Parse1CachedBacktrack; | |
121 | |
122 ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState, | |
123 Parse3PCRE, Parse3RE2, | |
124 Parse3Backtrack, | |
125 Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState, | |
126 Parse3CachedPCRE, Parse3CachedRE2, | |
127 Parse3CachedBacktrack; | |
128 | |
129 ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2; | |
130 | |
131 ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2; | |
132 | |
133 // Benchmark: failed search for regexp in random text. | |
134 | |
// Generate random text that won't contain the search string,
// to test worst-case search behavior.
//
// Resizes *text to nbytes and fills it with pseudo-random printable ASCII
// bytes in the range [0x20, 0x7E].  The generator is reseeded with srand(1)
// on every call, so each call restarts the same rand() sequence.
void MakeText(string* text, int nbytes) {
  srand(1);
  text->resize(nbytes);
  for (int i = 0; i < nbytes; i++) {
    // Generate a one-byte rune that isn't a control character (e.g. '\n').
    // Clipping introduces some bias, but we don't need uniformity.
    int byte = rand() & 0x7F;
    if (byte < 0x20)
      byte = 0x20;
    // 0x7F (DEL) is a control character too, and it falls outside the
    // [ -~] class used by the HARD and PARENS patterns, so clip it as well.
    if (byte > 0x7E)
      byte = 0x7E;
    (*text)[i] = static_cast<char>(byte);
  }
}
149 | |
150 // Makes text of size nbytes, then calls run to search | |
151 // the text for regexp iters times. | |
152 void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) { | |
153 StopBenchmarkTiming(); | |
154 string s; | |
155 MakeText(&s, nbytes); | |
156 BenchmarkMemoryUsage(); | |
157 StartBenchmarkTiming(); | |
158 search(iters, regexp, s, Prog::kUnanchored, false); | |
159 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); | |
160 } | |
161 | |
// Patterns for the failed-search benchmarks, in increasing difficulty.

// Easy: both start with a literal A, which gives the search loop
// something to memchr for.
#define EASY0 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
#define EASY1 "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"

// A little harder: the leading character class means it can't be
// memchr'ed.  One could look for ABC and work backward, but no one
// does that.
#define MEDIUM "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"

// A fair amount harder because of the leading [ -~]*.  A bad
// backtracking implementation will take O(text^2) time to figure out
// there's no match.
#define HARD "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"

// Stresses engines that are trying to track parentheses.
#define PARENS "([ -~])*" \
               "(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
               "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
180 | |
181 void Search_Easy0_CachedDFA(int i, int n) { Search(i, n, EASY0, SearchCached
DFA); } | |
182 void Search_Easy0_CachedNFA(int i, int n) { Search(i, n, EASY0, SearchCached
NFA); } | |
183 void Search_Easy0_CachedPCRE(int i, int n) { Search(i, n, EASY0, SearchCached
PCRE); } | |
184 void Search_Easy0_CachedRE2(int i, int n) { Search(i, n, EASY0, SearchCached
RE2); } | |
185 | |
186 BENCHMARK_RANGE(Search_Easy0_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()
); | |
187 BENCHMARK_RANGE(Search_Easy0_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs(
)); | |
188 #ifdef USEPCRE | |
189 BENCHMARK_RANGE(Search_Easy0_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()
); | |
190 #endif | |
191 BENCHMARK_RANGE(Search_Easy0_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()
); | |
192 | |
193 void Search_Easy1_CachedDFA(int i, int n) { Search(i, n, EASY1, SearchCached
DFA); } | |
194 void Search_Easy1_CachedNFA(int i, int n) { Search(i, n, EASY1, SearchCached
NFA); } | |
195 void Search_Easy1_CachedPCRE(int i, int n) { Search(i, n, EASY1, SearchCached
PCRE); } | |
196 void Search_Easy1_CachedRE2(int i, int n) { Search(i, n, EASY1, SearchCached
RE2); } | |
197 | |
198 BENCHMARK_RANGE(Search_Easy1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()
); | |
199 BENCHMARK_RANGE(Search_Easy1_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs(
)); | |
200 #ifdef USEPCRE | |
201 BENCHMARK_RANGE(Search_Easy1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()
); | |
202 #endif | |
203 BENCHMARK_RANGE(Search_Easy1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()
); | |
204 | |
205 void Search_Medium_CachedDFA(int i, int n) { Search(i, n, MEDIUM, SearchCach
edDFA); } | |
206 void Search_Medium_CachedNFA(int i, int n) { Search(i, n, MEDIUM, SearchCach
edNFA); } | |
207 void Search_Medium_CachedPCRE(int i, int n) { Search(i, n, MEDIUM, SearchCach
edPCRE); } | |
208 void Search_Medium_CachedRE2(int i, int n) { Search(i, n, MEDIUM, SearchCach
edRE2); } | |
209 | |
210 BENCHMARK_RANGE(Search_Medium_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs(
)); | |
211 BENCHMARK_RANGE(Search_Medium_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs
()); | |
212 #ifdef USEPCRE | |
213 BENCHMARK_RANGE(Search_Medium_CachedPCRE, 8, 256<<10)->ThreadRange(1, NumCPUs
()); | |
214 #endif | |
215 BENCHMARK_RANGE(Search_Medium_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs(
)); | |
216 | |
217 void Search_Hard_CachedDFA(int i, int n) { Search(i, n, HARD, SearchCachedDF
A); } | |
218 void Search_Hard_CachedNFA(int i, int n) { Search(i, n, HARD, SearchCachedNF
A); } | |
219 void Search_Hard_CachedPCRE(int i, int n) { Search(i, n, HARD, SearchCachedPC
RE); } | |
220 void Search_Hard_CachedRE2(int i, int n) { Search(i, n, HARD, SearchCachedRE
2); } | |
221 | |
222 BENCHMARK_RANGE(Search_Hard_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs())
; | |
223 BENCHMARK_RANGE(Search_Hard_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()
); | |
224 #ifdef USEPCRE | |
225 BENCHMARK_RANGE(Search_Hard_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs()); | |
226 #endif | |
227 BENCHMARK_RANGE(Search_Hard_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs())
; | |
228 | |
229 void Search_Parens_CachedDFA(int i, int n) { Search(i, n, PARENS, SearchCach
edDFA); } | |
230 void Search_Parens_CachedNFA(int i, int n) { Search(i, n, PARENS, SearchCach
edNFA); } | |
231 void Search_Parens_CachedPCRE(int i, int n) { Search(i, n, PARENS, SearchCach
edPCRE); } | |
232 void Search_Parens_CachedRE2(int i, int n) { Search(i, n, PARENS, SearchCach
edRE2); } | |
233 | |
234 BENCHMARK_RANGE(Search_Parens_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs(
)); | |
235 BENCHMARK_RANGE(Search_Parens_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs
()); | |
236 #ifdef USEPCRE | |
237 BENCHMARK_RANGE(Search_Parens_CachedPCRE, 8, 8)->ThreadRange(1, NumCPUs()); | |
238 #endif | |
239 BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs(
)); | |
240 | |
241 void SearchBigFixed(int iters, int nbytes, SearchImpl* search) { | |
242 StopBenchmarkTiming(); | |
243 string s; | |
244 s.append(nbytes/2, 'x'); | |
245 string regexp = "^" + s + ".*$"; | |
246 string t; | |
247 MakeText(&t, nbytes/2); | |
248 s += t; | |
249 BenchmarkMemoryUsage(); | |
250 StartBenchmarkTiming(); | |
251 search(iters, regexp.c_str(), s, Prog::kUnanchored, true); | |
252 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); | |
253 } | |
254 | |
255 void Search_BigFixed_CachedDFA(int i, int n) { SearchBigFixed(i, n, SearchCa
chedDFA); } | |
256 void Search_BigFixed_CachedNFA(int i, int n) { SearchBigFixed(i, n, SearchCa
chedNFA); } | |
257 void Search_BigFixed_CachedPCRE(int i, int n) { SearchBigFixed(i, n, SearchCa
chedPCRE); } | |
258 void Search_BigFixed_CachedRE2(int i, int n) { SearchBigFixed(i, n, SearchCa
chedRE2); } | |
259 | |
260 BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs
()); | |
261 BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPU
s()); | |
262 #ifdef USEPCRE | |
263 BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPU
s()); | |
264 #endif | |
265 BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs
()); | |
266 | |
267 // Benchmark: FindAndConsume | |
268 | |
269 void FindAndConsume(int iters, int nbytes) { | |
270 StopBenchmarkTiming(); | |
271 string s; | |
272 MakeText(&s, nbytes); | |
273 s.append("Hello World"); | |
274 StartBenchmarkTiming(); | |
275 RE2 re("((Hello World))"); | |
276 for (int i = 0; i < iters; i++) { | |
277 StringPiece t = s; | |
278 StringPiece u; | |
279 CHECK(RE2::FindAndConsume(&t, re, &u)); | |
280 CHECK_EQ(u, "Hello World"); | |
281 } | |
282 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); | |
283 } | |
284 | |
285 BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
286 | |
287 // Benchmark: successful anchored search. | |
288 | |
289 void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search
) { | |
290 StopBenchmarkTiming(); | |
291 string s; | |
292 MakeText(&s, nbytes); | |
293 BenchmarkMemoryUsage(); | |
294 StartBenchmarkTiming(); | |
295 search(iters, regexp, s, Prog::kAnchored, true); | |
296 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); | |
297 } | |
298 | |
299 // Unambiguous search (RE2 can use OnePass). | |
300 | |
301 void Search_Success_DFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchDFA
); } | |
302 void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOne
Pass); } | |
303 void Search_Success_PCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchPCR
E); } | |
304 void Search_Success_RE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchRE2
); } | |
305 | |
306 BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
307 #ifdef USEPCRE | |
308 BENCHMARK_RANGE(Search_Success_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
309 #endif | |
310 BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
311 BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs()); | |
312 | |
313 void Search_Success_CachedDFA(int i, int n) { SearchSuccess(i, n, ".*$", Sea
rchCachedDFA); } | |
314 void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", Sea
rchCachedOnePass); } | |
315 void Search_Success_CachedPCRE(int i, int n) { SearchSuccess(i, n, ".*$", Sea
rchCachedPCRE); } | |
316 void Search_Success_CachedRE2(int i, int n) { SearchSuccess(i, n, ".*$", Sea
rchCachedRE2); } | |
317 | |
318 BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs
()); | |
319 #ifdef USEPCRE | |
320 BENCHMARK_RANGE(Search_Success_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs
()); | |
321 #endif | |
322 BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs
()); | |
323 BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs(
)); | |
324 | |
325 // Ambiguous search (RE2 cannot use OnePass). | |
326 | |
327 void Search_Success1_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchD
FA); } | |
328 void Search_Success1_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchP
CRE); } | |
329 void Search_Success1_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchR
E2); } | |
330 void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*.$", Se
archBitState); } | |
331 | |
332 BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
333 #ifdef USEPCRE | |
334 BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
335 #endif | |
336 BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
337 BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs()); | |
338 | |
339 void Search_Success1_Cached_DFA(int i, int n) { SearchSuccess(i, n, ".*.$",
SearchCachedDFA); } | |
340 void Search_Success1_Cached_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$",
SearchCachedPCRE); } | |
341 void Search_Success1_Cached_RE2(int i, int n) { SearchSuccess(i, n, ".*.$",
SearchCachedRE2); } | |
342 | |
343 BENCHMARK_RANGE(Search_Success1_Cached_DFA, 8, 16<<20)->ThreadRange(1, NumCP
Us()); | |
344 #ifdef USEPCRE | |
345 BENCHMARK_RANGE(Search_Success1_Cached_PCRE, 8, 16<<20)->ThreadRange(1, NumCP
Us()); | |
346 #endif | |
347 BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCP
Us()); | |
348 | |
349 // Benchmark: use regexp to find phone number. | |
350 | |
351 void SearchDigits(int iters, SearchImpl* search) { | |
352 StringPiece s("650-253-0001"); | |
353 BenchmarkMemoryUsage(); | |
354 search(iters, "([0-9]+)-([0-9]+)-([0-9]+)", s, Prog::kAnchored, true); | |
355 SetBenchmarkItemsProcessed(iters); | |
356 } | |
357 | |
358 void Search_Digits_DFA(int i) { SearchDigits(i, SearchDFA); } | |
359 void Search_Digits_NFA(int i) { SearchDigits(i, SearchNFA); } | |
360 void Search_Digits_OnePass(int i) { SearchDigits(i, SearchOnePass); } | |
361 void Search_Digits_PCRE(int i) { SearchDigits(i, SearchPCRE); } | |
362 void Search_Digits_RE2(int i) { SearchDigits(i, SearchRE2); } | |
363 void Search_Digits_BitState(int i) { SearchDigits(i, SearchBitState); } | |
364 | |
365 BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs()); | |
366 BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs()); | |
367 BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs()); | |
368 #ifdef USEPCRE | |
369 BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs()); | |
370 #endif | |
371 BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs()); | |
372 BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs()); | |
373 | |
374 // Benchmark: use regexp to parse digit fields in phone number. | |
375 | |
376 void Parse3Digits(int iters, | |
377 void (*parse3)(int, const char*, const StringPiece&)) { | |
378 BenchmarkMemoryUsage(); | |
379 parse3(iters, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001"); | |
380 SetBenchmarkItemsProcessed(iters); | |
381 } | |
382 | |
383 void Parse_Digits_NFA(int i) { Parse3Digits(i, Parse3NFA); } | |
384 void Parse_Digits_OnePass(int i) { Parse3Digits(i, Parse3OnePass); } | |
385 void Parse_Digits_PCRE(int i) { Parse3Digits(i, Parse3PCRE); } | |
386 void Parse_Digits_RE2(int i) { Parse3Digits(i, Parse3RE2); } | |
387 void Parse_Digits_Backtrack(int i) { Parse3Digits(i, Parse3Backtrack); } | |
388 void Parse_Digits_BitState(int i) { Parse3Digits(i, Parse3BitState); } | |
389 | |
390 BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs()); | |
391 BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs()); | |
392 #ifdef USEPCRE | |
393 BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs()); | |
394 #endif | |
395 BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs()); | |
396 BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs()); | |
397 BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs()); | |
398 | |
399 void Parse_CachedDigits_NFA(int i) { Parse3Digits(i, Parse3CachedNFA); } | |
400 void Parse_CachedDigits_OnePass(int i) { Parse3Digits(i, Parse3CachedOnePass
); } | |
401 void Parse_CachedDigits_PCRE(int i) { Parse3Digits(i, Parse3CachedPCRE);
} | |
402 void Parse_CachedDigits_RE2(int i) { Parse3Digits(i, Parse3CachedRE2); } | |
403 void Parse_CachedDigits_Backtrack(int i) { Parse3Digits(i, Parse3CachedBacktra
ck); } | |
404 void Parse_CachedDigits_BitState(int i) { Parse3Digits(i, Parse3CachedBitState
); } | |
405 | |
406 BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs()); | |
407 BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs()); | |
408 #ifdef USEPCRE | |
409 BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs()); | |
410 #endif | |
411 BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs()); | |
412 BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs()); | |
413 BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs()); | |
414 | |
415 void Parse3DigitDs(int iters, | |
416 void (*parse3)(int, const char*, const StringPiece&)) { | |
417 BenchmarkMemoryUsage(); | |
418 parse3(iters, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001"); | |
419 SetBenchmarkItemsProcessed(iters); | |
420 } | |
421 | |
422 void Parse_DigitDs_NFA(int i) { Parse3DigitDs(i, Parse3NFA); } | |
423 void Parse_DigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3OnePass); } | |
424 void Parse_DigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3PCRE); } | |
425 void Parse_DigitDs_RE2(int i) { Parse3DigitDs(i, Parse3RE2); } | |
426 void Parse_DigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBacktrack);
} | |
427 void Parse_DigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitState); } | |
428 | |
429 BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs()); | |
430 BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs()); | |
431 #ifdef USEPCRE | |
432 BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs()); | |
433 #endif | |
434 BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs()); | |
435 BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs()); | |
436 BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs()); | |
437 | |
438 void Parse_CachedDigitDs_NFA(int i) { Parse3DigitDs(i, Parse3CachedNFA);
} | |
439 void Parse_CachedDigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3CachedOnePa
ss); } | |
440 void Parse_CachedDigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3CachedPCRE)
; } | |
441 void Parse_CachedDigitDs_RE2(int i) { Parse3DigitDs(i, Parse3CachedRE2);
} | |
442 void Parse_CachedDigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBackt
rack); } | |
443 void Parse_CachedDigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitSta
te); } | |
444 | |
445 BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs()); | |
446 BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs()); | |
447 #ifdef USEPCRE | |
448 BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs()); | |
449 #endif | |
450 BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs()); | |
451 BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs()); | |
452 BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs()); | |
453 | |
454 // Benchmark: splitting off leading number field. | |
455 | |
456 void Parse1Split(int iters, | |
457 void (*parse1)(int, const char*, const StringPiece&)) { | |
458 BenchmarkMemoryUsage(); | |
459 parse1(iters, "[0-9]+-(.*)", "650-253-0001"); | |
460 SetBenchmarkItemsProcessed(iters); | |
461 } | |
462 | |
463 void Parse_Split_NFA(int i) { Parse1Split(i, Parse1NFA); } | |
464 void Parse_Split_OnePass(int i) { Parse1Split(i, Parse1OnePass); } | |
465 void Parse_Split_PCRE(int i) { Parse1Split(i, Parse1PCRE); } | |
466 void Parse_Split_RE2(int i) { Parse1Split(i, Parse1RE2); } | |
467 void Parse_Split_BitState(int i) { Parse1Split(i, Parse1BitState); } | |
468 | |
469 BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs()); | |
470 BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs()); | |
471 #ifdef USEPCRE | |
472 BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs()); | |
473 #endif | |
474 BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs()); | |
475 BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs()); | |
476 | |
477 void Parse_CachedSplit_NFA(int i) { Parse1Split(i, Parse1CachedNFA); } | |
478 void Parse_CachedSplit_OnePass(int i) { Parse1Split(i, Parse1CachedOnePass);
} | |
479 void Parse_CachedSplit_PCRE(int i) { Parse1Split(i, Parse1CachedPCRE); } | |
480 void Parse_CachedSplit_RE2(int i) { Parse1Split(i, Parse1CachedRE2); } | |
481 void Parse_CachedSplit_BitState(int i) { Parse1Split(i, Parse1CachedBitS
tate); } | |
482 | |
483 BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs()); | |
484 BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs()); | |
485 #ifdef USEPCRE | |
486 BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs()); | |
487 #endif | |
488 BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs()); | |
489 BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs()); | |
490 | |
491 // Benchmark: splitting off leading number field but harder (ambiguous regexp). | |
492 | |
493 void Parse1SplitHard(int iters, | |
494 void (*run)(int, const char*, const StringPiece&)) { | |
495 BenchmarkMemoryUsage(); | |
496 run(iters, "[0-9]+.(.*)", "650-253-0001"); | |
497 SetBenchmarkItemsProcessed(iters); | |
498 } | |
499 | |
500 void Parse_SplitHard_NFA(int i) { Parse1SplitHard(i, Parse1NFA); } | |
501 void Parse_SplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1PCRE); } | |
502 void Parse_SplitHard_RE2(int i) { Parse1SplitHard(i, Parse1RE2); } | |
503 void Parse_SplitHard_BitState(int i) { Parse1SplitHard(i, Parse1BitState
); } | |
504 | |
505 #ifdef USEPCRE | |
506 BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs()); | |
507 #endif | |
508 BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs()); | |
509 BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs()); | |
510 BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs()); | |
511 | |
512 void Parse_CachedSplitHard_NFA(int i) { Parse1SplitHard(i, Parse1CachedNFA
); } | |
513 void Parse_CachedSplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1CachedPCR
E); } | |
514 void Parse_CachedSplitHard_RE2(int i) { Parse1SplitHard(i, Parse1CachedRE2
); } | |
515 void Parse_CachedSplitHard_BitState(int i) { Parse1SplitHard(i, Parse1Cach
edBitState); } | |
516 void Parse_CachedSplitHard_Backtrack(int i) { Parse1SplitHard(i, Parse1Cac
hedBacktrack); } | |
517 | |
518 #ifdef USEPCRE | |
519 BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs()); | |
520 #endif | |
521 BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs()); | |
522 BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs()); | |
523 BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs()); | |
524 BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs()); | |
525 | |
526 // Benchmark: Parse1SplitHard, big text, small match. | |
527 | |
528 void Parse1SplitBig1(int iters, | |
529 void (*run)(int, const char*, const StringPiece&)) { | |
530 string s; | |
531 s.append(100000, 'x'); | |
532 s.append("650-253-0001"); | |
533 BenchmarkMemoryUsage(); | |
534 run(iters, "[0-9]+.(.*)", s); | |
535 SetBenchmarkItemsProcessed(iters); | |
536 } | |
537 | |
538 void Parse_CachedSplitBig1_PCRE(int i) { Parse1SplitBig1(i, SearchParse1Cac
hedPCRE); } | |
539 void Parse_CachedSplitBig1_RE2(int i) { Parse1SplitBig1(i, SearchParse1Cac
hedRE2); } | |
540 | |
541 #ifdef USEPCRE | |
542 BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs()); | |
543 #endif | |
544 BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs()); | |
545 | |
546 // Benchmark: Parse1SplitHard, big text, big match. | |
547 | |
548 void Parse1SplitBig2(int iters, | |
549 void (*run)(int, const char*, const StringPiece&)) { | |
550 string s; | |
551 s.append("650-253-"); | |
552 s.append(100000, '0'); | |
553 BenchmarkMemoryUsage(); | |
554 run(iters, "[0-9]+.(.*)", s); | |
555 SetBenchmarkItemsProcessed(iters); | |
556 } | |
557 | |
558 void Parse_CachedSplitBig2_PCRE(int i) { Parse1SplitBig2(i, SearchParse1Cac
hedPCRE); } | |
559 void Parse_CachedSplitBig2_RE2(int i) { Parse1SplitBig2(i, SearchParse1Cac
hedRE2); } | |
560 | |
561 #ifdef USEPCRE | |
562 BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs()); | |
563 #endif | |
564 BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs()); | |
565 | |
566 // Benchmark: measure time required to parse (but not execute) | |
567 // a simple regular expression. | |
568 | |
569 void ParseRegexp(int iters, const string& regexp) { | |
570 for (int i = 0; i < iters; i++) { | |
571 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
572 CHECK(re); | |
573 re->Decref(); | |
574 } | |
575 } | |
576 | |
577 void SimplifyRegexp(int iters, const string& regexp) { | |
578 for (int i = 0; i < iters; i++) { | |
579 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
580 CHECK(re); | |
581 Regexp* sre = re->Simplify(); | |
582 CHECK(sre); | |
583 sre->Decref(); | |
584 re->Decref(); | |
585 } | |
586 } | |
587 | |
588 void NullWalkRegexp(int iters, const string& regexp) { | |
589 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
590 CHECK(re); | |
591 for (int i = 0; i < iters; i++) { | |
592 re->NullWalk(); | |
593 } | |
594 re->Decref(); | |
595 } | |
596 | |
597 void SimplifyCompileRegexp(int iters, const string& regexp) { | |
598 for (int i = 0; i < iters; i++) { | |
599 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
600 CHECK(re); | |
601 Regexp* sre = re->Simplify(); | |
602 CHECK(sre); | |
603 Prog* prog = sre->CompileToProg(0); | |
604 CHECK(prog); | |
605 delete prog; | |
606 sre->Decref(); | |
607 re->Decref(); | |
608 } | |
609 } | |
610 | |
611 void CompileRegexp(int iters, const string& regexp) { | |
612 for (int i = 0; i < iters; i++) { | |
613 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
614 CHECK(re); | |
615 Prog* prog = re->CompileToProg(0); | |
616 CHECK(prog); | |
617 delete prog; | |
618 re->Decref(); | |
619 } | |
620 } | |
621 | |
622 void CompileToProg(int iters, const string& regexp) { | |
623 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
624 CHECK(re); | |
625 for (int i = 0; i < iters; i++) { | |
626 Prog* prog = re->CompileToProg(0); | |
627 CHECK(prog); | |
628 delete prog; | |
629 } | |
630 re->Decref(); | |
631 } | |
632 | |
633 void CompileByteMap(int iters, const string& regexp) { | |
634 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
635 CHECK(re); | |
636 Prog* prog = re->CompileToProg(0); | |
637 CHECK(prog); | |
638 for (int i = 0; i < iters; i++) { | |
639 prog->ComputeByteMap(); | |
640 } | |
641 delete prog; | |
642 re->Decref(); | |
643 } | |
644 | |
645 void CompilePCRE(int iters, const string& regexp) { | |
646 for (int i = 0; i < iters; i++) { | |
647 PCRE re(regexp, PCRE::UTF8); | |
648 CHECK_EQ(re.error(), ""); | |
649 } | |
650 } | |
651 | |
652 void CompileRE2(int iters, const string& regexp) { | |
653 for (int i = 0; i < iters; i++) { | |
654 RE2 re(regexp); | |
655 CHECK_EQ(re.error(), ""); | |
656 } | |
657 } | |
658 | |
659 void RunBuild(int iters, const string& regexp, void (*run)(int, const string&))
{ | |
660 run(iters, regexp); | |
661 SetBenchmarkItemsProcessed(iters); | |
662 } | |
663 | |
664 } // namespace re2 | |
665 | |
666 DEFINE_string(compile_regexp, "(.*)-(\\d+)-of-(\\d+)", "regexp for compile bench
marks"); | |
667 | |
668 namespace re2 { | |
669 | |
670 void BM_PCRE_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompilePCRE
); } | |
671 void BM_Regexp_Parse(int i) { RunBuild(i, FLAGS_compile_regexp, ParseRegexp
); } | |
672 void BM_Regexp_Simplify(int i) { RunBuild(i, FLAGS_compile_regexp, SimplifyReg
exp); } | |
673 void BM_CompileToProg(int i) { RunBuild(i, FLAGS_compile_regexp, CompileToPr
og); } | |
674 void BM_CompileByteMap(int i) { RunBuild(i, FLAGS_compile_regexp, CompileByt
eMap); } | |
675 void BM_Regexp_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRege
xp); } | |
676 void BM_Regexp_SimplifyCompile(int i) { RunBuild(i, FLAGS_compile_regexp, Simp
lifyCompileRegexp); } | |
677 void BM_Regexp_NullWalk(int i) { RunBuild(i, FLAGS_compile_regexp, NullWalkReg
exp); } | |
678 void BM_RE2_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRE2)
; } | |
679 | |
680 #ifdef USEPCRE | |
681 BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs()); | |
682 #endif | |
683 BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs()); | |
684 BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs()); | |
685 BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs()); | |
686 BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs()); | |
687 BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs()); | |
688 BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs()); | |
689 BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs()); | |
690 BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs()); | |
691 | |
692 // Makes text of size nbytes, then calls run to search | |
693 // the text for regexp iters times. | |
694 void SearchPhone(int iters, int nbytes, ParseImpl* search) { | |
695 StopBenchmarkTiming(); | |
696 string s; | |
697 MakeText(&s, nbytes); | |
698 s.append("(650) 253-0001"); | |
699 BenchmarkMemoryUsage(); | |
700 StartBenchmarkTiming(); | |
701 search(iters, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s); | |
702 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes); | |
703 } | |
704 | |
705 void SearchPhone_CachedPCRE(int i, int n) { | |
706 SearchPhone(i, n, SearchParse2CachedPCRE); | |
707 } | |
708 void SearchPhone_CachedRE2(int i, int n) { | |
709 SearchPhone(i, n, SearchParse2CachedRE2); | |
710 } | |
711 | |
712 #ifdef USEPCRE | |
713 BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
714 #endif | |
715 BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); | |
716 | |
717 /* | |
718 TODO(rsc): Make this work again. | |
719 | |
720 // Generates and returns a string over binary alphabet {0,1} that contains | |
721 // all possible binary sequences of length n as subsequences. The obvious | |
722 // brute force method would generate a string of length n * 2^n, but this | |
723 // generates a string of length n + 2^n - 1 called a De Bruijn cycle. | |
724 // See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17. | |
725 static string DeBruijnString(int n) { | |
726 CHECK_LT(n, 8*sizeof(int)); | |
727 CHECK_GT(n, 0); | |
728 | |
729 vector<bool> did(1<<n); | |
730 for (int i = 0; i < 1<<n; i++) | |
731 did[i] = false; | |
732 | |
733 string s; | |
734 for (int i = 0; i < n-1; i++) | |
735 s.append("0"); | |
736 int bits = 0; | |
737 int mask = (1<<n) - 1; | |
738 for (int i = 0; i < (1<<n); i++) { | |
739 bits <<= 1; | |
740 bits &= mask; | |
741 if (!did[bits|1]) { | |
742 bits |= 1; | |
743 s.append("1"); | |
744 } else { | |
745 s.append("0"); | |
746 } | |
747 CHECK(!did[bits]); | |
748 did[bits] = true; | |
749 } | |
750 return s; | |
751 } | |
752 | |
753 void CacheFill(int iters, int n, SearchImpl *srch) { | |
754 string s = DeBruijnString(n+1); | |
755 string t; | |
756 for (int i = n+1; i < 20; i++) { | |
757 t = s + s; | |
758 swap(s, t); | |
759 } | |
760 srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s, | |
761 Prog::kUnanchored, true); | |
762 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*s.size()); | |
763 } | |
764 | |
765 void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); } | |
766 void CacheFillRE2(int i, int n) { CacheFill(i, n, SearchCachedRE2); } | |
767 void CacheFillNFA(int i, int n) { CacheFill(i, n, SearchCachedNFA); } | |
768 void CacheFillDFA(int i, int n) { CacheFill(i, n, SearchCachedDFA); } | |
769 | |
770 // BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers | |
771 // for the static BenchmarkRegisterer, which makes it unusable inside | |
772 // a macro like DO24 below. MY_BENCHMARK_WITH_ARG uses the argument a | |
773 // to make the identifiers distinct (only possible when 'a' is a simple | |
774 // expression like 2, not like 1+1). | |
775 #define MY_BENCHMARK_WITH_ARG(n, a) \ | |
776 bool __benchmark_ ## n ## a = \ | |
777 (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, Num
CPUs()); | |
778 | |
779 #define DO24(A, B) \ | |
780 A(B, 1); A(B, 2); A(B, 3); A(B, 4); A(B, 5); A(B, 6); \ | |
781 A(B, 7); A(B, 8); A(B, 9); A(B, 10); A(B, 11); A(B, 12); \ | |
782 A(B, 13); A(B, 14); A(B, 15); A(B, 16); A(B, 17); A(B, 18); \ | |
783 A(B, 19); A(B, 20); A(B, 21); A(B, 22); A(B, 23); A(B, 24); | |
784 | |
785 DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE) | |
786 DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA) | |
787 DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2) | |
788 DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA) | |
789 | |
790 #undef DO24 | |
791 #undef MY_BENCHMARK_WITH_ARG | |
792 */ | |
793 | |
794 //////////////////////////////////////////////////////////////////////// | |
795 // | |
796 // Implementation routines. Sad that there are so many, | |
797 // but all the interfaces are slightly different. | |
798 | |
799 // Runs implementation to search for regexp in text, iters times. | |
800 // Expect_match says whether the regexp should be found. | |
801 // Anchored says whether to run an anchored search. | |
802 | |
803 void SearchDFA(int iters, const char* regexp, const StringPiece& text, | |
804 Prog::Anchor anchor, bool expect_match) { | |
805 for (int i = 0; i < iters; i++) { | |
806 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
807 CHECK(re); | |
808 Prog* prog = re->CompileToProg(0); | |
809 CHECK(prog); | |
810 bool failed = false; | |
811 CHECK_EQ(prog->SearchDFA(text, NULL, anchor, Prog::kFirstMatch, | |
812 NULL, &failed, NULL), | |
813 expect_match); | |
814 CHECK(!failed); | |
815 delete prog; | |
816 re->Decref(); | |
817 } | |
818 } | |
819 | |
820 void SearchNFA(int iters, const char* regexp, const StringPiece& text, | |
821 Prog::Anchor anchor, bool expect_match) { | |
822 for (int i = 0; i < iters; i++) { | |
823 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
824 CHECK(re); | |
825 Prog* prog = re->CompileToProg(0); | |
826 CHECK(prog); | |
827 CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0), | |
828 expect_match); | |
829 delete prog; | |
830 re->Decref(); | |
831 } | |
832 } | |
833 | |
834 void SearchOnePass(int iters, const char* regexp, const StringPiece& text, | |
835 Prog::Anchor anchor, bool expect_match) { | |
836 for (int i = 0; i < iters; i++) { | |
837 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
838 CHECK(re); | |
839 Prog* prog = re->CompileToProg(0); | |
840 CHECK(prog); | |
841 CHECK(prog->IsOnePass()); | |
842 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0)
, | |
843 expect_match); | |
844 delete prog; | |
845 re->Decref(); | |
846 } | |
847 } | |
848 | |
849 void SearchBitState(int iters, const char* regexp, const StringPiece& text, | |
850 Prog::Anchor anchor, bool expect_match) { | |
851 for (int i = 0; i < iters; i++) { | |
852 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
853 CHECK(re); | |
854 Prog* prog = re->CompileToProg(0); | |
855 CHECK(prog); | |
856 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0
), | |
857 expect_match); | |
858 delete prog; | |
859 re->Decref(); | |
860 } | |
861 } | |
862 | |
863 void SearchPCRE(int iters, const char* regexp, const StringPiece& text, | |
864 Prog::Anchor anchor, bool expect_match) { | |
865 for (int i = 0; i < iters; i++) { | |
866 PCRE re(regexp, PCRE::UTF8); | |
867 CHECK_EQ(re.error(), ""); | |
868 if (anchor == Prog::kAnchored) | |
869 CHECK_EQ(PCRE::FullMatch(text, re), expect_match); | |
870 else | |
871 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match); | |
872 } | |
873 } | |
874 | |
875 void SearchRE2(int iters, const char* regexp, const StringPiece& text, | |
876 Prog::Anchor anchor, bool expect_match) { | |
877 for (int i = 0; i < iters; i++) { | |
878 RE2 re(regexp); | |
879 CHECK_EQ(re.error(), ""); | |
880 if (anchor == Prog::kAnchored) | |
881 CHECK_EQ(RE2::FullMatch(text, re), expect_match); | |
882 else | |
883 CHECK_EQ(RE2::PartialMatch(text, re), expect_match); | |
884 } | |
885 } | |
886 | |
887 // SearchCachedXXX is like SearchXXX but only does the | |
888 // regexp parsing and compiling once. This lets us measure | |
889 // search time without the per-regexp overhead. | |
890 | |
891 void SearchCachedDFA(int iters, const char* regexp, const StringPiece& text, | |
892 Prog::Anchor anchor, bool expect_match) { | |
893 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
894 CHECK(re); | |
895 Prog* prog = re->CompileToProg(1LL<<31); | |
896 CHECK(prog); | |
897 for (int i = 0; i < iters; i++) { | |
898 bool failed = false; | |
899 CHECK_EQ(prog->SearchDFA(text, NULL, anchor, | |
900 Prog::kFirstMatch, NULL, &failed, NULL), | |
901 expect_match); | |
902 CHECK(!failed); | |
903 } | |
904 delete prog; | |
905 re->Decref(); | |
906 } | |
907 | |
908 void SearchCachedNFA(int iters, const char* regexp, const StringPiece& text, | |
909 Prog::Anchor anchor, bool expect_match) { | |
910 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
911 CHECK(re); | |
912 Prog* prog = re->CompileToProg(0); | |
913 CHECK(prog); | |
914 for (int i = 0; i < iters; i++) { | |
915 CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0), | |
916 expect_match); | |
917 } | |
918 delete prog; | |
919 re->Decref(); | |
920 } | |
921 | |
922 void SearchCachedOnePass(int iters, const char* regexp, const StringPiece& text, | |
923 Prog::Anchor anchor, bool expect_match) { | |
924 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
925 CHECK(re); | |
926 Prog* prog = re->CompileToProg(0); | |
927 CHECK(prog); | |
928 CHECK(prog->IsOnePass()); | |
929 for (int i = 0; i < iters; i++) | |
930 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0)
, | |
931 expect_match); | |
932 delete prog; | |
933 re->Decref(); | |
934 } | |
935 | |
936 void SearchCachedBitState(int iters, const char* regexp, const StringPiece& text
, | |
937 Prog::Anchor anchor, bool expect_match) { | |
938 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
939 CHECK(re); | |
940 Prog* prog = re->CompileToProg(0); | |
941 CHECK(prog); | |
942 for (int i = 0; i < iters; i++) | |
943 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0
), | |
944 expect_match); | |
945 delete prog; | |
946 re->Decref(); | |
947 } | |
948 | |
949 void SearchCachedPCRE(int iters, const char* regexp, const StringPiece& text, | |
950 Prog::Anchor anchor, bool expect_match) { | |
951 PCRE re(regexp, PCRE::UTF8); | |
952 CHECK_EQ(re.error(), ""); | |
953 for (int i = 0; i < iters; i++) { | |
954 if (anchor == Prog::kAnchored) | |
955 CHECK_EQ(PCRE::FullMatch(text, re), expect_match); | |
956 else | |
957 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match); | |
958 } | |
959 } | |
960 | |
961 void SearchCachedRE2(int iters, const char* regexp, const StringPiece& text, | |
962 Prog::Anchor anchor, bool expect_match) { | |
963 RE2 re(regexp); | |
964 CHECK_EQ(re.error(), ""); | |
965 for (int i = 0; i < iters; i++) { | |
966 if (anchor == Prog::kAnchored) | |
967 CHECK_EQ(RE2::FullMatch(text, re), expect_match); | |
968 else | |
969 CHECK_EQ(RE2::PartialMatch(text, re), expect_match); | |
970 } | |
971 } | |
972 | |
973 | |
974 // Runs implementation to full match regexp against text, | |
975 // extracting three submatches. Expects match always. | |
976 | |
977 void Parse3NFA(int iters, const char* regexp, const StringPiece& text) { | |
978 for (int i = 0; i < iters; i++) { | |
979 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
980 CHECK(re); | |
981 Prog* prog = re->CompileToProg(0); | |
982 CHECK(prog); | |
983 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
984 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4))
; | |
985 delete prog; | |
986 re->Decref(); | |
987 } | |
988 } | |
989 | |
990 void Parse3OnePass(int iters, const char* regexp, const StringPiece& text) { | |
991 for (int i = 0; i < iters; i++) { | |
992 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
993 CHECK(re); | |
994 Prog* prog = re->CompileToProg(0); | |
995 CHECK(prog); | |
996 CHECK(prog->IsOnePass()); | |
997 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
998 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp,
4)); | |
999 delete prog; | |
1000 re->Decref(); | |
1001 } | |
1002 } | |
1003 | |
1004 void Parse3BitState(int iters, const char* regexp, const StringPiece& text) { | |
1005 for (int i = 0; i < iters; i++) { | |
1006 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
1007 CHECK(re); | |
1008 Prog* prog = re->CompileToProg(0); | |
1009 CHECK(prog); | |
1010 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
1011 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp
, 4)); | |
1012 delete prog; | |
1013 re->Decref(); | |
1014 } | |
1015 } | |
1016 | |
1017 void Parse3Backtrack(int iters, const char* regexp, const StringPiece& text) { | |
1018 for (int i = 0; i < iters; i++) { | |
1019 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
1020 CHECK(re); | |
1021 Prog* prog = re->CompileToProg(0); | |
1022 CHECK(prog); | |
1023 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
1024 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMa
tch, sp, 4)); | |
1025 delete prog; | |
1026 re->Decref(); | |
1027 } | |
1028 } | |
1029 | |
1030 void Parse3PCRE(int iters, const char* regexp, const StringPiece& text) { | |
1031 for (int i = 0; i < iters; i++) { | |
1032 PCRE re(regexp, PCRE::UTF8); | |
1033 CHECK_EQ(re.error(), ""); | |
1034 StringPiece sp1, sp2, sp3; | |
1035 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3)); | |
1036 } | |
1037 } | |
1038 | |
1039 void Parse3RE2(int iters, const char* regexp, const StringPiece& text) { | |
1040 for (int i = 0; i < iters; i++) { | |
1041 RE2 re(regexp); | |
1042 CHECK_EQ(re.error(), ""); | |
1043 StringPiece sp1, sp2, sp3; | |
1044 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3)); | |
1045 } | |
1046 } | |
1047 | |
1048 void Parse3CachedNFA(int iters, const char* regexp, const StringPiece& text) { | |
1049 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
1050 CHECK(re); | |
1051 Prog* prog = re->CompileToProg(0); | |
1052 CHECK(prog); | |
1053 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
1054 for (int i = 0; i < iters; i++) { | |
1055 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4))
; | |
1056 } | |
1057 delete prog; | |
1058 re->Decref(); | |
1059 } | |
1060 | |
1061 void Parse3CachedOnePass(int iters, const char* regexp, const StringPiece& text)
{ | |
1062 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
1063 CHECK(re); | |
1064 Prog* prog = re->CompileToProg(0); | |
1065 CHECK(prog); | |
1066 CHECK(prog->IsOnePass()); | |
1067 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
1068 for (int i = 0; i < iters; i++) | |
1069 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp,
4)); | |
1070 delete prog; | |
1071 re->Decref(); | |
1072 } | |
1073 | |
1074 void Parse3CachedBitState(int iters, const char* regexp, const StringPiece& text
) { | |
1075 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
1076 CHECK(re); | |
1077 Prog* prog = re->CompileToProg(0); | |
1078 CHECK(prog); | |
1079 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
1080 for (int i = 0; i < iters; i++) | |
1081 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp
, 4)); | |
1082 delete prog; | |
1083 re->Decref(); | |
1084 } | |
1085 | |
1086 void Parse3CachedBacktrack(int iters, const char* regexp, const StringPiece& tex
t) { | |
1087 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
1088 CHECK(re); | |
1089 Prog* prog = re->CompileToProg(0); | |
1090 CHECK(prog); | |
1091 StringPiece sp[4]; // 4 because sp[0] is whole match. | |
1092 for (int i = 0; i < iters; i++) | |
1093 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMa
tch, sp, 4)); | |
1094 delete prog; | |
1095 re->Decref(); | |
1096 } | |
1097 | |
1098 void Parse3CachedPCRE(int iters, const char* regexp, const StringPiece& text) { | |
1099 PCRE re(regexp, PCRE::UTF8); | |
1100 CHECK_EQ(re.error(), ""); | |
1101 StringPiece sp1, sp2, sp3; | |
1102 for (int i = 0; i < iters; i++) { | |
1103 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3)); | |
1104 } | |
1105 } | |
1106 | |
1107 void Parse3CachedRE2(int iters, const char* regexp, const StringPiece& text) { | |
1108 RE2 re(regexp); | |
1109 CHECK_EQ(re.error(), ""); | |
1110 StringPiece sp1, sp2, sp3; | |
1111 for (int i = 0; i < iters; i++) { | |
1112 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3)); | |
1113 } | |
1114 } | |
1115 | |
1116 | |
// Runs implementation to full match regexp against text,
// extracting one submatch.  Expects match always.
1119 | |
1120 void Parse1NFA(int iters, const char* regexp, const StringPiece& text) { | |
1121 for (int i = 0; i < iters; i++) { | |
1122 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
1123 CHECK(re); | |
1124 Prog* prog = re->CompileToProg(0); | |
1125 CHECK(prog); | |
1126 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
1127 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2))
; | |
1128 delete prog; | |
1129 re->Decref(); | |
1130 } | |
1131 } | |
1132 | |
1133 void Parse1OnePass(int iters, const char* regexp, const StringPiece& text) { | |
1134 for (int i = 0; i < iters; i++) { | |
1135 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
1136 CHECK(re); | |
1137 Prog* prog = re->CompileToProg(0); | |
1138 CHECK(prog); | |
1139 CHECK(prog->IsOnePass()); | |
1140 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
1141 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp,
2)); | |
1142 delete prog; | |
1143 re->Decref(); | |
1144 } | |
1145 } | |
1146 | |
1147 void Parse1BitState(int iters, const char* regexp, const StringPiece& text) { | |
1148 for (int i = 0; i < iters; i++) { | |
1149 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
1150 CHECK(re); | |
1151 Prog* prog = re->CompileToProg(0); | |
1152 CHECK(prog); | |
1153 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
1154 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp
, 2)); | |
1155 delete prog; | |
1156 re->Decref(); | |
1157 } | |
1158 } | |
1159 | |
1160 void Parse1PCRE(int iters, const char* regexp, const StringPiece& text) { | |
1161 for (int i = 0; i < iters; i++) { | |
1162 PCRE re(regexp, PCRE::UTF8); | |
1163 CHECK_EQ(re.error(), ""); | |
1164 StringPiece sp1; | |
1165 CHECK(PCRE::FullMatch(text, re, &sp1)); | |
1166 } | |
1167 } | |
1168 | |
1169 void Parse1RE2(int iters, const char* regexp, const StringPiece& text) { | |
1170 for (int i = 0; i < iters; i++) { | |
1171 RE2 re(regexp); | |
1172 CHECK_EQ(re.error(), ""); | |
1173 StringPiece sp1; | |
1174 CHECK(RE2::FullMatch(text, re, &sp1)); | |
1175 } | |
1176 } | |
1177 | |
1178 void Parse1CachedNFA(int iters, const char* regexp, const StringPiece& text) { | |
1179 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
1180 CHECK(re); | |
1181 Prog* prog = re->CompileToProg(0); | |
1182 CHECK(prog); | |
1183 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
1184 for (int i = 0; i < iters; i++) { | |
1185 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2))
; | |
1186 } | |
1187 delete prog; | |
1188 re->Decref(); | |
1189 } | |
1190 | |
1191 void Parse1CachedOnePass(int iters, const char* regexp, const StringPiece& text)
{ | |
1192 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
1193 CHECK(re); | |
1194 Prog* prog = re->CompileToProg(0); | |
1195 CHECK(prog); | |
1196 CHECK(prog->IsOnePass()); | |
1197 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
1198 for (int i = 0; i < iters; i++) | |
1199 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp,
2)); | |
1200 delete prog; | |
1201 re->Decref(); | |
1202 } | |
1203 | |
1204 void Parse1CachedBitState(int iters, const char* regexp, const StringPiece& text
) { | |
1205 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
1206 CHECK(re); | |
1207 Prog* prog = re->CompileToProg(0); | |
1208 CHECK(prog); | |
1209 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
1210 for (int i = 0; i < iters; i++) | |
1211 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp
, 2)); | |
1212 delete prog; | |
1213 re->Decref(); | |
1214 } | |
1215 | |
1216 void Parse1CachedBacktrack(int iters, const char* regexp, const StringPiece& tex
t) { | |
1217 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); | |
1218 CHECK(re); | |
1219 Prog* prog = re->CompileToProg(0); | |
1220 CHECK(prog); | |
1221 StringPiece sp[2]; // 2 because sp[0] is whole match. | |
1222 for (int i = 0; i < iters; i++) | |
1223 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMa
tch, sp, 2)); | |
1224 delete prog; | |
1225 re->Decref(); | |
1226 } | |
1227 | |
1228 void Parse1CachedPCRE(int iters, const char* regexp, const StringPiece& text) { | |
1229 PCRE re(regexp, PCRE::UTF8); | |
1230 CHECK_EQ(re.error(), ""); | |
1231 StringPiece sp1; | |
1232 for (int i = 0; i < iters; i++) { | |
1233 CHECK(PCRE::FullMatch(text, re, &sp1)); | |
1234 } | |
1235 } | |
1236 | |
1237 void Parse1CachedRE2(int iters, const char* regexp, const StringPiece& text) { | |
1238 RE2 re(regexp); | |
1239 CHECK_EQ(re.error(), ""); | |
1240 StringPiece sp1; | |
1241 for (int i = 0; i < iters; i++) { | |
1242 CHECK(RE2::FullMatch(text, re, &sp1)); | |
1243 } | |
1244 } | |
1245 | |
1246 void SearchParse2CachedPCRE(int iters, const char* regexp, | |
1247 const StringPiece& text) { | |
1248 PCRE re(regexp, PCRE::UTF8); | |
1249 CHECK_EQ(re.error(), ""); | |
1250 for (int i = 0; i < iters; i++) { | |
1251 StringPiece sp1, sp2; | |
1252 CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2)); | |
1253 } | |
1254 } | |
1255 | |
1256 void SearchParse2CachedRE2(int iters, const char* regexp, | |
1257 const StringPiece& text) { | |
1258 RE2 re(regexp); | |
1259 CHECK_EQ(re.error(), ""); | |
1260 for (int i = 0; i < iters; i++) { | |
1261 StringPiece sp1, sp2; | |
1262 CHECK(RE2::PartialMatch(text, re, &sp1, &sp2)); | |
1263 } | |
1264 } | |
1265 | |
1266 void SearchParse1CachedPCRE(int iters, const char* regexp, | |
1267 const StringPiece& text) { | |
1268 PCRE re(regexp, PCRE::UTF8); | |
1269 CHECK_EQ(re.error(), ""); | |
1270 for (int i = 0; i < iters; i++) { | |
1271 StringPiece sp1; | |
1272 CHECK(PCRE::PartialMatch(text, re, &sp1)); | |
1273 } | |
1274 } | |
1275 | |
1276 void SearchParse1CachedRE2(int iters, const char* regexp, | |
1277 const StringPiece& text) { | |
1278 RE2 re(regexp); | |
1279 CHECK_EQ(re.error(), ""); | |
1280 for (int i = 0; i < iters; i++) { | |
1281 StringPiece sp1; | |
1282 CHECK(RE2::PartialMatch(text, re, &sp1)); | |
1283 } | |
1284 } | |
1285 | |
1286 void EmptyPartialMatchPCRE(int n) { | |
1287 PCRE re(""); | |
1288 for (int i = 0; i < n; i++) { | |
1289 PCRE::PartialMatch("", re); | |
1290 } | |
1291 } | |
1292 | |
1293 void EmptyPartialMatchRE2(int n) { | |
1294 RE2 re(""); | |
1295 for (int i = 0; i < n; i++) { | |
1296 RE2::PartialMatch("", re); | |
1297 } | |
1298 } | |
1299 #ifdef USEPCRE | |
1300 BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs()); | |
1301 #endif | |
1302 BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs()); | |
1303 | |
1304 void SimplePartialMatchPCRE(int n) { | |
1305 PCRE re("abcdefg"); | |
1306 for (int i = 0; i < n; i++) { | |
1307 PCRE::PartialMatch("abcdefg", re); | |
1308 } | |
1309 } | |
1310 | |
1311 void SimplePartialMatchRE2(int n) { | |
1312 RE2 re("abcdefg"); | |
1313 for (int i = 0; i < n; i++) { | |
1314 RE2::PartialMatch("abcdefg", re); | |
1315 } | |
1316 } | |
1317 #ifdef USEPCRE | |
1318 BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs()); | |
1319 #endif | |
1320 BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs()); | |
1321 | |
1322 static string http_text = | |
1323 "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf" | |
1324 "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1"; | |
1325 | |
1326 void HTTPPartialMatchPCRE(int n) { | |
1327 StringPiece a; | |
1328 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); | |
1329 for (int i = 0; i < n; i++) { | |
1330 PCRE::PartialMatch(http_text, re, &a); | |
1331 } | |
1332 } | |
1333 | |
1334 void HTTPPartialMatchRE2(int n) { | |
1335 StringPiece a; | |
1336 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); | |
1337 for (int i = 0; i < n; i++) { | |
1338 RE2::PartialMatch(http_text, re, &a); | |
1339 } | |
1340 } | |
1341 | |
1342 #ifdef USEPCRE | |
1343 BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs()); | |
1344 #endif | |
1345 BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs()); | |
1346 | |
1347 static string http_smalltext = | |
1348 "GET /abc HTTP/1.1"; | |
1349 | |
1350 void SmallHTTPPartialMatchPCRE(int n) { | |
1351 StringPiece a; | |
1352 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); | |
1353 for (int i = 0; i < n; i++) { | |
1354 PCRE::PartialMatch(http_text, re, &a); | |
1355 } | |
1356 } | |
1357 | |
1358 void SmallHTTPPartialMatchRE2(int n) { | |
1359 StringPiece a; | |
1360 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); | |
1361 for (int i = 0; i < n; i++) { | |
1362 RE2::PartialMatch(http_text, re, &a); | |
1363 } | |
1364 } | |
1365 | |
1366 #ifdef USEPCRE | |
1367 BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs()); | |
1368 #endif | |
1369 BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs()); | |
1370 | |
1371 void DotMatchPCRE(int n) { | |
1372 StringPiece a; | |
1373 PCRE re("(?-s)^(.+)"); | |
1374 for (int i = 0; i < n; i++) { | |
1375 PCRE::PartialMatch(http_text, re, &a); | |
1376 } | |
1377 } | |
1378 | |
1379 void DotMatchRE2(int n) { | |
1380 StringPiece a; | |
1381 RE2 re("(?-s)^(.+)"); | |
1382 for (int i = 0; i < n; i++) { | |
1383 RE2::PartialMatch(http_text, re, &a); | |
1384 } | |
1385 } | |
1386 | |
1387 #ifdef USEPCRE | |
1388 BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs()); | |
1389 #endif | |
1390 BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs()); | |
1391 | |
1392 void ASCIIMatchPCRE(int n) { | |
1393 StringPiece a; | |
1394 PCRE re("(?-s)^([ -~]+)"); | |
1395 for (int i = 0; i < n; i++) { | |
1396 PCRE::PartialMatch(http_text, re, &a); | |
1397 } | |
1398 } | |
1399 | |
1400 void ASCIIMatchRE2(int n) { | |
1401 StringPiece a; | |
1402 RE2 re("(?-s)^([ -~]+)"); | |
1403 for (int i = 0; i < n; i++) { | |
1404 RE2::PartialMatch(http_text, re, &a); | |
1405 } | |
1406 } | |
1407 | |
1408 #ifdef USEPCRE | |
1409 BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs()); | |
1410 #endif | |
1411 BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs()); | |
1412 | |
1413 void FullMatchPCRE(int iter, int n, const char *regexp) { | |
1414 StopBenchmarkTiming(); | |
1415 string s; | |
1416 MakeText(&s, n); | |
1417 s += "ABCDEFGHIJ"; | |
1418 BenchmarkMemoryUsage(); | |
1419 PCRE re(regexp); | |
1420 StartBenchmarkTiming(); | |
1421 for (int i = 0; i < iter; i++) | |
1422 CHECK(PCRE::FullMatch(s, re)); | |
1423 SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n); | |
1424 } | |
1425 | |
1426 void FullMatchRE2(int iter, int n, const char *regexp) { | |
1427 StopBenchmarkTiming(); | |
1428 string s; | |
1429 MakeText(&s, n); | |
1430 s += "ABCDEFGHIJ"; | |
1431 BenchmarkMemoryUsage(); | |
1432 RE2 re(regexp, RE2::Latin1); | |
1433 StartBenchmarkTiming(); | |
1434 for (int i = 0; i < iter; i++) | |
1435 CHECK(RE2::FullMatch(s, re)); | |
1436 SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n); | |
1437 } | |
1438 | |
1439 void FullMatch_DotStar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*");
} | |
1440 void FullMatch_DotStar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s).*");
} | |
1441 | |
1442 void FullMatch_DotStarDollar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s
).*$"); } | |
1443 void FullMatch_DotStarDollar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s)
.*$"); } | |
1444 | |
1445 void FullMatch_DotStarCapture_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?
s)((.*)()()($))"); } | |
1446 void FullMatch_DotStarCapture_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s
)((.*)()()($))"); } | |
1447 | |
1448 #ifdef USEPCRE | |
1449 BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20); | |
1450 #endif | |
1451 BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 8, 2<<20); | |
1452 | |
1453 #ifdef USEPCRE | |
1454 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20); | |
1455 #endif | |
1456 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20); | |
1457 | |
1458 #ifdef USEPCRE | |
1459 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20); | |
1460 #endif | |
1461 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20); | |
1462 | |
1463 } // namespace re2 | |
OLD | NEW |