| OLD | NEW |
| (Empty) |
| 1 // Copyright 2008 The RE2 Authors. All Rights Reserved. | |
| 2 // Use of this source code is governed by a BSD-style | |
| 3 // license that can be found in the LICENSE file. | |
| 4 | |
| 5 // Regular expression engine tester -- test all the implementations against each
other. | |
| 6 | |
| 7 #include "util/util.h" | |
| 8 #include "util/flags.h" | |
| 9 #include "re2/testing/tester.h" | |
| 10 #include "re2/prog.h" | |
| 11 #include "re2/re2.h" | |
| 12 #include "re2/regexp.h" | |
| 13 | |
| 14 DEFINE_bool(dump_prog, false, "dump regexp program"); | |
| 15 DEFINE_bool(log_okay, false, "log successful runs"); | |
| 16 DEFINE_bool(dump_rprog, false, "dump reversed regexp program"); | |
| 17 | |
| 18 DEFINE_int32(max_regexp_failures, 100, | |
| 19 "maximum number of regexp test failures (-1 = unlimited)"); | |
| 20 | |
| 21 DEFINE_string(regexp_engines, "", "pattern to select regexp engines to test"); | |
| 22 | |
| 23 namespace re2 { | |
| 24 | |
| 25 enum { | |
| 26 kMaxSubmatch = 1+16, // $0...$16 | |
| 27 }; | |
| 28 | |
| 29 const char* engine_types[kEngineMax] = { | |
| 30 "Backtrack", | |
| 31 "NFA", | |
| 32 "DFA", | |
| 33 "DFA1", | |
| 34 "OnePass", | |
| 35 "BitState", | |
| 36 "RE2", | |
| 37 "RE2a", | |
| 38 "RE2b", | |
| 39 "PCRE", | |
| 40 }; | |
| 41 | |
| 42 // Returns the name string for the type t. | |
| 43 static string EngineString(Engine t) { | |
| 44 if (t < 0 || t >= arraysize(engine_types) || engine_types[t] == NULL) { | |
| 45 return StringPrintf("type%d", static_cast<int>(t)); | |
| 46 } | |
| 47 return engine_types[t]; | |
| 48 } | |
| 49 | |
| 50 // Returns bit mask of engines to use. | |
| 51 static uint32 Engines() { | |
| 52 static uint32 cached_engines; | |
| 53 static bool did_parse; | |
| 54 | |
| 55 if (did_parse) | |
| 56 return cached_engines; | |
| 57 | |
| 58 if (FLAGS_regexp_engines.empty()) { | |
| 59 cached_engines = ~0; | |
| 60 } else { | |
| 61 for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) | |
| 62 if (strstr(EngineString(i).c_str(), FLAGS_regexp_engines.c_str())) | |
| 63 cached_engines |= 1<<i; | |
| 64 } | |
| 65 | |
| 66 if (cached_engines == 0) | |
| 67 LOG(INFO) << "Warning: no engines enabled."; | |
| 68 if (!UsingPCRE) | |
| 69 cached_engines &= ~(1<<kEnginePCRE); | |
| 70 for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) { | |
| 71 if (cached_engines & (1<<i)) | |
| 72 LOG(INFO) << EngineString(i) << " enabled"; | |
| 73 } | |
| 74 did_parse = true; | |
| 75 return cached_engines; | |
| 76 } | |
| 77 | |
| 78 // The result of running a match. | |
| 79 struct TestInstance::Result { | |
| 80 bool skipped; // test skipped: wasn't applicable | |
| 81 bool matched; // found a match | |
| 82 bool untrusted; // don't really trust the answer | |
| 83 bool have_submatch; // computed all submatch info | |
| 84 bool have_submatch0; // computed just submatch[0] | |
| 85 StringPiece submatch[kMaxSubmatch]; | |
| 86 }; | |
| 87 | |
| 88 typedef TestInstance::Result Result; | |
| 89 | |
| 90 // Formats a single capture range s in text in the form (a,b) | |
| 91 // where a and b are the starting and ending offsets of s in text. | |
| 92 static string FormatCapture(const StringPiece& text, const StringPiece& s) { | |
| 93 if (s.begin() == NULL) | |
| 94 return "(?,?)"; | |
| 95 return StringPrintf("(%d,%d)", | |
| 96 static_cast<int>(s.begin() - text.begin()), | |
| 97 static_cast<int>(s.end() - text.begin())); | |
| 98 } | |
| 99 | |
| 100 // Returns whether text contains non-ASCII (>= 0x80) bytes. | |
| 101 static bool NonASCII(const StringPiece& text) { | |
| 102 for (int i = 0; i < text.size(); i++) | |
| 103 if ((uint8)text[i] >= 0x80) | |
| 104 return true; | |
| 105 return false; | |
| 106 } | |
| 107 | |
| 108 // Returns string representation of match kind. | |
| 109 static string FormatKind(Prog::MatchKind kind) { | |
| 110 switch (kind) { | |
| 111 case Prog::kFullMatch: | |
| 112 return "full match"; | |
| 113 case Prog::kLongestMatch: | |
| 114 return "longest match"; | |
| 115 case Prog::kFirstMatch: | |
| 116 return "first match"; | |
| 117 case Prog::kManyMatch: | |
| 118 return "many match"; | |
| 119 } | |
| 120 return "???"; | |
| 121 } | |
| 122 | |
| 123 // Returns string representation of anchor kind. | |
| 124 static string FormatAnchor(Prog::Anchor anchor) { | |
| 125 switch (anchor) { | |
| 126 case Prog::kAnchored: | |
| 127 return "anchored"; | |
| 128 case Prog::kUnanchored: | |
| 129 return "unanchored"; | |
| 130 } | |
| 131 return "???"; | |
| 132 } | |
| 133 | |
| 134 struct ParseMode { | |
| 135 Regexp::ParseFlags parse_flags; | |
| 136 string desc; | |
| 137 }; | |
| 138 | |
| 139 static const Regexp::ParseFlags single_line = | |
| 140 Regexp::LikePerl; | |
| 141 static const Regexp::ParseFlags multi_line = | |
| 142 static_cast<Regexp::ParseFlags>(Regexp::LikePerl & ~Regexp::OneLine); | |
| 143 | |
| 144 static ParseMode parse_modes[] = { | |
| 145 { single_line, "single-line" }, | |
| 146 { single_line|Regexp::Latin1, "single-line, latin1" }, | |
| 147 { multi_line, "multiline" }, | |
| 148 { multi_line|Regexp::NonGreedy, "multiline, nongreedy" }, | |
| 149 { multi_line|Regexp::Latin1, "multiline, latin1" }, | |
| 150 }; | |
| 151 | |
| 152 static string FormatMode(Regexp::ParseFlags flags) { | |
| 153 for (int i = 0; i < arraysize(parse_modes); i++) | |
| 154 if (parse_modes[i].parse_flags == flags) | |
| 155 return parse_modes[i].desc; | |
| 156 return StringPrintf("%#x", static_cast<uint>(flags)); | |
| 157 } | |
| 158 | |
| 159 // Constructs and saves all the matching engines that | |
| 160 // will be required for the given tests. | |
| 161 TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind, | |
| 162 Regexp::ParseFlags flags) | |
| 163 : regexp_str_(regexp_str), | |
| 164 kind_(kind), | |
| 165 flags_(flags), | |
| 166 error_(false), | |
| 167 regexp_(NULL), | |
| 168 num_captures_(0), | |
| 169 prog_(NULL), | |
| 170 rprog_(NULL), | |
| 171 re_(NULL), | |
| 172 re2_(NULL) { | |
| 173 | |
| 174 VLOG(1) << CEscape(regexp_str); | |
| 175 | |
| 176 // Compile regexp to prog. | |
| 177 // Always required - needed for backtracking (reference implementation). | |
| 178 RegexpStatus status; | |
| 179 regexp_ = Regexp::Parse(regexp_str, flags, &status); | |
| 180 if (regexp_ == NULL) { | |
| 181 LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_) | |
| 182 << " mode: " << FormatMode(flags); | |
| 183 error_ = true; | |
| 184 return; | |
| 185 } | |
| 186 num_captures_ = regexp_->NumCaptures(); | |
| 187 prog_ = regexp_->CompileToProg(0); | |
| 188 if (prog_ == NULL) { | |
| 189 LOG(INFO) << "Cannot compile: " << CEscape(regexp_str_); | |
| 190 error_ = true; | |
| 191 return; | |
| 192 } | |
| 193 if (FLAGS_dump_prog) { | |
| 194 LOG(INFO) << "Prog for " | |
| 195 << " regexp " | |
| 196 << CEscape(regexp_str_) | |
| 197 << " (" << FormatKind(kind_) | |
| 198 << ", " << FormatMode(flags_) | |
| 199 << ")\n" | |
| 200 << prog_->Dump(); | |
| 201 } | |
| 202 | |
| 203 // Compile regexp to reversed prog. Only needed for DFA engines. | |
| 204 if (Engines() & ((1<<kEngineDFA)|(1<<kEngineDFA1))) { | |
| 205 rprog_ = regexp_->CompileToReverseProg(0); | |
| 206 if (rprog_ == NULL) { | |
| 207 LOG(INFO) << "Cannot reverse compile: " << CEscape(regexp_str_); | |
| 208 error_ = true; | |
| 209 return; | |
| 210 } | |
| 211 if (FLAGS_dump_rprog) | |
| 212 LOG(INFO) << rprog_->Dump(); | |
| 213 } | |
| 214 | |
| 215 // Create re string that will be used for RE and RE2. | |
| 216 string re = regexp_str.as_string(); | |
| 217 // Accomodate flags. | |
| 218 // Regexp::Latin1 will be accomodated below. | |
| 219 if (!(flags & Regexp::OneLine)) | |
| 220 re = "(?m)" + re; | |
| 221 if (flags & Regexp::NonGreedy) | |
| 222 re = "(?U)" + re; | |
| 223 if (flags & Regexp::DotNL) | |
| 224 re = "(?s)" + re; | |
| 225 | |
| 226 // Compile regexp to RE2. | |
| 227 if (Engines() & ((1<<kEngineRE2)|(1<<kEngineRE2a)|(1<<kEngineRE2b))) { | |
| 228 RE2::Options options; | |
| 229 if (flags & Regexp::Latin1) | |
| 230 options.set_encoding(RE2::Options::EncodingLatin1); | |
| 231 if (kind_ == Prog::kLongestMatch) | |
| 232 options.set_longest_match(true); | |
| 233 re2_ = new RE2(re, options); | |
| 234 if (!re2_->error().empty()) { | |
| 235 LOG(INFO) << "Cannot RE2: " << CEscape(re); | |
| 236 error_ = true; | |
| 237 return; | |
| 238 } | |
| 239 } | |
| 240 | |
| 241 // Compile regexp to RE. | |
| 242 // PCRE as exposed by the RE interface isn't always usable. | |
| 243 // 1. It disagrees about handling of empty-string reptitions | |
| 244 // like matching (a*)* against "b". PCRE treats the (a*) as | |
| 245 // occurring once, while we treat it as occurring not at all. | |
| 246 // 2. It treats $ as this weird thing meaning end of string | |
| 247 // or before the \n at the end of the string. | |
| 248 // 3. It doesn't implement POSIX leftmost-longest matching. | |
| 249 // 4. It lets \s match vertical tab. | |
| 250 // MimicsPCRE() detects 1 and 2. | |
| 251 if ((Engines() & (1<<kEnginePCRE)) && regexp_->MimicsPCRE() && | |
| 252 kind_ != Prog::kLongestMatch) { | |
| 253 PCRE_Options o; | |
| 254 o.set_option(PCRE::UTF8); | |
| 255 if (flags & Regexp::Latin1) | |
| 256 o.set_option(PCRE::None); | |
| 257 // PCRE has interface bug keeping us from finding $0, so | |
| 258 // add one more layer of parens. | |
| 259 re_ = new PCRE("("+re+")", o); | |
| 260 if (!re_->error().empty()) { | |
| 261 LOG(INFO) << "Cannot PCRE: " << CEscape(re); | |
| 262 error_ = true; | |
| 263 return; | |
| 264 } | |
| 265 } | |
| 266 } | |
| 267 | |
| 268 TestInstance::~TestInstance() { | |
| 269 if (regexp_) | |
| 270 regexp_->Decref(); | |
| 271 delete prog_; | |
| 272 delete rprog_; | |
| 273 delete re_; | |
| 274 delete re2_; | |
| 275 } | |
| 276 | |
| 277 // Runs a single search using the named engine type. | |
| 278 // This interface hides all the irregularities of the various | |
| 279 // engine interfaces from the rest of this file. | |
| 280 void TestInstance::RunSearch(Engine type, | |
| 281 const StringPiece& orig_text, | |
| 282 const StringPiece& orig_context, | |
| 283 Prog::Anchor anchor, | |
| 284 Result *result) { | |
| 285 memset(result, 0, sizeof *result); | |
| 286 if (regexp_ == NULL) { | |
| 287 result->skipped = true; | |
| 288 return; | |
| 289 } | |
| 290 int nsubmatch = 1 + num_captures_; // NumCaptures doesn't count $0 | |
| 291 if (nsubmatch > kMaxSubmatch) | |
| 292 nsubmatch = kMaxSubmatch; | |
| 293 | |
| 294 StringPiece text = orig_text; | |
| 295 StringPiece context = orig_context; | |
| 296 | |
| 297 switch (type) { | |
| 298 default: | |
| 299 LOG(FATAL) << "Bad RunSearch type: " << (int)type; | |
| 300 | |
| 301 case kEngineBacktrack: | |
| 302 if (prog_ == NULL) { | |
| 303 result->skipped = true; | |
| 304 break; | |
| 305 } | |
| 306 result->matched = | |
| 307 prog_->UnsafeSearchBacktrack(text, context, anchor, kind_, | |
| 308 result->submatch, nsubmatch); | |
| 309 result->have_submatch = true; | |
| 310 break; | |
| 311 | |
| 312 case kEngineNFA: | |
| 313 if (prog_ == NULL) { | |
| 314 result->skipped = true; | |
| 315 break; | |
| 316 } | |
| 317 result->matched = | |
| 318 prog_->SearchNFA(text, context, anchor, kind_, | |
| 319 result->submatch, nsubmatch); | |
| 320 result->have_submatch = true; | |
| 321 break; | |
| 322 | |
| 323 case kEngineDFA: | |
| 324 if (prog_ == NULL) { | |
| 325 result->skipped = true; | |
| 326 break; | |
| 327 } | |
| 328 result->matched = prog_->SearchDFA(text, context, anchor, kind_, NULL, | |
| 329 &result->skipped, NULL); | |
| 330 break; | |
| 331 | |
| 332 case kEngineDFA1: | |
| 333 if (prog_ == NULL || rprog_ == NULL) { | |
| 334 result->skipped = true; | |
| 335 break; | |
| 336 } | |
| 337 result->matched = | |
| 338 prog_->SearchDFA(text, context, anchor, kind_, result->submatch, | |
| 339 &result->skipped, NULL); | |
| 340 // If anchored, no need for second run, | |
| 341 // but do it anyway to find more bugs. | |
| 342 if (result->matched) { | |
| 343 if (!rprog_->SearchDFA(result->submatch[0], context, | |
| 344 Prog::kAnchored, Prog::kLongestMatch, | |
| 345 result->submatch, | |
| 346 &result->skipped, NULL)) { | |
| 347 LOG(ERROR) << "Reverse DFA inconsistency: " | |
| 348 << CEscape(regexp_str_) | |
| 349 << " on " << CEscape(text); | |
| 350 result->matched = false; | |
| 351 } | |
| 352 } | |
| 353 result->have_submatch0 = true; | |
| 354 break; | |
| 355 | |
| 356 case kEngineOnePass: | |
| 357 if (prog_ == NULL || | |
| 358 anchor == Prog::kUnanchored || | |
| 359 !prog_->IsOnePass() || | |
| 360 nsubmatch > Prog::kMaxOnePassCapture) { | |
| 361 result->skipped = true; | |
| 362 break; | |
| 363 } | |
| 364 result->matched = prog_->SearchOnePass(text, context, anchor, kind_, | |
| 365 result->submatch, nsubmatch); | |
| 366 result->have_submatch = true; | |
| 367 break; | |
| 368 | |
| 369 case kEngineBitState: | |
| 370 if (prog_ == NULL) { | |
| 371 result->skipped = true; | |
| 372 break; | |
| 373 } | |
| 374 result->matched = prog_->SearchBitState(text, context, anchor, kind_, | |
| 375 result->submatch, nsubmatch); | |
| 376 result->have_submatch = true; | |
| 377 break; | |
| 378 | |
| 379 case kEngineRE2: | |
| 380 case kEngineRE2a: | |
| 381 case kEngineRE2b: { | |
| 382 if (!re2_ || text.end() != context.end()) { | |
| 383 result->skipped = true; | |
| 384 break; | |
| 385 } | |
| 386 | |
| 387 RE2::Anchor re_anchor; | |
| 388 if (anchor == Prog::kAnchored) | |
| 389 re_anchor = RE2::ANCHOR_START; | |
| 390 else | |
| 391 re_anchor = RE2::UNANCHORED; | |
| 392 if (kind_ == Prog::kFullMatch) | |
| 393 re_anchor = RE2::ANCHOR_BOTH; | |
| 394 | |
| 395 result->matched = re2_->Match( | |
| 396 context, | |
| 397 static_cast<int>(text.begin() - context.begin()), | |
| 398 static_cast<int>(text.end() - context.begin()), | |
| 399 re_anchor, | |
| 400 result->submatch, | |
| 401 nsubmatch); | |
| 402 result->have_submatch = nsubmatch > 0; | |
| 403 break; | |
| 404 } | |
| 405 | |
| 406 case kEnginePCRE: { | |
| 407 if (!re_ || text.begin() != context.begin() || | |
| 408 text.end() != context.end()) { | |
| 409 result->skipped = true; | |
| 410 break; | |
| 411 } | |
| 412 | |
| 413 // PCRE 8.34 or so started allowing vertical tab to match \s, | |
| 414 // following a change made in Perl 5.18. RE2 does not. | |
| 415 if ((regexp_str_.contains("\\s") || regexp_str_.contains("\\S")) && | |
| 416 text.contains("\v")) { | |
| 417 result->skipped = true; | |
| 418 break; | |
| 419 } | |
| 420 | |
| 421 const PCRE::Arg **argptr = new const PCRE::Arg*[nsubmatch]; | |
| 422 PCRE::Arg *a = new PCRE::Arg[nsubmatch]; | |
| 423 for (int i = 0; i < nsubmatch; i++) { | |
| 424 a[i] = PCRE::Arg(&result->submatch[i]); | |
| 425 argptr[i] = &a[i]; | |
| 426 } | |
| 427 int consumed; | |
| 428 PCRE::Anchor pcre_anchor; | |
| 429 if (anchor == Prog::kAnchored) | |
| 430 pcre_anchor = PCRE::ANCHOR_START; | |
| 431 else | |
| 432 pcre_anchor = PCRE::UNANCHORED; | |
| 433 if (kind_ == Prog::kFullMatch) | |
| 434 pcre_anchor = PCRE::ANCHOR_BOTH; | |
| 435 re_->ClearHitLimit(); | |
| 436 result->matched = | |
| 437 re_->DoMatch(text, | |
| 438 pcre_anchor, | |
| 439 &consumed, | |
| 440 argptr, nsubmatch); | |
| 441 if (re_->HitLimit()) { | |
| 442 result->untrusted = true; | |
| 443 delete[] argptr; | |
| 444 delete[] a; | |
| 445 break; | |
| 446 } | |
| 447 result->have_submatch = true; | |
| 448 | |
| 449 // Work around RE interface bug: PCRE returns -1 as the | |
| 450 // offsets for an unmatched subexpression, and RE should | |
| 451 // turn that into StringPiece(NULL) but in fact it uses | |
| 452 // StringPiece(text.begin() - 1, 0). Oops. | |
| 453 for (int i = 0; i < nsubmatch; i++) | |
| 454 if (result->submatch[i].begin() == text.begin() - 1) | |
| 455 result->submatch[i] = NULL; | |
| 456 delete[] argptr; | |
| 457 delete[] a; | |
| 458 break; | |
| 459 } | |
| 460 } | |
| 461 | |
| 462 if (!result->matched) | |
| 463 memset(result->submatch, 0, sizeof result->submatch); | |
| 464 } | |
| 465 | |
| 466 // Checks whether r is okay given that correct is the right answer. | |
| 467 // Specifically, r's answers have to match (but it doesn't have to | |
| 468 // claim to have all the answers). | |
| 469 static bool ResultOkay(const Result& r, const Result& correct) { | |
| 470 if (r.skipped) | |
| 471 return true; | |
| 472 if (r.matched != correct.matched) | |
| 473 return false; | |
| 474 if (r.have_submatch || r.have_submatch0) { | |
| 475 for (int i = 0; i < kMaxSubmatch; i++) { | |
| 476 if (correct.submatch[i].begin() != r.submatch[i].begin() || | |
| 477 correct.submatch[i].size() != r.submatch[i].size()) | |
| 478 return false; | |
| 479 if (!r.have_submatch) | |
| 480 break; | |
| 481 } | |
| 482 } | |
| 483 return true; | |
| 484 } | |
| 485 | |
| 486 // Runs a single test. | |
| 487 bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context, | |
| 488 Prog::Anchor anchor) { | |
| 489 // Backtracking is the gold standard. | |
| 490 Result correct; | |
| 491 RunSearch(kEngineBacktrack, text, context, anchor, &correct); | |
| 492 if (correct.skipped) { | |
| 493 if (regexp_ == NULL) | |
| 494 return true; | |
| 495 LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_) | |
| 496 << " " << FormatMode(flags_); | |
| 497 return false; | |
| 498 } | |
| 499 VLOG(1) << "Try: regexp " << CEscape(regexp_str_) | |
| 500 << " text " << CEscape(text) | |
| 501 << " (" << FormatKind(kind_) | |
| 502 << ", " << FormatAnchor(anchor) | |
| 503 << ", " << FormatMode(flags_) | |
| 504 << ")"; | |
| 505 | |
| 506 // Compare the others. | |
| 507 bool all_okay = true; | |
| 508 for (Engine i = kEngineBacktrack+1; i < kEngineMax; i++) { | |
| 509 if (!(Engines() & (1<<i))) | |
| 510 continue; | |
| 511 | |
| 512 Result r; | |
| 513 RunSearch(i, text, context, anchor, &r); | |
| 514 if (ResultOkay(r, correct)) { | |
| 515 if (FLAGS_log_okay) | |
| 516 LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor); | |
| 517 continue; | |
| 518 } | |
| 519 | |
| 520 // We disagree with PCRE on the meaning of some Unicode matches. | |
| 521 // In particular, we treat non-ASCII UTF-8 as non-word characters. | |
| 522 // We also treat "empty" character sets like [^\w\W] as being | |
| 523 // impossible to match, while PCRE apparently excludes some code | |
| 524 // points (e.g., 0x0080) from both \w and \W. | |
| 525 if (i == kEnginePCRE && NonASCII(text)) | |
| 526 continue; | |
| 527 | |
| 528 if (!r.untrusted) | |
| 529 all_okay = false; | |
| 530 | |
| 531 LogMatch(r.untrusted ? "(Untrusted) Mismatch: " : "Mismatch: ", i, text, | |
| 532 context, anchor); | |
| 533 if (r.matched != correct.matched) { | |
| 534 if (r.matched) { | |
| 535 LOG(INFO) << " Should not match (but does)."; | |
| 536 } else { | |
| 537 LOG(INFO) << " Should match (but does not)."; | |
| 538 continue; | |
| 539 } | |
| 540 } | |
| 541 for (int i = 0; i < 1+num_captures_; i++) { | |
| 542 if (r.submatch[i].begin() != correct.submatch[i].begin() || | |
| 543 r.submatch[i].end() != correct.submatch[i].end()) { | |
| 544 LOG(INFO) << | |
| 545 StringPrintf(" $%d: should be %s is %s", | |
| 546 i, | |
| 547 FormatCapture(text, correct.submatch[i]).c_str(), | |
| 548 FormatCapture(text, r.submatch[i]).c_str()); | |
| 549 } else { | |
| 550 LOG(INFO) << | |
| 551 StringPrintf(" $%d: %s ok", i, | |
| 552 FormatCapture(text, r.submatch[i]).c_str()); | |
| 553 } | |
| 554 } | |
| 555 } | |
| 556 | |
| 557 if (!all_okay) { | |
| 558 if (FLAGS_max_regexp_failures > 0 && --FLAGS_max_regexp_failures == 0) | |
| 559 LOG(QFATAL) << "Too many regexp failures."; | |
| 560 } | |
| 561 | |
| 562 return all_okay; | |
| 563 } | |
| 564 | |
| 565 void TestInstance::LogMatch(const char* prefix, Engine e, | |
| 566 const StringPiece& text, const StringPiece& context, | |
| 567 Prog::Anchor anchor) { | |
| 568 LOG(INFO) << prefix | |
| 569 << EngineString(e) | |
| 570 << " regexp " | |
| 571 << CEscape(regexp_str_) | |
| 572 << " " | |
| 573 << CEscape(regexp_->ToString()) | |
| 574 << " text " | |
| 575 << CEscape(text) | |
| 576 << " (" | |
| 577 << text.begin() - context.begin() | |
| 578 << "," | |
| 579 << text.end() - context.begin() | |
| 580 << ") of context " | |
| 581 << CEscape(context) | |
| 582 << " (" << FormatKind(kind_) | |
| 583 << ", " << FormatAnchor(anchor) | |
| 584 << ", " << FormatMode(flags_) | |
| 585 << ")"; | |
| 586 } | |
| 587 | |
| 588 static Prog::MatchKind kinds[] = { | |
| 589 Prog::kFirstMatch, | |
| 590 Prog::kLongestMatch, | |
| 591 Prog::kFullMatch, | |
| 592 }; | |
| 593 | |
| 594 // Test all possible match kinds and parse modes. | |
| 595 Tester::Tester(const StringPiece& regexp) { | |
| 596 error_ = false; | |
| 597 for (int i = 0; i < arraysize(kinds); i++) { | |
| 598 for (int j = 0; j < arraysize(parse_modes); j++) { | |
| 599 TestInstance* t = new TestInstance(regexp, kinds[i], | |
| 600 parse_modes[j].parse_flags); | |
| 601 error_ |= t->error(); | |
| 602 v_.push_back(t); | |
| 603 } | |
| 604 } | |
| 605 } | |
| 606 | |
| 607 Tester::~Tester() { | |
| 608 for (size_t i = 0; i < v_.size(); i++) | |
| 609 delete v_[i]; | |
| 610 } | |
| 611 | |
| 612 bool Tester::TestCase(const StringPiece& text, const StringPiece& context, | |
| 613 Prog::Anchor anchor) { | |
| 614 bool okay = true; | |
| 615 for (size_t i = 0; i < v_.size(); i++) | |
| 616 okay &= (!v_[i]->error() && v_[i]->RunCase(text, context, anchor)); | |
| 617 return okay; | |
| 618 } | |
| 619 | |
| 620 static Prog::Anchor anchors[] = { | |
| 621 Prog::kAnchored, | |
| 622 Prog::kUnanchored | |
| 623 }; | |
| 624 | |
| 625 bool Tester::TestInput(const StringPiece& text) { | |
| 626 bool okay = TestInputInContext(text, text); | |
| 627 if (text.size() > 0) { | |
| 628 StringPiece sp; | |
| 629 sp = text; | |
| 630 sp.remove_prefix(1); | |
| 631 okay &= TestInputInContext(sp, text); | |
| 632 sp = text; | |
| 633 sp.remove_suffix(1); | |
| 634 okay &= TestInputInContext(sp, text); | |
| 635 } | |
| 636 return okay; | |
| 637 } | |
| 638 | |
| 639 bool Tester::TestInputInContext(const StringPiece& text, | |
| 640 const StringPiece& context) { | |
| 641 bool okay = true; | |
| 642 for (int i = 0; i < arraysize(anchors); i++) | |
| 643 okay &= TestCase(text, context, anchors[i]); | |
| 644 return okay; | |
| 645 } | |
| 646 | |
| 647 bool TestRegexpOnText(const StringPiece& regexp, | |
| 648 const StringPiece& text) { | |
| 649 Tester t(regexp); | |
| 650 return t.TestInput(text); | |
| 651 } | |
| 652 | |
| 653 } // namespace re2 | |
| OLD | NEW |