| OLD | NEW |
| (Empty) |
| 1 // Copyright 2009 The RE2 Authors. All Rights Reserved. | |
| 2 // Use of this source code is governed by a BSD-style | |
| 3 // license that can be found in the LICENSE file. | |
| 4 | |
| 5 #include "util/util.h" | |
| 6 #include "re2/prefilter.h" | |
| 7 #include "re2/re2.h" | |
| 8 #include "re2/unicode_casefold.h" | |
| 9 #include "re2/walker-inl.h" | |
| 10 | |
| 11 namespace re2 { | |
| 12 | |
| 13 static const int Trace = false; | |
| 14 | |
| 15 typedef set<string>::iterator SSIter; | |
| 16 typedef set<string>::const_iterator ConstSSIter; | |
| 17 | |
| 18 GLOBAL_MUTEX(alloc_id_mutex); | |
| 19 static int alloc_id = 100000; // Used for debugging. | |
| 20 // Initializes a Prefilter, allocating subs_ as necessary. | |
| 21 Prefilter::Prefilter(Op op) { | |
| 22 op_ = op; | |
| 23 subs_ = NULL; | |
| 24 if (op_ == AND || op_ == OR) | |
| 25 subs_ = new vector<Prefilter*>; | |
| 26 | |
| 27 GLOBAL_MUTEX_LOCK(alloc_id_mutex); | |
| 28 alloc_id_ = alloc_id++; | |
| 29 GLOBAL_MUTEX_UNLOCK(alloc_id_mutex); | |
| 30 VLOG(10) << "alloc_id: " << alloc_id_; | |
| 31 } | |
| 32 | |
| 33 // Destroys a Prefilter. | |
| 34 Prefilter::~Prefilter() { | |
| 35 VLOG(10) << "Deleted: " << alloc_id_; | |
| 36 if (subs_) { | |
| 37 for (size_t i = 0; i < subs_->size(); i++) | |
| 38 delete (*subs_)[i]; | |
| 39 delete subs_; | |
| 40 subs_ = NULL; | |
| 41 } | |
| 42 } | |
| 43 | |
| 44 // Simplify if the node is an empty Or or And. | |
| 45 Prefilter* Prefilter::Simplify() { | |
| 46 if (op_ != AND && op_ != OR) { | |
| 47 return this; | |
| 48 } | |
| 49 | |
| 50 // Nothing left in the AND/OR. | |
| 51 if (subs_->size() == 0) { | |
| 52 if (op_ == AND) | |
| 53 op_ = ALL; // AND of nothing is true | |
| 54 else | |
| 55 op_ = NONE; // OR of nothing is false | |
| 56 | |
| 57 return this; | |
| 58 } | |
| 59 | |
| 60 // Just one subnode: throw away wrapper. | |
| 61 if (subs_->size() == 1) { | |
| 62 Prefilter* a = (*subs_)[0]; | |
| 63 subs_->clear(); | |
| 64 delete this; | |
| 65 return a->Simplify(); | |
| 66 } | |
| 67 | |
| 68 return this; | |
| 69 } | |
| 70 | |
| 71 // Combines two Prefilters together to create an "op" (AND or OR). | |
| 72 // The passed Prefilters will be part of the returned Prefilter or deleted. | |
| 73 // Does lots of work to avoid creating unnecessarily complicated structures. | |
| 74 Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) { | |
| 75 // If a, b can be rewritten as op, do so. | |
| 76 a = a->Simplify(); | |
| 77 b = b->Simplify(); | |
| 78 | |
| 79 // Canonicalize: a->op <= b->op. | |
| 80 if (a->op() > b->op()) { | |
| 81 Prefilter* t = a; | |
| 82 a = b; | |
| 83 b = t; | |
| 84 } | |
| 85 | |
| 86 // Trivial cases. | |
| 87 // ALL AND b = b | |
| 88 // NONE OR b = b | |
| 89 // ALL OR b = ALL | |
| 90 // NONE AND b = NONE | |
| 91 // Don't need to look at b, because of canonicalization above. | |
| 92 // ALL and NONE are smallest opcodes. | |
| 93 if (a->op() == ALL || a->op() == NONE) { | |
| 94 if ((a->op() == ALL && op == AND) || | |
| 95 (a->op() == NONE && op == OR)) { | |
| 96 delete a; | |
| 97 return b; | |
| 98 } else { | |
| 99 delete b; | |
| 100 return a; | |
| 101 } | |
| 102 } | |
| 103 | |
| 104 // If a and b match op, merge their contents. | |
| 105 if (a->op() == op && b->op() == op) { | |
| 106 for (size_t i = 0; i < b->subs()->size(); i++) { | |
| 107 Prefilter* bb = (*b->subs())[i]; | |
| 108 a->subs()->push_back(bb); | |
| 109 } | |
| 110 b->subs()->clear(); | |
| 111 delete b; | |
| 112 return a; | |
| 113 } | |
| 114 | |
| 115 // If a already has the same op as the op that is under construction | |
| 116 // add in b (similarly if b already has the same op, add in a). | |
| 117 if (b->op() == op) { | |
| 118 Prefilter* t = a; | |
| 119 a = b; | |
| 120 b = t; | |
| 121 } | |
| 122 if (a->op() == op) { | |
| 123 a->subs()->push_back(b); | |
| 124 return a; | |
| 125 } | |
| 126 | |
| 127 // Otherwise just return the op. | |
| 128 Prefilter* c = new Prefilter(op); | |
| 129 c->subs()->push_back(a); | |
| 130 c->subs()->push_back(b); | |
| 131 return c; | |
| 132 } | |
| 133 | |
| 134 Prefilter* Prefilter::And(Prefilter* a, Prefilter* b) { | |
| 135 return AndOr(AND, a, b); | |
| 136 } | |
| 137 | |
| 138 Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) { | |
| 139 return AndOr(OR, a, b); | |
| 140 } | |
| 141 | |
| 142 static void SimplifyStringSet(set<string> *ss) { | |
| 143 // Now make sure that the strings aren't redundant. For example, if | |
| 144 // we know "ab" is a required string, then it doesn't help at all to | |
| 145 // know that "abc" is also a required string, so delete "abc". This | |
| 146 // is because, when we are performing a string search to filter | |
| 147 // regexps, matching ab will already allow this regexp to be a | |
| 148 // candidate for match, so further matching abc is redundant. | |
| 149 | |
| 150 for (SSIter i = ss->begin(); i != ss->end(); ++i) { | |
| 151 SSIter j = i; | |
| 152 ++j; | |
| 153 while (j != ss->end()) { | |
| 154 // Increment j early so that we can erase the element it points to. | |
| 155 SSIter old_j = j; | |
| 156 ++j; | |
| 157 if (old_j->find(*i) != string::npos) | |
| 158 ss->erase(old_j); | |
| 159 } | |
| 160 } | |
| 161 } | |
| 162 | |
| 163 Prefilter* Prefilter::OrStrings(set<string>* ss) { | |
| 164 SimplifyStringSet(ss); | |
| 165 Prefilter* or_prefilter = NULL; | |
| 166 if (!ss->empty()) { | |
| 167 or_prefilter = new Prefilter(NONE); | |
| 168 for (SSIter i = ss->begin(); i != ss->end(); ++i) | |
| 169 or_prefilter = Or(or_prefilter, FromString(*i)); | |
| 170 } | |
| 171 return or_prefilter; | |
| 172 } | |
| 173 | |
| 174 static Rune ToLowerRune(Rune r) { | |
| 175 if (r < Runeself) { | |
| 176 if ('A' <= r && r <= 'Z') | |
| 177 r += 'a' - 'A'; | |
| 178 return r; | |
| 179 } | |
| 180 | |
| 181 const CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r); | |
| 182 if (f == NULL || r < f->lo) | |
| 183 return r; | |
| 184 return ApplyFold(f, r); | |
| 185 } | |
| 186 | |
| 187 static Rune ToLowerRuneLatin1(Rune r) { | |
| 188 if ('A' <= r && r <= 'Z') | |
| 189 r += 'a' - 'A'; | |
| 190 return r; | |
| 191 } | |
| 192 | |
| 193 Prefilter* Prefilter::FromString(const string& str) { | |
| 194 Prefilter* m = new Prefilter(Prefilter::ATOM); | |
| 195 m->atom_ = str; | |
| 196 return m; | |
| 197 } | |
| 198 | |
| 199 // Information about a regexp used during computation of Prefilter. | |
| 200 // Can be thought of as information about the set of strings matching | |
| 201 // the given regular expression. | |
| 202 class Prefilter::Info { | |
| 203 public: | |
| 204 Info(); | |
| 205 ~Info(); | |
| 206 | |
| 207 // More constructors. They delete their Info* arguments. | |
| 208 static Info* Alt(Info* a, Info* b); | |
| 209 static Info* Concat(Info* a, Info* b); | |
| 210 static Info* And(Info* a, Info* b); | |
| 211 static Info* Star(Info* a); | |
| 212 static Info* Plus(Info* a); | |
| 213 static Info* Quest(Info* a); | |
| 214 static Info* EmptyString(); | |
| 215 static Info* NoMatch(); | |
| 216 static Info* AnyChar(); | |
| 217 static Info* CClass(CharClass* cc, bool latin1); | |
| 218 static Info* Literal(Rune r); | |
| 219 static Info* LiteralLatin1(Rune r); | |
| 220 static Info* AnyMatch(); | |
| 221 | |
| 222 // Format Info as a string. | |
| 223 string ToString(); | |
| 224 | |
| 225 // Caller takes ownership of the Prefilter. | |
| 226 Prefilter* TakeMatch(); | |
| 227 | |
| 228 set<string>& exact() { return exact_; } | |
| 229 | |
| 230 bool is_exact() const { return is_exact_; } | |
| 231 | |
| 232 class Walker; | |
| 233 | |
| 234 private: | |
| 235 set<string> exact_; | |
| 236 | |
| 237 // When is_exact_ is true, the strings that match | |
| 238 // are placed in exact_. When it is no longer an exact | |
| 239 // set of strings that match this RE, then is_exact_ | |
| 240 // is false and the match_ contains the required match | |
| 241 // criteria. | |
| 242 bool is_exact_; | |
| 243 | |
| 244 // Accumulated Prefilter query that any | |
| 245 // match for this regexp is guaranteed to match. | |
| 246 Prefilter* match_; | |
| 247 }; | |
| 248 | |
| 249 | |
| 250 Prefilter::Info::Info() | |
| 251 : is_exact_(false), | |
| 252 match_(NULL) { | |
| 253 } | |
| 254 | |
| 255 Prefilter::Info::~Info() { | |
| 256 delete match_; | |
| 257 } | |
| 258 | |
| 259 Prefilter* Prefilter::Info::TakeMatch() { | |
| 260 if (is_exact_) { | |
| 261 match_ = Prefilter::OrStrings(&exact_); | |
| 262 is_exact_ = false; | |
| 263 } | |
| 264 Prefilter* m = match_; | |
| 265 match_ = NULL; | |
| 266 return m; | |
| 267 } | |
| 268 | |
| 269 // Format a Info in string form. | |
| 270 string Prefilter::Info::ToString() { | |
| 271 if (is_exact_) { | |
| 272 int n = 0; | |
| 273 string s; | |
| 274 for (set<string>::iterator i = exact_.begin(); i != exact_.end(); ++i) { | |
| 275 if (n++ > 0) | |
| 276 s += ","; | |
| 277 s += *i; | |
| 278 } | |
| 279 return s; | |
| 280 } | |
| 281 | |
| 282 if (match_) | |
| 283 return match_->DebugString(); | |
| 284 | |
| 285 return ""; | |
| 286 } | |
| 287 | |
| 288 // Add the strings from src to dst. | |
| 289 static void CopyIn(const set<string>& src, set<string>* dst) { | |
| 290 for (ConstSSIter i = src.begin(); i != src.end(); ++i) | |
| 291 dst->insert(*i); | |
| 292 } | |
| 293 | |
| 294 // Add the cross-product of a and b to dst. | |
| 295 // (For each string i in a and j in b, add i+j.) | |
| 296 static void CrossProduct(const set<string>& a, | |
| 297 const set<string>& b, | |
| 298 set<string>* dst) { | |
| 299 for (ConstSSIter i = a.begin(); i != a.end(); ++i) | |
| 300 for (ConstSSIter j = b.begin(); j != b.end(); ++j) | |
| 301 dst->insert(*i + *j); | |
| 302 } | |
| 303 | |
| 304 // Concats a and b. Requires that both are exact sets. | |
| 305 // Forms an exact set that is a crossproduct of a and b. | |
| 306 Prefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) { | |
| 307 if (a == NULL) | |
| 308 return b; | |
| 309 DCHECK(a->is_exact_); | |
| 310 DCHECK(b && b->is_exact_); | |
| 311 Info *ab = new Info(); | |
| 312 | |
| 313 CrossProduct(a->exact_, b->exact_, &ab->exact_); | |
| 314 ab->is_exact_ = true; | |
| 315 | |
| 316 delete a; | |
| 317 delete b; | |
| 318 return ab; | |
| 319 } | |
| 320 | |
| 321 // Constructs an inexact Info for ab given a and b. | |
| 322 // Used only when a or b is not exact or when the | |
| 323 // exact cross product is likely to be too big. | |
| 324 Prefilter::Info* Prefilter::Info::And(Info* a, Info* b) { | |
| 325 if (a == NULL) | |
| 326 return b; | |
| 327 if (b == NULL) | |
| 328 return a; | |
| 329 | |
| 330 Info *ab = new Info(); | |
| 331 | |
| 332 ab->match_ = Prefilter::And(a->TakeMatch(), b->TakeMatch()); | |
| 333 ab->is_exact_ = false; | |
| 334 delete a; | |
| 335 delete b; | |
| 336 return ab; | |
| 337 } | |
| 338 | |
| 339 // Constructs Info for a|b given a and b. | |
| 340 Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) { | |
| 341 Info *ab = new Info(); | |
| 342 | |
| 343 if (a->is_exact_ && b->is_exact_) { | |
| 344 CopyIn(a->exact_, &ab->exact_); | |
| 345 CopyIn(b->exact_, &ab->exact_); | |
| 346 ab->is_exact_ = true; | |
| 347 } else { | |
| 348 // Either a or b has is_exact_ = false. If the other | |
| 349 // one has is_exact_ = true, we move it to match_ and | |
| 350 // then create a OR of a,b. The resulting Info has | |
| 351 // is_exact_ = false. | |
| 352 ab->match_ = Prefilter::Or(a->TakeMatch(), b->TakeMatch()); | |
| 353 ab->is_exact_ = false; | |
| 354 } | |
| 355 | |
| 356 delete a; | |
| 357 delete b; | |
| 358 return ab; | |
| 359 } | |
| 360 | |
| 361 // Constructs Info for a? given a. | |
| 362 Prefilter::Info* Prefilter::Info::Quest(Info *a) { | |
| 363 Info *ab = new Info(); | |
| 364 | |
| 365 ab->is_exact_ = false; | |
| 366 ab->match_ = new Prefilter(ALL); | |
| 367 delete a; | |
| 368 return ab; | |
| 369 } | |
| 370 | |
| 371 // Constructs Info for a* given a. | |
| 372 // Same as a? -- not much to do. | |
| 373 Prefilter::Info* Prefilter::Info::Star(Info *a) { | |
| 374 return Quest(a); | |
| 375 } | |
| 376 | |
| 377 // Constructs Info for a+ given a. If a was exact set, it isn't | |
| 378 // anymore. | |
| 379 Prefilter::Info* Prefilter::Info::Plus(Info *a) { | |
| 380 Info *ab = new Info(); | |
| 381 | |
| 382 ab->match_ = a->TakeMatch(); | |
| 383 ab->is_exact_ = false; | |
| 384 | |
| 385 delete a; | |
| 386 return ab; | |
| 387 } | |
| 388 | |
| 389 static string RuneToString(Rune r) { | |
| 390 char buf[UTFmax]; | |
| 391 int n = runetochar(buf, &r); | |
| 392 return string(buf, n); | |
| 393 } | |
| 394 | |
| 395 static string RuneToStringLatin1(Rune r) { | |
| 396 char c = r & 0xff; | |
| 397 return string(&c, 1); | |
| 398 } | |
| 399 | |
| 400 // Constructs Info for literal rune. | |
| 401 Prefilter::Info* Prefilter::Info::Literal(Rune r) { | |
| 402 Info* info = new Info(); | |
| 403 info->exact_.insert(RuneToString(ToLowerRune(r))); | |
| 404 info->is_exact_ = true; | |
| 405 return info; | |
| 406 } | |
| 407 | |
| 408 // Constructs Info for literal rune for Latin1 encoded string. | |
| 409 Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) { | |
| 410 Info* info = new Info(); | |
| 411 info->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r))); | |
| 412 info->is_exact_ = true; | |
| 413 return info; | |
| 414 } | |
| 415 | |
| 416 // Constructs Info for dot (any character). | |
| 417 Prefilter::Info* Prefilter::Info::AnyChar() { | |
| 418 Prefilter::Info* info = new Prefilter::Info(); | |
| 419 info->match_ = new Prefilter(ALL); | |
| 420 return info; | |
| 421 } | |
| 422 | |
| 423 // Constructs Prefilter::Info for no possible match. | |
| 424 Prefilter::Info* Prefilter::Info::NoMatch() { | |
| 425 Prefilter::Info* info = new Prefilter::Info(); | |
| 426 info->match_ = new Prefilter(NONE); | |
| 427 return info; | |
| 428 } | |
| 429 | |
| 430 // Constructs Prefilter::Info for any possible match. | |
| 431 // This Prefilter::Info is valid for any regular expression, | |
| 432 // since it makes no assertions whatsoever about the | |
| 433 // strings being matched. | |
| 434 Prefilter::Info* Prefilter::Info::AnyMatch() { | |
| 435 Prefilter::Info *info = new Prefilter::Info(); | |
| 436 info->match_ = new Prefilter(ALL); | |
| 437 return info; | |
| 438 } | |
| 439 | |
| 440 // Constructs Prefilter::Info for just the empty string. | |
| 441 Prefilter::Info* Prefilter::Info::EmptyString() { | |
| 442 Prefilter::Info* info = new Prefilter::Info(); | |
| 443 info->is_exact_ = true; | |
| 444 info->exact_.insert(""); | |
| 445 return info; | |
| 446 } | |
| 447 | |
| 448 // Constructs Prefilter::Info for a character class. | |
| 449 typedef CharClass::iterator CCIter; | |
| 450 Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, | |
| 451 bool latin1) { | |
| 452 if (Trace) { | |
| 453 VLOG(0) << "CharClassInfo:"; | |
| 454 for (CCIter i = cc->begin(); i != cc->end(); ++i) | |
| 455 VLOG(0) << " " << i->lo << "-" << i->hi; | |
| 456 } | |
| 457 | |
| 458 // If the class is too large, it's okay to overestimate. | |
| 459 if (cc->size() > 10) | |
| 460 return AnyChar(); | |
| 461 | |
| 462 Prefilter::Info *a = new Prefilter::Info(); | |
| 463 for (CCIter i = cc->begin(); i != cc->end(); ++i) | |
| 464 for (Rune r = i->lo; r <= i->hi; r++) { | |
| 465 if (latin1) { | |
| 466 a->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r))); | |
| 467 } else { | |
| 468 a->exact_.insert(RuneToString(ToLowerRune(r))); | |
| 469 } | |
| 470 } | |
| 471 | |
| 472 | |
| 473 a->is_exact_ = true; | |
| 474 | |
| 475 if (Trace) { | |
| 476 VLOG(0) << " = " << a->ToString(); | |
| 477 } | |
| 478 | |
| 479 return a; | |
| 480 } | |
| 481 | |
| 482 class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> { | |
| 483 public: | |
| 484 Walker(bool latin1) : latin1_(latin1) {} | |
| 485 | |
| 486 virtual Info* PostVisit( | |
| 487 Regexp* re, Info* parent_arg, | |
| 488 Info* pre_arg, | |
| 489 Info** child_args, int nchild_args); | |
| 490 | |
| 491 virtual Info* ShortVisit( | |
| 492 Regexp* re, | |
| 493 Info* parent_arg); | |
| 494 | |
| 495 bool latin1() { return latin1_; } | |
| 496 private: | |
| 497 bool latin1_; | |
| 498 DISALLOW_COPY_AND_ASSIGN(Walker); | |
| 499 }; | |
| 500 | |
| 501 Prefilter::Info* Prefilter::BuildInfo(Regexp* re) { | |
| 502 if (Trace) { | |
| 503 LOG(INFO) << "BuildPrefilter::Info: " << re->ToString(); | |
| 504 } | |
| 505 | |
| 506 bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0; | |
| 507 Prefilter::Info::Walker w(latin1); | |
| 508 Prefilter::Info* info = w.WalkExponential(re, NULL, 100000); | |
| 509 | |
| 510 if (w.stopped_early()) { | |
| 511 delete info; | |
| 512 return NULL; | |
| 513 } | |
| 514 | |
| 515 return info; | |
| 516 } | |
| 517 | |
| 518 Prefilter::Info* Prefilter::Info::Walker::ShortVisit( | |
| 519 Regexp* re, Prefilter::Info* parent_arg) { | |
| 520 return AnyMatch(); | |
| 521 } | |
| 522 | |
| 523 // Constructs the Prefilter::Info for the given regular expression. | |
| 524 // Assumes re is simplified. | |
| 525 Prefilter::Info* Prefilter::Info::Walker::PostVisit( | |
| 526 Regexp* re, Prefilter::Info* parent_arg, | |
| 527 Prefilter::Info* pre_arg, Prefilter::Info** child_args, | |
| 528 int nchild_args) { | |
| 529 Prefilter::Info *info; | |
| 530 switch (re->op()) { | |
| 531 default: | |
| 532 case kRegexpRepeat: | |
| 533 LOG(DFATAL) << "Bad regexp op " << re->op(); | |
| 534 info = EmptyString(); | |
| 535 break; | |
| 536 | |
| 537 case kRegexpNoMatch: | |
| 538 info = NoMatch(); | |
| 539 break; | |
| 540 | |
| 541 // These ops match the empty string: | |
| 542 case kRegexpEmptyMatch: // anywhere | |
| 543 case kRegexpBeginLine: // at beginning of line | |
| 544 case kRegexpEndLine: // at end of line | |
| 545 case kRegexpBeginText: // at beginning of text | |
| 546 case kRegexpEndText: // at end of text | |
| 547 case kRegexpWordBoundary: // at word boundary | |
| 548 case kRegexpNoWordBoundary: // not at word boundary | |
| 549 info = EmptyString(); | |
| 550 break; | |
| 551 | |
| 552 case kRegexpLiteral: | |
| 553 if (latin1()) { | |
| 554 info = LiteralLatin1(re->rune()); | |
| 555 } | |
| 556 else { | |
| 557 info = Literal(re->rune()); | |
| 558 } | |
| 559 break; | |
| 560 | |
| 561 case kRegexpLiteralString: | |
| 562 if (re->nrunes() == 0) { | |
| 563 info = NoMatch(); | |
| 564 break; | |
| 565 } | |
| 566 if (latin1()) { | |
| 567 info = LiteralLatin1(re->runes()[0]); | |
| 568 for (int i = 1; i < re->nrunes(); i++) { | |
| 569 info = Concat(info, LiteralLatin1(re->runes()[i])); | |
| 570 } | |
| 571 } else { | |
| 572 info = Literal(re->runes()[0]); | |
| 573 for (int i = 1; i < re->nrunes(); i++) { | |
| 574 info = Concat(info, Literal(re->runes()[i])); | |
| 575 } | |
| 576 } | |
| 577 break; | |
| 578 | |
| 579 case kRegexpConcat: { | |
| 580 // Accumulate in info. | |
| 581 // Exact is concat of recent contiguous exact nodes. | |
| 582 info = NULL; | |
| 583 Info* exact = NULL; | |
| 584 for (int i = 0; i < nchild_args; i++) { | |
| 585 Info* ci = child_args[i]; // child info | |
| 586 if (!ci->is_exact() || | |
| 587 (exact && ci->exact().size() * exact->exact().size() > 16)) { | |
| 588 // Exact run is over. | |
| 589 info = And(info, exact); | |
| 590 exact = NULL; | |
| 591 // Add this child's info. | |
| 592 info = And(info, ci); | |
| 593 } else { | |
| 594 // Append to exact run. | |
| 595 exact = Concat(exact, ci); | |
| 596 } | |
| 597 } | |
| 598 info = And(info, exact); | |
| 599 } | |
| 600 break; | |
| 601 | |
| 602 case kRegexpAlternate: | |
| 603 info = child_args[0]; | |
| 604 for (int i = 1; i < nchild_args; i++) | |
| 605 info = Alt(info, child_args[i]); | |
| 606 VLOG(10) << "Alt: " << info->ToString(); | |
| 607 break; | |
| 608 | |
| 609 case kRegexpStar: | |
| 610 info = Star(child_args[0]); | |
| 611 break; | |
| 612 | |
| 613 case kRegexpQuest: | |
| 614 info = Quest(child_args[0]); | |
| 615 break; | |
| 616 | |
| 617 case kRegexpPlus: | |
| 618 info = Plus(child_args[0]); | |
| 619 break; | |
| 620 | |
| 621 case kRegexpAnyChar: | |
| 622 // Claim nothing, except that it's not empty. | |
| 623 info = AnyChar(); | |
| 624 break; | |
| 625 | |
| 626 case kRegexpCharClass: | |
| 627 info = CClass(re->cc(), latin1()); | |
| 628 break; | |
| 629 | |
| 630 case kRegexpCapture: | |
| 631 // These don't affect the set of matching strings. | |
| 632 info = child_args[0]; | |
| 633 break; | |
| 634 } | |
| 635 | |
| 636 if (Trace) { | |
| 637 VLOG(0) << "BuildInfo " << re->ToString() | |
| 638 << ": " << (info ? info->ToString() : ""); | |
| 639 } | |
| 640 | |
| 641 return info; | |
| 642 } | |
| 643 | |
| 644 | |
| 645 Prefilter* Prefilter::FromRegexp(Regexp* re) { | |
| 646 if (re == NULL) | |
| 647 return NULL; | |
| 648 | |
| 649 Regexp* simple = re->Simplify(); | |
| 650 Prefilter::Info *info = BuildInfo(simple); | |
| 651 | |
| 652 simple->Decref(); | |
| 653 if (info == NULL) | |
| 654 return NULL; | |
| 655 | |
| 656 Prefilter* m = info->TakeMatch(); | |
| 657 | |
| 658 delete info; | |
| 659 return m; | |
| 660 } | |
| 661 | |
| 662 string Prefilter::DebugString() const { | |
| 663 switch (op_) { | |
| 664 default: | |
| 665 LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_; | |
| 666 return StringPrintf("op%d", op_); | |
| 667 case NONE: | |
| 668 return "*no-matches*"; | |
| 669 case ATOM: | |
| 670 return atom_; | |
| 671 case ALL: | |
| 672 return ""; | |
| 673 case AND: { | |
| 674 string s = ""; | |
| 675 for (size_t i = 0; i < subs_->size(); i++) { | |
| 676 if (i > 0) | |
| 677 s += " "; | |
| 678 Prefilter* sub = (*subs_)[i]; | |
| 679 s += sub ? sub->DebugString() : "<nil>"; | |
| 680 } | |
| 681 return s; | |
| 682 } | |
| 683 case OR: { | |
| 684 string s = "("; | |
| 685 for (size_t i = 0; i < subs_->size(); i++) { | |
| 686 if (i > 0) | |
| 687 s += "|"; | |
| 688 Prefilter* sub = (*subs_)[i]; | |
| 689 s += sub ? sub->DebugString() : "<nil>"; | |
| 690 } | |
| 691 s += ")"; | |
| 692 return s; | |
| 693 } | |
| 694 } | |
| 695 } | |
| 696 | |
| 697 Prefilter* Prefilter::FromRE2(const RE2* re2) { | |
| 698 if (re2 == NULL) | |
| 699 return NULL; | |
| 700 | |
| 701 Regexp* regexp = re2->Regexp(); | |
| 702 if (regexp == NULL) | |
| 703 return NULL; | |
| 704 | |
| 705 return FromRegexp(regexp); | |
| 706 } | |
| 707 | |
| 708 | |
| 709 } // namespace re2 | |
| OLD | NEW |