| OLD | NEW |
| 1 // Copyright 2009 The RE2 Authors. All Rights Reserved. | 1 // Copyright 2009 The RE2 Authors. All Rights Reserved. |
| 2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
| 3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "util/util.h" | 5 #include "util/util.h" |
| 6 #include "re2/prefilter.h" | 6 #include "re2/prefilter.h" |
| 7 #include "re2/re2.h" | 7 #include "re2/re2.h" |
| 8 #include "re2/unicode_casefold.h" | 8 #include "re2/unicode_casefold.h" |
| 9 #include "re2/walker-inl.h" | 9 #include "re2/walker-inl.h" |
| 10 | 10 |
| 11 namespace re2 { | 11 namespace re2 { |
| 12 | 12 |
| 13 static const int Trace = false; | 13 static const int Trace = false; |
| 14 | 14 |
| 15 typedef set<string>::iterator SSIter; | 15 typedef set<string>::iterator SSIter; |
| 16 typedef set<string>::const_iterator ConstSSIter; | 16 typedef set<string>::const_iterator ConstSSIter; |
| 17 | 17 |
| 18 GLOBAL_MUTEX(alloc_id_mutex); |
| 18 static int alloc_id = 100000; // Used for debugging. | 19 static int alloc_id = 100000; // Used for debugging. |
| 19 // Initializes a Prefilter, allocating subs_ as necessary. | 20 // Initializes a Prefilter, allocating subs_ as necessary. |
| 20 Prefilter::Prefilter(Op op) { | 21 Prefilter::Prefilter(Op op) { |
| 21 op_ = op; | 22 op_ = op; |
| 22 subs_ = NULL; | 23 subs_ = NULL; |
| 23 if (op_ == AND || op_ == OR) | 24 if (op_ == AND || op_ == OR) |
| 24 subs_ = new vector<Prefilter*>; | 25 subs_ = new vector<Prefilter*>; |
| 25 | 26 |
| 27 GLOBAL_MUTEX_LOCK(alloc_id_mutex); |
| 26 alloc_id_ = alloc_id++; | 28 alloc_id_ = alloc_id++; |
| 29 GLOBAL_MUTEX_UNLOCK(alloc_id_mutex); |
| 27 VLOG(10) << "alloc_id: " << alloc_id_; | 30 VLOG(10) << "alloc_id: " << alloc_id_; |
| 28 } | 31 } |
| 29 | 32 |
| 30 // Destroys a Prefilter. | 33 // Destroys a Prefilter. |
| 31 Prefilter::~Prefilter() { | 34 Prefilter::~Prefilter() { |
| 32 VLOG(10) << "Deleted: " << alloc_id_; | 35 VLOG(10) << "Deleted: " << alloc_id_; |
| 33 if (subs_) { | 36 if (subs_) { |
| 34 for (int i = 0; i < subs_->size(); i++) | 37 for (size_t i = 0; i < subs_->size(); i++) |
| 35 delete (*subs_)[i]; | 38 delete (*subs_)[i]; |
| 36 delete subs_; | 39 delete subs_; |
| 37 subs_ = NULL; | 40 subs_ = NULL; |
| 38 } | 41 } |
| 39 } | 42 } |
| 40 | 43 |
| 41 // Simplify if the node is an empty Or or And. | 44 // Simplify if the node is an empty Or or And. |
| 42 Prefilter* Prefilter::Simplify() { | 45 Prefilter* Prefilter::Simplify() { |
| 43 if (op_ != AND && op_ != OR) { | 46 if (op_ != AND && op_ != OR) { |
| 44 return this; | 47 return this; |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 93 delete a; | 96 delete a; |
| 94 return b; | 97 return b; |
| 95 } else { | 98 } else { |
| 96 delete b; | 99 delete b; |
| 97 return a; | 100 return a; |
| 98 } | 101 } |
| 99 } | 102 } |
| 100 | 103 |
| 101 // If a and b match op, merge their contents. | 104 // If a and b match op, merge their contents. |
| 102 if (a->op() == op && b->op() == op) { | 105 if (a->op() == op && b->op() == op) { |
| 103 for (int i = 0; i < b->subs()->size(); i++) { | 106 for (size_t i = 0; i < b->subs()->size(); i++) { |
| 104 Prefilter* bb = (*b->subs())[i]; | 107 Prefilter* bb = (*b->subs())[i]; |
| 105 a->subs()->push_back(bb); | 108 a->subs()->push_back(bb); |
| 106 } | 109 } |
| 107 b->subs()->clear(); | 110 b->subs()->clear(); |
| 108 delete b; | 111 delete b; |
| 109 return a; | 112 return a; |
| 110 } | 113 } |
| 111 | 114 |
| 112 // If a already has the same op as the op that is under construction | 115 // If a already has the same op as the op that is under construction |
| 113 // add in b (similarly if b already has the same op, add in a). | 116 // add in b (similarly if b already has the same op, add in a). |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 168 return or_prefilter; | 171 return or_prefilter; |
| 169 } | 172 } |
| 170 | 173 |
| 171 static Rune ToLowerRune(Rune r) { | 174 static Rune ToLowerRune(Rune r) { |
| 172 if (r < Runeself) { | 175 if (r < Runeself) { |
| 173 if ('A' <= r && r <= 'Z') | 176 if ('A' <= r && r <= 'Z') |
| 174 r += 'a' - 'A'; | 177 r += 'a' - 'A'; |
| 175 return r; | 178 return r; |
| 176 } | 179 } |
| 177 | 180 |
| 178 CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r); | 181 const CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r); |
| 179 if (f == NULL || r < f->lo) | 182 if (f == NULL || r < f->lo) |
| 180 return r; | 183 return r; |
| 181 return ApplyFold(f, r); | 184 return ApplyFold(f, r); |
| 182 } | 185 } |
| 183 | 186 |
| 184 static Rune ToLowerRuneLatin1(Rune r) { | 187 static Rune ToLowerRuneLatin1(Rune r) { |
| 185 if ('A' <= r && r <= 'Z') | 188 if ('A' <= r && r <= 'Z') |
| 186 r += 'a' - 'A'; | 189 r += 'a' - 'A'; |
| 187 return r; | 190 return r; |
| 188 } | 191 } |
| (...skipping 296 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 485 Info* pre_arg, | 488 Info* pre_arg, |
| 486 Info** child_args, int nchild_args); | 489 Info** child_args, int nchild_args); |
| 487 | 490 |
| 488 virtual Info* ShortVisit( | 491 virtual Info* ShortVisit( |
| 489 Regexp* re, | 492 Regexp* re, |
| 490 Info* parent_arg); | 493 Info* parent_arg); |
| 491 | 494 |
| 492 bool latin1() { return latin1_; } | 495 bool latin1() { return latin1_; } |
| 493 private: | 496 private: |
| 494 bool latin1_; | 497 bool latin1_; |
| 495 DISALLOW_EVIL_CONSTRUCTORS(Walker); | 498 DISALLOW_COPY_AND_ASSIGN(Walker); |
| 496 }; | 499 }; |
| 497 | 500 |
| 498 Prefilter::Info* Prefilter::BuildInfo(Regexp* re) { | 501 Prefilter::Info* Prefilter::BuildInfo(Regexp* re) { |
| 499 if (Trace) { | 502 if (Trace) { |
| 500 LOG(INFO) << "BuildPrefilter::Info: " << re->ToString(); | 503 LOG(INFO) << "BuildPrefilter::Info: " << re->ToString(); |
| 501 } | 504 } |
| 502 | 505 |
| 503 bool latin1 = re->parse_flags() & Regexp::Latin1; | 506 bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0; |
| 504 Prefilter::Info::Walker w(latin1); | 507 Prefilter::Info::Walker w(latin1); |
| 505 Prefilter::Info* info = w.WalkExponential(re, NULL, 100000); | 508 Prefilter::Info* info = w.WalkExponential(re, NULL, 100000); |
| 506 | 509 |
| 507 if (w.stopped_early()) { | 510 if (w.stopped_early()) { |
| 508 delete info; | 511 delete info; |
| 509 return NULL; | 512 return NULL; |
| 510 } | 513 } |
| 511 | 514 |
| 512 return info; | 515 return info; |
| 513 } | 516 } |
| (...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 662 LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_; | 665 LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_; |
| 663 return StringPrintf("op%d", op_); | 666 return StringPrintf("op%d", op_); |
| 664 case NONE: | 667 case NONE: |
| 665 return "*no-matches*"; | 668 return "*no-matches*"; |
| 666 case ATOM: | 669 case ATOM: |
| 667 return atom_; | 670 return atom_; |
| 668 case ALL: | 671 case ALL: |
| 669 return ""; | 672 return ""; |
| 670 case AND: { | 673 case AND: { |
| 671 string s = ""; | 674 string s = ""; |
| 672 for (int i = 0; i < subs_->size(); i++) { | 675 for (size_t i = 0; i < subs_->size(); i++) { |
| 673 if (i > 0) | 676 if (i > 0) |
| 674 s += " "; | 677 s += " "; |
| 675 Prefilter* sub = (*subs_)[i]; | 678 Prefilter* sub = (*subs_)[i]; |
| 676 s += sub ? sub->DebugString() : "<nil>"; | 679 s += sub ? sub->DebugString() : "<nil>"; |
| 677 } | 680 } |
| 678 return s; | 681 return s; |
| 679 } | 682 } |
| 680 case OR: { | 683 case OR: { |
| 681 string s = "("; | 684 string s = "("; |
| 682 for (int i = 0; i < subs_->size(); i++) { | 685 for (size_t i = 0; i < subs_->size(); i++) { |
| 683 if (i > 0) | 686 if (i > 0) |
| 684 s += "|"; | 687 s += "|"; |
| 685 Prefilter* sub = (*subs_)[i]; | 688 Prefilter* sub = (*subs_)[i]; |
| 686 s += sub ? sub->DebugString() : "<nil>"; | 689 s += sub ? sub->DebugString() : "<nil>"; |
| 687 } | 690 } |
| 688 s += ")"; | 691 s += ")"; |
| 689 return s; | 692 return s; |
| 690 } | 693 } |
| 691 } | 694 } |
| 692 } | 695 } |
| 693 | 696 |
| 694 Prefilter* Prefilter::FromRE2(const RE2* re2) { | 697 Prefilter* Prefilter::FromRE2(const RE2* re2) { |
| 695 if (re2 == NULL) | 698 if (re2 == NULL) |
| 696 return NULL; | 699 return NULL; |
| 697 | 700 |
| 698 Regexp* regexp = re2->Regexp(); | 701 Regexp* regexp = re2->Regexp(); |
| 699 if (regexp == NULL) | 702 if (regexp == NULL) |
| 700 return NULL; | 703 return NULL; |
| 701 | 704 |
| 702 return FromRegexp(regexp); | 705 return FromRegexp(regexp); |
| 703 } | 706 } |
| 704 | 707 |
| 705 | 708 |
| 706 } // namespace re2 | 709 } // namespace re2 |
| OLD | NEW |