| Index: third_party/re2/re2/prefilter_tree.cc
|
| diff --git a/third_party/re2/re2/prefilter_tree.cc b/third_party/re2/re2/prefilter_tree.cc
|
| deleted file mode 100644
|
| index be9b5844b8f05718929305a388eccb1a117c100a..0000000000000000000000000000000000000000
|
| --- a/third_party/re2/re2/prefilter_tree.cc
|
| +++ /dev/null
|
| @@ -1,402 +0,0 @@
|
| -// Copyright 2009 The RE2 Authors. All Rights Reserved.
|
| -// Use of this source code is governed by a BSD-style
|
| -// license that can be found in the LICENSE file.
|
| -
|
| -#include "util/util.h"
|
| -#include "util/flags.h"
|
| -#include "re2/prefilter.h"
|
| -#include "re2/prefilter_tree.h"
|
| -#include "re2/re2.h"
|
| -
|
| -DEFINE_int32(filtered_re2_min_atom_len,
|
| - 3,
|
| - "Strings less than this length are not stored as atoms");
|
| -
|
| -namespace re2 {
|
| -
|
| -PrefilterTree::PrefilterTree()
|
| - : compiled_(false) {
|
| -}
|
| -
|
| -PrefilterTree::~PrefilterTree() {
|
| - for (size_t i = 0; i < prefilter_vec_.size(); i++)
|
| - delete prefilter_vec_[i];
|
| -
|
| - for (size_t i = 0; i < entries_.size(); i++)
|
| - delete entries_[i].parents;
|
| -}
|
| -
|
| -// Functions used for adding and Compiling prefilters to the
|
| -// PrefilterTree.
|
| -static bool KeepPart(Prefilter* prefilter, int level) {
|
| - if (prefilter == NULL)
|
| - return false;
|
| -
|
| - switch (prefilter->op()) {
|
| - default:
|
| - LOG(DFATAL) << "Unexpected op in KeepPart: "
|
| - << prefilter->op();
|
| - return false;
|
| -
|
| - case Prefilter::ALL:
|
| - return false;
|
| -
|
| - case Prefilter::ATOM:
|
| - return prefilter->atom().size() >=
|
| - static_cast<size_t>(FLAGS_filtered_re2_min_atom_len);
|
| -
|
| - case Prefilter::AND: {
|
| - int j = 0;
|
| - vector<Prefilter*>* subs = prefilter->subs();
|
| - for (size_t i = 0; i < subs->size(); i++)
|
| - if (KeepPart((*subs)[i], level + 1))
|
| - (*subs)[j++] = (*subs)[i];
|
| - else
|
| - delete (*subs)[i];
|
| -
|
| - subs->resize(j);
|
| - return j > 0;
|
| - }
|
| -
|
| - case Prefilter::OR:
|
| - for (size_t i = 0; i < prefilter->subs()->size(); i++)
|
| - if (!KeepPart((*prefilter->subs())[i], level + 1))
|
| - return false;
|
| - return true;
|
| - }
|
| -}
|
| -
|
| -void PrefilterTree::Add(Prefilter *f) {
|
| - if (compiled_) {
|
| - LOG(DFATAL) << "Add after Compile.";
|
| - return;
|
| - }
|
| - if (f != NULL && !KeepPart(f, 0)) {
|
| - delete f;
|
| - f = NULL;
|
| - }
|
| -
|
| - prefilter_vec_.push_back(f);
|
| -}
|
| -
|
| -void PrefilterTree::Compile(vector<string>* atom_vec) {
|
| - if (compiled_) {
|
| - LOG(DFATAL) << "Compile after Compile.";
|
| - return;
|
| - }
|
| -
|
| - // We do this check to support some legacy uses of
|
| - // PrefilterTree that call Compile before adding any regexps,
|
| - // and expect Compile not to have effect.
|
| - if (prefilter_vec_.empty())
|
| - return;
|
| -
|
| - compiled_ = true;
|
| -
|
| - AssignUniqueIds(atom_vec);
|
| -
|
| - // Identify nodes that are too common among prefilters and are
|
| - // triggering too many parents. Then get rid of them if possible.
|
| - // Note that getting rid of a prefilter node simply means they are
|
| - // no longer necessary for their parent to trigger; that is, we do
|
| - // not miss out on any regexps triggering by getting rid of a
|
| - // prefilter node.
|
| - for (size_t i = 0; i < entries_.size(); i++) {
|
| - StdIntMap* parents = entries_[i].parents;
|
| - if (parents->size() > 8) {
|
| - // This one triggers too many things. If all the parents are AND
|
| - // nodes and have other things guarding them, then get rid of
|
| - // this trigger. TODO(vsri): Adjust the threshold appropriately,
|
| - // make it a function of total number of nodes?
|
| - bool have_other_guard = true;
|
| - for (StdIntMap::iterator it = parents->begin();
|
| - it != parents->end(); ++it) {
|
| - have_other_guard = have_other_guard &&
|
| - (entries_[it->first].propagate_up_at_count > 1);
|
| - }
|
| -
|
| - if (have_other_guard) {
|
| - for (StdIntMap::iterator it = parents->begin();
|
| - it != parents->end(); ++it)
|
| - entries_[it->first].propagate_up_at_count -= 1;
|
| -
|
| - parents->clear(); // Forget the parents
|
| - }
|
| - }
|
| - }
|
| -
|
| - PrintDebugInfo();
|
| -}
|
| -
|
| -Prefilter* PrefilterTree::CanonicalNode(Prefilter* node) {
|
| - string node_string = NodeString(node);
|
| - map<string, Prefilter*>::iterator iter = node_map_.find(node_string);
|
| - if (iter == node_map_.end())
|
| - return NULL;
|
| - return (*iter).second;
|
| -}
|
| -
|
| -static string Itoa(int n) {
|
| - char buf[100];
|
| - snprintf(buf, sizeof buf, "%d", n);
|
| - return string(buf);
|
| -}
|
| -
|
| -string PrefilterTree::NodeString(Prefilter* node) const {
|
| - // Adding the operation disambiguates AND/OR/atom nodes.
|
| - string s = Itoa(node->op()) + ":";
|
| - if (node->op() == Prefilter::ATOM) {
|
| - s += node->atom();
|
| - } else {
|
| - for (size_t i = 0; i < node->subs()->size(); i++) {
|
| - if (i > 0)
|
| - s += ',';
|
| - s += Itoa((*node->subs())[i]->unique_id());
|
| - }
|
| - }
|
| - return s;
|
| -}
|
| -
|
| -void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
|
| - atom_vec->clear();
|
| -
|
| - // Build vector of all filter nodes, sorted topologically
|
| - // from top to bottom in v.
|
| - vector<Prefilter*> v;
|
| -
|
| - // Add the top level nodes of each regexp prefilter.
|
| - for (size_t i = 0; i < prefilter_vec_.size(); i++) {
|
| - Prefilter* f = prefilter_vec_[i];
|
| - if (f == NULL)
|
| - unfiltered_.push_back(static_cast<int>(i));
|
| -
|
| - // We push NULL also on to v, so that we maintain the
|
| - // mapping of index==regexpid for level=0 prefilter nodes.
|
| - v.push_back(f);
|
| - }
|
| -
|
| - // Now add all the descendant nodes.
|
| - for (size_t i = 0; i < v.size(); i++) {
|
| - Prefilter* f = v[i];
|
| - if (f == NULL)
|
| - continue;
|
| - if (f->op() == Prefilter::AND || f->op() == Prefilter::OR) {
|
| - const vector<Prefilter*>& subs = *f->subs();
|
| - for (size_t j = 0; j < subs.size(); j++)
|
| - v.push_back(subs[j]);
|
| - }
|
| - }
|
| -
|
| - // Identify unique nodes.
|
| - int unique_id = 0;
|
| - for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
|
| - Prefilter *node = v[i];
|
| - if (node == NULL)
|
| - continue;
|
| - node->set_unique_id(-1);
|
| - Prefilter* canonical = CanonicalNode(node);
|
| - if (canonical == NULL) {
|
| - // Any further nodes that have the same node string
|
| - // will find this node as the canonical node.
|
| - node_map_[NodeString(node)] = node;
|
| - if (node->op() == Prefilter::ATOM) {
|
| - atom_vec->push_back(node->atom());
|
| - atom_index_to_id_.push_back(unique_id);
|
| - }
|
| - node->set_unique_id(unique_id++);
|
| - } else {
|
| - node->set_unique_id(canonical->unique_id());
|
| - }
|
| - }
|
| - entries_.resize(node_map_.size());
|
| -
|
| - // Create parent StdIntMap for the entries.
|
| - for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
|
| - Prefilter* prefilter = v[i];
|
| - if (prefilter == NULL)
|
| - continue;
|
| -
|
| - if (CanonicalNode(prefilter) != prefilter)
|
| - continue;
|
| -
|
| - Entry* entry = &entries_[prefilter->unique_id()];
|
| - entry->parents = new StdIntMap();
|
| - }
|
| -
|
| - // Fill the entries.
|
| - for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
|
| - Prefilter* prefilter = v[i];
|
| - if (prefilter == NULL)
|
| - continue;
|
| -
|
| - if (CanonicalNode(prefilter) != prefilter)
|
| - continue;
|
| -
|
| - Entry* entry = &entries_[prefilter->unique_id()];
|
| -
|
| - switch (prefilter->op()) {
|
| - default:
|
| - case Prefilter::ALL:
|
| - LOG(DFATAL) << "Unexpected op: " << prefilter->op();
|
| - return;
|
| -
|
| - case Prefilter::ATOM:
|
| - entry->propagate_up_at_count = 1;
|
| - break;
|
| -
|
| - case Prefilter::OR:
|
| - case Prefilter::AND: {
|
| - set<int> uniq_child;
|
| - for (size_t j = 0; j < prefilter->subs()->size(); j++) {
|
| - Prefilter* child = (*prefilter->subs())[j];
|
| - Prefilter* canonical = CanonicalNode(child);
|
| - if (canonical == NULL) {
|
| - LOG(DFATAL) << "Null canonical node";
|
| - return;
|
| - }
|
| - int child_id = canonical->unique_id();
|
| - uniq_child.insert(child_id);
|
| - // To the child, we want to add to parent indices.
|
| - Entry* child_entry = &entries_[child_id];
|
| - if (child_entry->parents->find(prefilter->unique_id()) ==
|
| - child_entry->parents->end()) {
|
| - (*child_entry->parents)[prefilter->unique_id()] = 1;
|
| - }
|
| - }
|
| - entry->propagate_up_at_count = prefilter->op() == Prefilter::AND
|
| - ? static_cast<int>(uniq_child.size())
|
| - : 1;
|
| -
|
| - break;
|
| - }
|
| - }
|
| - }
|
| -
|
| - // For top level nodes, populate regexp id.
|
| - for (size_t i = 0; i < prefilter_vec_.size(); i++) {
|
| - if (prefilter_vec_[i] == NULL)
|
| - continue;
|
| - int id = CanonicalNode(prefilter_vec_[i])->unique_id();
|
| - DCHECK_LE(0, id);
|
| - Entry* entry = &entries_[id];
|
| - entry->regexps.push_back(static_cast<int>(i));
|
| - }
|
| -}
|
| -
|
| -// Functions for triggering during search.
|
| -void PrefilterTree::RegexpsGivenStrings(
|
| - const vector<int>& matched_atoms,
|
| - vector<int>* regexps) const {
|
| - regexps->clear();
|
| - if (!compiled_) {
|
| - LOG(WARNING) << "Compile() not called";
|
| - for (size_t i = 0; i < prefilter_vec_.size(); ++i)
|
| - regexps->push_back(static_cast<int>(i));
|
| - } else {
|
| - if (!prefilter_vec_.empty()) {
|
| - IntMap regexps_map(static_cast<int>(prefilter_vec_.size()));
|
| - vector<int> matched_atom_ids;
|
| - for (size_t j = 0; j < matched_atoms.size(); j++) {
|
| - matched_atom_ids.push_back(atom_index_to_id_[matched_atoms[j]]);
|
| - VLOG(10) << "Atom id:" << atom_index_to_id_[matched_atoms[j]];
|
| - }
|
| - PropagateMatch(matched_atom_ids, ®exps_map);
|
| - for (IntMap::iterator it = regexps_map.begin();
|
| - it != regexps_map.end();
|
| - ++it)
|
| - regexps->push_back(it->index());
|
| -
|
| - regexps->insert(regexps->end(), unfiltered_.begin(), unfiltered_.end());
|
| - }
|
| - }
|
| - sort(regexps->begin(), regexps->end());
|
| -}
|
| -
|
| -void PrefilterTree::PropagateMatch(const vector<int>& atom_ids,
|
| - IntMap* regexps) const {
|
| - IntMap count(static_cast<int>(entries_.size()));
|
| - IntMap work(static_cast<int>(entries_.size()));
|
| - for (size_t i = 0; i < atom_ids.size(); i++)
|
| - work.set(atom_ids[i], 1);
|
| - for (IntMap::iterator it = work.begin(); it != work.end(); ++it) {
|
| - const Entry& entry = entries_[it->index()];
|
| - VLOG(10) << "Processing: " << it->index();
|
| - // Record regexps triggered.
|
| - for (size_t i = 0; i < entry.regexps.size(); i++) {
|
| - VLOG(10) << "Regexp triggered: " << entry.regexps[i];
|
| - regexps->set(entry.regexps[i], 1);
|
| - }
|
| - int c;
|
| - // Pass trigger up to parents.
|
| - for (StdIntMap::iterator it = entry.parents->begin();
|
| - it != entry.parents->end();
|
| - ++it) {
|
| - int j = it->first;
|
| - const Entry& parent = entries_[j];
|
| - VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count;
|
| - // Delay until all the children have succeeded.
|
| - if (parent.propagate_up_at_count > 1) {
|
| - if (count.has_index(j)) {
|
| - c = count.get_existing(j) + 1;
|
| - count.set_existing(j, c);
|
| - } else {
|
| - c = 1;
|
| - count.set_new(j, c);
|
| - }
|
| - if (c < parent.propagate_up_at_count)
|
| - continue;
|
| - }
|
| - VLOG(10) << "Triggering: " << j;
|
| - // Trigger the parent.
|
| - work.set(j, 1);
|
| - }
|
| - }
|
| -}
|
| -
|
| -// Debugging help.
|
| -void PrefilterTree::PrintPrefilter(int regexpid) {
|
| - LOG(INFO) << DebugNodeString(prefilter_vec_[regexpid]);
|
| -}
|
| -
|
| -void PrefilterTree::PrintDebugInfo() {
|
| - VLOG(10) << "#Unique Atoms: " << atom_index_to_id_.size();
|
| - VLOG(10) << "#Unique Nodes: " << entries_.size();
|
| -
|
| - for (size_t i = 0; i < entries_.size(); ++i) {
|
| - StdIntMap* parents = entries_[i].parents;
|
| - const vector<int>& regexps = entries_[i].regexps;
|
| - VLOG(10) << "EntryId: " << i
|
| - << " N: " << parents->size() << " R: " << regexps.size();
|
| - for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it)
|
| - VLOG(10) << it->first;
|
| - }
|
| - VLOG(10) << "Map:";
|
| - for (map<string, Prefilter*>::const_iterator iter = node_map_.begin();
|
| - iter != node_map_.end(); ++iter)
|
| - VLOG(10) << "NodeId: " << (*iter).second->unique_id()
|
| - << " Str: " << (*iter).first;
|
| -}
|
| -
|
| -string PrefilterTree::DebugNodeString(Prefilter* node) const {
|
| - string node_string = "";
|
| -
|
| - if (node->op() == Prefilter::ATOM) {
|
| - DCHECK(!node->atom().empty());
|
| - node_string += node->atom();
|
| - } else {
|
| - // Adding the operation disambiguates AND and OR nodes.
|
| - node_string += node->op() == Prefilter::AND ? "AND" : "OR";
|
| - node_string += "(";
|
| - for (size_t i = 0; i < node->subs()->size(); i++) {
|
| - if (i > 0)
|
| - node_string += ',';
|
| - node_string += Itoa((*node->subs())[i]->unique_id());
|
| - node_string += ":";
|
| - node_string += DebugNodeString((*node->subs())[i]);
|
| - }
|
| - node_string += ")";
|
| - }
|
| - return node_string;
|
| -}
|
| -
|
| -} // namespace re2
|
|
|