Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(108)

Side by Side Diff: chrome/renderer/safe_browsing/scorer.cc

Issue 268673007: Extracting page shingle hashes for similarity detection. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fix a reference problem. Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/scorer.h" 5 #include "chrome/renderer/safe_browsing/scorer.h"
6 6
7 #include <math.h> 7 #include <math.h>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
88 } 88 }
89 89
90 const base::hash_set<std::string>& Scorer::page_terms() const { 90 const base::hash_set<std::string>& Scorer::page_terms() const {
91 return page_terms_; 91 return page_terms_;
92 } 92 }
93 93
94 const base::hash_set<uint32>& Scorer::page_words() const { 94 const base::hash_set<uint32>& Scorer::page_words() const {
95 return page_words_; 95 return page_words_;
96 } 96 }
97 97
98 size_t Scorer::max_hashes_per_page() const {
99 return model_.max_hashes_per_page();
100 }
101
98 size_t Scorer::max_words_per_term() const { 102 size_t Scorer::max_words_per_term() const {
99 return model_.max_words_per_term(); 103 return model_.max_words_per_term();
100 } 104 }
101 105
102 uint32 Scorer::murmurhash3_seed() const { 106 uint32 Scorer::murmurhash3_seed() const {
103 return model_.murmur_hash_seed(); 107 return model_.murmur_hash_seed();
104 } 108 }
105 109
110 size_t Scorer::shingle_size() const {
111 return model_.shingle_size();
112 }
113
106 double Scorer::ComputeRuleScore(const ClientSideModel::Rule& rule, 114 double Scorer::ComputeRuleScore(const ClientSideModel::Rule& rule,
107 const FeatureMap& features) const { 115 const FeatureMap& features) const {
108 const base::hash_map<std::string, double>& feature_map = features.features(); 116 const base::hash_map<std::string, double>& feature_map = features.features();
109 double rule_score = 1.0; 117 double rule_score = 1.0;
110 for (int i = 0; i < rule.feature_size(); ++i) { 118 for (int i = 0; i < rule.feature_size(); ++i) {
111 base::hash_map<std::string, double>::const_iterator it = feature_map.find( 119 base::hash_map<std::string, double>::const_iterator it = feature_map.find(
112 model_.hashes(rule.feature(i))); 120 model_.hashes(rule.feature(i)));
113 if (it == feature_map.end() || it->second == 0.0) { 121 if (it == feature_map.end() || it->second == 0.0) {
114 // If the feature of the rule does not exist in the given feature map the 122 // If the feature of the rule does not exist in the given feature map the
115 // feature weight is considered to be zero. If the feature weight is zero 123 // feature weight is considered to be zero. If the feature weight is zero
116 // we leave early since we know that the rule score will be zero. 124 // we leave early since we know that the rule score will be zero.
117 return 0.0; 125 return 0.0;
118 } 126 }
119 rule_score *= it->second; 127 rule_score *= it->second;
120 } 128 }
121 return rule_score * rule.weight(); 129 return rule_score * rule.weight();
122 } 130 }
123 } // namespace safe_browsing 131 } // namespace safe_browsing
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698