Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1767)

Side by Side Diff: chrome/renderer/safe_browsing/scorer.cc

Issue 268673007: Extracting page shingle hashes for similarity detection. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Address 1st round comment Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/scorer.h" 5 #include "chrome/renderer/safe_browsing/scorer.h"
6 6
7 #include <math.h> 7 #include <math.h>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
96 } 96 }
97 97
98 size_t Scorer::max_words_per_term() const { 98 size_t Scorer::max_words_per_term() const {
99 return model_.max_words_per_term(); 99 return model_.max_words_per_term();
100 } 100 }
101 101
102 uint32 Scorer::murmurhash3_seed() const { 102 uint32 Scorer::murmurhash3_seed() const {
103 return model_.murmur_hash_seed(); 103 return model_.murmur_hash_seed();
104 } 104 }
105 105
106 size_t Scorer::max_shingles_per_page() const {
107 return model_.max_shingles_per_page();
108 }
109
110 size_t Scorer::shingle_size() const {
111 return model_.shingle_size();
112 }
113
106 double Scorer::ComputeRuleScore(const ClientSideModel::Rule& rule, 114 double Scorer::ComputeRuleScore(const ClientSideModel::Rule& rule,
107 const FeatureMap& features) const { 115 const FeatureMap& features) const {
108 const base::hash_map<std::string, double>& feature_map = features.features(); 116 const base::hash_map<std::string, double>& feature_map = features.features();
109 double rule_score = 1.0; 117 double rule_score = 1.0;
110 for (int i = 0; i < rule.feature_size(); ++i) { 118 for (int i = 0; i < rule.feature_size(); ++i) {
111 base::hash_map<std::string, double>::const_iterator it = feature_map.find( 119 base::hash_map<std::string, double>::const_iterator it = feature_map.find(
112 model_.hashes(rule.feature(i))); 120 model_.hashes(rule.feature(i)));
113 if (it == feature_map.end() || it->second == 0.0) { 121 if (it == feature_map.end() || it->second == 0.0) {
114 // If the feature of the rule does not exist in the given feature map the 122 // If the feature of the rule does not exist in the given feature map the
115 // feature weight is considered to be zero. If the feature weight is zero 123 // feature weight is considered to be zero. If the feature weight is zero
116 // we leave early since we know that the rule score will be zero. 124 // we leave early since we know that the rule score will be zero.
117 return 0.0; 125 return 0.0;
118 } 126 }
119 rule_score *= it->second; 127 rule_score *= it->second;
120 } 128 }
121 return rule_score * rule.weight(); 129 return rule_score * rule.weight();
122 } 130 }
123 } // namespace safe_browsing 131 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698