Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(464)

Side by Side Diff: chrome/renderer/safe_browsing/scorer.cc

Issue 7866011: Switch to the new client-side phishing model that uses Murmurhash for word hashes. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix compile problems and add another test Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/scorer.h" 5 #include "chrome/renderer/safe_browsing/scorer.h"
6 6
7 #include <math.h> 7 #include <math.h>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
63 DLOG(ERROR) << "Unable to parse phishing model. The model is missing " 63 DLOG(ERROR) << "Unable to parse phishing model. The model is missing "
64 << "some required fields. Maybe the .proto file changed?"; 64 << "some required fields. Maybe the .proto file changed?";
65 RecordScorerCreationStatus(SCORER_FAIL_MODEL_MISSING_FIELDS); 65 RecordScorerCreationStatus(SCORER_FAIL_MODEL_MISSING_FIELDS);
66 return NULL; 66 return NULL;
67 } 67 }
68 RecordScorerCreationStatus(SCORER_SUCCESS); 68 RecordScorerCreationStatus(SCORER_SUCCESS);
69 for (int i = 0; i < model.page_term_size(); ++i) { 69 for (int i = 0; i < model.page_term_size(); ++i) {
70 scorer->page_terms_.insert(model.hashes(model.page_term(i))); 70 scorer->page_terms_.insert(model.hashes(model.page_term(i)));
71 } 71 }
72 for (int i = 0; i < model.page_word_size(); ++i) { 72 for (int i = 0; i < model.page_word_size(); ++i) {
73 scorer->page_words_.insert(model.hashes(model.page_word(i))); 73 scorer->page_words_.insert(model.page_word(i));
74 } 74 }
75 return scorer.release(); 75 return scorer.release();
76 } 76 }
77 77
78 double Scorer::ComputeScore(const FeatureMap& features) const { 78 double Scorer::ComputeScore(const FeatureMap& features) const {
79 double logodds = 0.0; 79 double logodds = 0.0;
80 for (int i = 0; i < model_.rule_size(); ++i) { 80 for (int i = 0; i < model_.rule_size(); ++i) {
81 logodds += ComputeRuleScore(model_.rule(i), features); 81 logodds += ComputeRuleScore(model_.rule(i), features);
82 } 82 }
83 return LogOdds2Prob(logodds); 83 return LogOdds2Prob(logodds);
84 } 84 }
85 85
86 int Scorer::model_version() const { 86 int Scorer::model_version() const {
87 return model_.version(); 87 return model_.version();
88 } 88 }
89 89
90 const base::hash_set<std::string>& Scorer::page_terms() const { 90 const base::hash_set<std::string>& Scorer::page_terms() const {
91 return page_terms_; 91 return page_terms_;
92 } 92 }
93 93
94 const base::hash_set<std::string>& Scorer::page_words() const { 94 const base::hash_set<uint32>& Scorer::page_words() const {
95 return page_words_; 95 return page_words_;
96 } 96 }
97 97
98 size_t Scorer::max_words_per_term() const { 98 size_t Scorer::max_words_per_term() const {
99 return model_.max_words_per_term(); 99 return model_.max_words_per_term();
100 } 100 }
101 101
102 uint32 Scorer::murmurhash3_seed() const {
103 return model_.murmur_hash_seed();
104 }
105
102 double Scorer::ComputeRuleScore(const ClientSideModel::Rule& rule, 106 double Scorer::ComputeRuleScore(const ClientSideModel::Rule& rule,
103 const FeatureMap& features) const { 107 const FeatureMap& features) const {
104 const base::hash_map<std::string, double>& feature_map = features.features(); 108 const base::hash_map<std::string, double>& feature_map = features.features();
105 double rule_score = 1.0; 109 double rule_score = 1.0;
106 for (int i = 0; i < rule.feature_size(); ++i) { 110 for (int i = 0; i < rule.feature_size(); ++i) {
107 base::hash_map<std::string, double>::const_iterator it = feature_map.find( 111 base::hash_map<std::string, double>::const_iterator it = feature_map.find(
108 model_.hashes(rule.feature(i))); 112 model_.hashes(rule.feature(i)));
109 if (it == feature_map.end() || it->second == 0.0) { 113 if (it == feature_map.end() || it->second == 0.0) {
110 // If the feature of the rule does not exist in the given feature map the 114 // If the feature of the rule does not exist in the given feature map the
111 // feature weight is considered to be zero. If the feature weight is zero 115 // feature weight is considered to be zero. If the feature weight is zero
112 // we leave early since we know that the rule score will be zero. 116 // we leave early since we know that the rule score will be zero.
113 return 0.0; 117 return 0.0;
114 } 118 }
115 rule_score *= it->second; 119 rule_score *= it->second;
116 } 120 }
117 return rule_score * rule.weight(); 121 return rule_score * rule.weight();
118 } 122 }
119 } // namespace safe_browsing 123 } // namespace safe_browsing
OLD | NEW
« no previous file with comments | « chrome/renderer/safe_browsing/scorer.h ('k') | chrome/renderer/safe_browsing/scorer_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698