OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/scorer.h" | 5 #include "chrome/renderer/safe_browsing/scorer.h" |
6 | 6 |
7 #include <math.h> | 7 #include <math.h> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
(...skipping 52 matching lines...)
63 DLOG(ERROR) << "Unable to parse phishing model. The model is missing " | 63 DLOG(ERROR) << "Unable to parse phishing model. The model is missing " |
64 << "some required fields. Maybe the .proto file changed?"; | 64 << "some required fields. Maybe the .proto file changed?"; |
65 RecordScorerCreationStatus(SCORER_FAIL_MODEL_MISSING_FIELDS); | 65 RecordScorerCreationStatus(SCORER_FAIL_MODEL_MISSING_FIELDS); |
66 return NULL; | 66 return NULL; |
67 } | 67 } |
68 RecordScorerCreationStatus(SCORER_SUCCESS); | 68 RecordScorerCreationStatus(SCORER_SUCCESS); |
69 for (int i = 0; i < model.page_term_size(); ++i) { | 69 for (int i = 0; i < model.page_term_size(); ++i) { |
70 scorer->page_terms_.insert(model.hashes(model.page_term(i))); | 70 scorer->page_terms_.insert(model.hashes(model.page_term(i))); |
71 } | 71 } |
72 for (int i = 0; i < model.page_word_size(); ++i) { | 72 for (int i = 0; i < model.page_word_size(); ++i) { |
73 scorer->page_words_.insert(model.hashes(model.page_word(i))); | 73 scorer->page_words_.insert(model.page_word(i)); |
74 } | 74 } |
75 return scorer.release(); | 75 return scorer.release(); |
76 } | 76 } |
77 | 77 |
// NOTE(review): Scorer::ComputeScore -- identical in the OLD and NEW columns.
// Adds up ComputeRuleScore() for every rule stored in model_ and returns the
// result of passing that sum to LogOdds2Prob(). LogOdds2Prob is defined
// outside this excerpt; presumably it maps log-odds to a probability --
// confirm at its definition.
78 double Scorer::ComputeScore(const FeatureMap& features) const { | 78 double Scorer::ComputeScore(const FeatureMap& features) const { |
79 double logodds = 0.0; | 79 double logodds = 0.0; |
80 for (int i = 0; i < model_.rule_size(); ++i) { | 80 for (int i = 0; i < model_.rule_size(); ++i) { |
81 logodds += ComputeRuleScore(model_.rule(i), features); | 81 logodds += ComputeRuleScore(model_.rule(i), features); |
82 } | 82 } |
83 return LogOdds2Prob(logodds); | 83 return LogOdds2Prob(logodds); |
84 } | 84 } |
85 | 85 |
// NOTE(review): Scorer::model_version -- identical in the OLD and NEW columns.
// Plain accessor: returns model_.version() as an int.
86 int Scorer::model_version() const { | 86 int Scorer::model_version() const { |
87 return model_.version(); | 87 return model_.version(); |
88 } | 88 } |
89 | 89 |
// NOTE(review): Scorer::page_terms -- identical in the OLD and NEW columns.
// Returns a const reference to the page_terms_ member, a hash_set of
// std::string. The loop at OLD/NEW lines 69-71 fills that set with
// model.hashes(model.page_term(i)) for each page_term index in the model.
90 const base::hash_set<std::string>& Scorer::page_terms() const { | 90 const base::hash_set<std::string>& Scorer::page_terms() const { |
91 return page_terms_; | 91 return page_terms_; |
92 } | 92 } |
93 | 93 |
// NOTE(review): Scorer::page_words -- the return type differs between columns.
// OLD returns a const reference to a hash_set of std::string; NEW returns a
// const reference to a hash_set of uint32. This matches the change at
// OLD/NEW line 73, where NEW inserts model.page_word(i) directly instead of
// looking the index up through model.hashes(). Callers of page_words() must
// be updated for the new element type.
94 const base::hash_set<std::string>& Scorer::page_words() const { | 94 const base::hash_set<uint32>& Scorer::page_words() const { |
95 return page_words_; | 95 return page_words_; |
96 } | 96 } |
97 | 97 |
// NOTE(review): Scorer::max_words_per_term -- identical in the OLD and NEW
// columns. Plain accessor: returns model_.max_words_per_term() as a size_t.
98 size_t Scorer::max_words_per_term() const { | 98 size_t Scorer::max_words_per_term() const { |
99 return model_.max_words_per_term(); | 99 return model_.max_words_per_term(); |
100 } | 100 } |
101 | 101 |
// NOTE(review): Scorer::murmurhash3_seed -- present only in the NEW column
// (the OLD column is empty for these rows). Plain accessor: returns
// model_.murmur_hash_seed() as a uint32.
| 102 uint32 Scorer::murmurhash3_seed() const { |
| 103 return model_.murmur_hash_seed(); |
| 104 } |
| 105 |
// NOTE(review): Scorer::ComputeRuleScore -- the code is identical in the OLD
// and NEW columns; only the line numbers shift by four.
// Computes the contribution of a single rule: the product of the feature-map
// values for every feature the rule references, multiplied by rule.weight().
// Returns 0.0 as soon as any referenced feature is absent from the map or has
// the value 0.0.
102 double Scorer::ComputeRuleScore(const ClientSideModel::Rule& rule, | 106 double Scorer::ComputeRuleScore(const ClientSideModel::Rule& rule, |
103 const FeatureMap& features) const { | 107 const FeatureMap& features) const { |
104 const base::hash_map<std::string, double>& feature_map = features.features(); | 108 const base::hash_map<std::string, double>& feature_map = features.features(); |
// Starts at 1.0, so a rule with no features returns rule.weight() unchanged.
105 double rule_score = 1.0; | 109 double rule_score = 1.0; |
106 for (int i = 0; i < rule.feature_size(); ++i) {  | 110 for (int i = 0; i < rule.feature_size(); ++i) { |
// rule.feature(i) is used as an index into model_.hashes(); the value found
// there is the key looked up in the feature map.
107 base::hash_map<std::string, double>::const_iterator it = feature_map.find( | 111 base::hash_map<std::string, double>::const_iterator it = feature_map.find( |
108 model_.hashes(rule.feature(i))); | 112 model_.hashes(rule.feature(i))); |
109 if (it == feature_map.end() || it->second == 0.0) { | 113 if (it == feature_map.end() || it->second == 0.0) { |
110 // If the feature of the rule does not exist in the given feature map the | 114 // If the feature of the rule does not exist in the given feature map the |
111 // feature weight is considered to be zero. If the feature weight is zero | 115 // feature weight is considered to be zero. If the feature weight is zero |
112 // we leave early since we know that the rule score will be zero. | 116 // we leave early since we know that the rule score will be zero. |
113 return 0.0; | 117 return 0.0; |
114 } | 118 } |
115 rule_score *= it->second; | 119 rule_score *= it->second; |
116 } | 120 } |
117 return rule_score * rule.weight(); | 121 return rule_score * rule.weight(); |
118 } | 122 } |
119 } // namespace safe_browsing | 123 } // namespace safe_browsing |
OLD | NEW |