| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/renderer/safe_browsing/scorer.h" | |
| 6 | |
| 7 #include <math.h> | |
| 8 | |
| 9 #include <memory> | |
| 10 | |
| 11 #include "base/logging.h" | |
| 12 #include "base/metrics/histogram_macros.h" | |
| 13 #include "base/strings/string_piece.h" | |
| 14 #include "chrome/common/safe_browsing/client_model.pb.h" | |
| 15 #include "chrome/renderer/safe_browsing/features.h" | |
| 16 | |
| 17 namespace { | |
| 18 // Enum used to keep stats about the status of the Scorer creation. | |
| 19 enum ScorerCreationStatus { | |
| 20 SCORER_SUCCESS, | |
| 21 SCORER_FAIL_MODEL_OPEN_FAIL, // Not used anymore | |
| 22 SCORER_FAIL_MODEL_FILE_EMPTY, // Not used anymore | |
| 23 SCORER_FAIL_MODEL_FILE_TOO_LARGE, // Not used anymore | |
| 24 SCORER_FAIL_MODEL_PARSE_ERROR, | |
| 25 SCORER_FAIL_MODEL_MISSING_FIELDS, | |
| 26 SCORER_STATUS_MAX // Always add new values before this one. | |
| 27 }; | |
| 28 | |
| 29 void RecordScorerCreationStatus(ScorerCreationStatus status) { | |
| 30 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.ScorerCreationStatus", | |
| 31 status, | |
| 32 SCORER_STATUS_MAX); | |
| 33 } | |
| 34 } // namespace | |
| 35 | |
| 36 namespace safe_browsing { | |
| 37 | |
// Maps a log-odds value to the matching probability in the range [0.0, 1.0].
static double LogOdds2Prob(double log_odds) {
  // 709 = floor(1023 * ln(2)), so exp(709) is still a finite double. Very
  // negative inputs are harmless because the odds simply underflow to 0. The
  // only hazard is on the large side: once exp() yields +infinity,
  // odds / (odds + 1) would evaluate to NaN, so saturate to 1.0 instead.
  const double kLargestSafeLogOdds = 709;
  if (log_odds >= kLargestSafeLogOdds) {
    return 1.0;
  }
  const double odds = exp(log_odds);
  const double probability = odds / (odds + 1.0);
  return probability;
}
| 50 | |
| 51 Scorer::Scorer() {} | |
| 52 Scorer::~Scorer() {} | |
| 53 | |
| 54 /* static */ | |
| 55 Scorer* Scorer::Create(const base::StringPiece& model_str) { | |
| 56 std::unique_ptr<Scorer> scorer(new Scorer()); | |
| 57 ClientSideModel& model = scorer->model_; | |
| 58 if (!model.ParseFromArray(model_str.data(), model_str.size())) { | |
| 59 DLOG(ERROR) << "Unable to parse phishing model. This Scorer object is " | |
| 60 << "invalid."; | |
| 61 RecordScorerCreationStatus(SCORER_FAIL_MODEL_PARSE_ERROR); | |
| 62 return NULL; | |
| 63 } else if (!model.IsInitialized()) { | |
| 64 DLOG(ERROR) << "Unable to parse phishing model. The model is missing " | |
| 65 << "some required fields. Maybe the .proto file changed?"; | |
| 66 RecordScorerCreationStatus(SCORER_FAIL_MODEL_MISSING_FIELDS); | |
| 67 return NULL; | |
| 68 } | |
| 69 RecordScorerCreationStatus(SCORER_SUCCESS); | |
| 70 for (int i = 0; i < model.page_term_size(); ++i) { | |
| 71 scorer->page_terms_.insert(model.hashes(model.page_term(i))); | |
| 72 } | |
| 73 for (int i = 0; i < model.page_word_size(); ++i) { | |
| 74 scorer->page_words_.insert(model.page_word(i)); | |
| 75 } | |
| 76 return scorer.release(); | |
| 77 } | |
| 78 | |
| 79 double Scorer::ComputeScore(const FeatureMap& features) const { | |
| 80 double logodds = 0.0; | |
| 81 for (int i = 0; i < model_.rule_size(); ++i) { | |
| 82 logodds += ComputeRuleScore(model_.rule(i), features); | |
| 83 } | |
| 84 return LogOdds2Prob(logodds); | |
| 85 } | |
| 86 | |
| 87 int Scorer::model_version() const { | |
| 88 return model_.version(); | |
| 89 } | |
| 90 | |
| 91 const base::hash_set<std::string>& Scorer::page_terms() const { | |
| 92 return page_terms_; | |
| 93 } | |
| 94 | |
| 95 const base::hash_set<uint32_t>& Scorer::page_words() const { | |
| 96 return page_words_; | |
| 97 } | |
| 98 | |
| 99 size_t Scorer::max_words_per_term() const { | |
| 100 return model_.max_words_per_term(); | |
| 101 } | |
| 102 | |
| 103 uint32_t Scorer::murmurhash3_seed() const { | |
| 104 return model_.murmur_hash_seed(); | |
| 105 } | |
| 106 | |
| 107 size_t Scorer::max_shingles_per_page() const { | |
| 108 return model_.max_shingles_per_page(); | |
| 109 } | |
| 110 | |
| 111 size_t Scorer::shingle_size() const { | |
| 112 return model_.shingle_size(); | |
| 113 } | |
| 114 | |
| 115 double Scorer::ComputeRuleScore(const ClientSideModel::Rule& rule, | |
| 116 const FeatureMap& features) const { | |
| 117 const base::hash_map<std::string, double>& feature_map = features.features(); | |
| 118 double rule_score = 1.0; | |
| 119 for (int i = 0; i < rule.feature_size(); ++i) { | |
| 120 base::hash_map<std::string, double>::const_iterator it = feature_map.find( | |
| 121 model_.hashes(rule.feature(i))); | |
| 122 if (it == feature_map.end() || it->second == 0.0) { | |
| 123 // If the feature of the rule does not exist in the given feature map the | |
| 124 // feature weight is considered to be zero. If the feature weight is zero | |
| 125 // we leave early since we know that the rule score will be zero. | |
| 126 return 0.0; | |
| 127 } | |
| 128 rule_score *= it->second; | |
| 129 } | |
| 130 return rule_score * rule.weight(); | |
| 131 } | |
| 132 } // namespace safe_browsing | |
| OLD | NEW |