| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 // | |
| 5 // This class loads a client-side model and lets you compute a phishing score | |
| 6 // for a set of previously extracted features. The phishing score corresponds | |
| 7 // to the probability that the features are indicative of a phishing site. | |
| 8 // | |
| 9 // For more details on how the score is actually computed for a given model | |
| 10 // and a given set of features, read the comments in client_model.proto. | |
| 11 // | |
| 12 // See features.h for a list of features that are currently used. | |
| 13 | |
| 14 #ifndef CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ | |
| 15 #define CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ | |
| 16 | |
| 17 #include <stddef.h> | |
| 18 #include <stdint.h> | |
| 19 | |
| 20 #include <string> | |
| 21 | |
| 22 #include "base/containers/hash_tables.h" | |
| 23 #include "base/macros.h" | |
| 24 #include "base/strings/string_piece.h" | |
| 25 #include "chrome/common/safe_browsing/client_model.pb.h" | |
| 26 | |
| 27 namespace safe_browsing { | |
| 28 class FeatureMap; | |
| 29 | |
| 30 // ComputeScore() is virtual to simplify mocking of this class. | |
| 31 class Scorer { | |
| 32 public: | |
| 33 virtual ~Scorer(); | |
| 34 | |
| 35 // Factory method which creates a new Scorer object by parsing the given | |
| 36 // model. If parsing fails, this method returns NULL. | |
| 37 static Scorer* Create(const base::StringPiece& model_str); | |
| 38 | |
| 39 // This method computes the probability that the given features are indicative | |
| 40 // of phishing. It returns a score value that falls in the range [0.0,1.0] | |
| 41 // (range is inclusive on both ends). | |
| 42 virtual double ComputeScore(const FeatureMap& features) const; | |
| 43 | |
| 44 // Returns the version number of the loaded client model. | |
| 45 int model_version() const; | |
| 46 | |
| 47 // -- Accessors used by the page feature extractor --------------------------- | |
| 48 | |
| 49 // Returns a set of hashed page terms that appear in the model in binary | |
| 50 // format. | |
| 51 const base::hash_set<std::string>& page_terms() const; | |
| 52 | |
| 53 // Returns a set of hashed page words that appear in the model in binary | |
| 54 // format. | |
| 55 const base::hash_set<uint32_t>& page_words() const; | |
| 56 | |
| 57 // Returns the maximum number of words per term for the loaded model. | |
| 58 size_t max_words_per_term() const; | |
| 59 | |
| 60 // Returns the murmurhash3 seed for the loaded model. | |
| 61 uint32_t murmurhash3_seed() const; | |
| 62 | |
| 63 // Returns the maximum number of unique shingle hashes per page. | |
| 64 size_t max_shingles_per_page() const; | |
| 65 | |
| 66 // Returns the number of words in a shingle. | |
| 67 size_t shingle_size() const; | |
| 68 | |
| 69 protected: | |
| 70 // Most clients should use the factory method. This constructor is protected | |
| 71 // to allow for mock implementations (subclasses). | |
| 72 Scorer(); | |
| 73 | |
| 74 private: | |
| 75 friend class PhishingScorerTest; | |
| 76 | |
| 77 // Computes the score for a given rule and feature map. The score is computed | |
| 78 // by multiplying the rule weight with the product of feature weights for the | |
| 79 // given rule. The feature weights are stored in the feature map. If a | |
| 80 // particular feature does not exist in the feature map we set its weight to | |
| 81 // zero. | |
| 82 double ComputeRuleScore(const ClientSideModel::Rule& rule, | |
| 83 const FeatureMap& features) const; | |
| 84 | |
| 85 ClientSideModel model_; | |
| 86 base::hash_set<std::string> page_terms_; | |
| 87 base::hash_set<uint32_t> page_words_; | |
| 88 | |
| 89 DISALLOW_COPY_AND_ASSIGN(Scorer); | |
| 90 }; | |
| 91 } // namespace safe_browsing | |
| 92 | |
| 93 #endif // CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ | |
| OLD | NEW |