| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // This class loads a client-side model and lets you compute a phishing score | 5 // This class loads a client-side model and lets you compute a phishing score |
| 6 // for a set of previously extracted features. The phishing score corresponds | 6 // for a set of previously extracted features. The phishing score corresponds |
| 7 // to the probability that the features are indicative of a phishing site. | 7 // to the probability that the features are indicative of a phishing site. |
| 8 // | 8 // |
| 9 // For more details on how the score is actually computed for a given model | 9 // For more details on how the score is actually computed for a given model |
| 10 // and a given set of features read the comments in client_model.proto file. | 10 // and a given set of features read the comments in client_model.proto file. |
| 11 // | 11 // |
| 12 // See features.h for a list of features that are currently used. | 12 // See features.h for a list of features that are currently used. |
| 13 | 13 |
| 14 #ifndef CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ | 14 #ifndef CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ |
| 15 #define CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ | 15 #define CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ |
| 16 | 16 |
| 17 #include <stddef.h> |
| 18 #include <stdint.h> |
| 19 |
| 17 #include <string> | 20 #include <string> |
| 18 | 21 |
| 19 #include "base/basictypes.h" | |
| 20 #include "base/containers/hash_tables.h" | 22 #include "base/containers/hash_tables.h" |
| 23 #include "base/macros.h" |
| 21 #include "base/strings/string_piece.h" | 24 #include "base/strings/string_piece.h" |
| 22 #include "chrome/common/safe_browsing/client_model.pb.h" | 25 #include "chrome/common/safe_browsing/client_model.pb.h" |
| 23 | 26 |
| 24 namespace safe_browsing { | 27 namespace safe_browsing { |
| 25 class FeatureMap; | 28 class FeatureMap; |
| 26 | 29 |
| 27 // Scorer methods are virtual to simplify mocking of this class. | 30 // Scorer methods are virtual to simplify mocking of this class. |
| 28 class Scorer { | 31 class Scorer { |
| 29 public: | 32 public: |
| 30 virtual ~Scorer(); | 33 virtual ~Scorer(); |
| (...skipping 11 matching lines...) Expand all Loading... |
| 42 int model_version() const; | 45 int model_version() const; |
| 43 | 46 |
| 44 // -- Accessors used by the page feature extractor --------------------------- | 47 // -- Accessors used by the page feature extractor --------------------------- |
| 45 | 48 |
| 46 // Returns a set of hashed page terms that appear in the model in binary | 49 // Returns a set of hashed page terms that appear in the model in binary |
| 47 // format. | 50 // format. |
| 48 const base::hash_set<std::string>& page_terms() const; | 51 const base::hash_set<std::string>& page_terms() const; |
| 49 | 52 |
| 50 // Returns a set of hashed page words that appear in the model in binary | 53 // Returns a set of hashed page words that appear in the model in binary |
| 51 // format. | 54 // format. |
| 52 const base::hash_set<uint32>& page_words() const; | 55 const base::hash_set<uint32_t>& page_words() const; |
| 53 | 56 |
| 54 // Return the maximum number of words per term for the loaded model. | 57 // Return the maximum number of words per term for the loaded model. |
| 55 size_t max_words_per_term() const; | 58 size_t max_words_per_term() const; |
| 56 | 59 |
| 57 // Returns the murmurhash3 seed for the loaded model. | 60 // Returns the murmurhash3 seed for the loaded model. |
| 58 uint32 murmurhash3_seed() const; | 61 uint32_t murmurhash3_seed() const; |
| 59 | 62 |
| 60 // Return the maximum number of unique shingle hashes per page. | 63 // Return the maximum number of unique shingle hashes per page. |
| 61 size_t max_shingles_per_page() const; | 64 size_t max_shingles_per_page() const; |
| 62 | 65 |
| 63 // Return the number of words in a shingle. | 66 // Return the number of words in a shingle. |
| 64 size_t shingle_size() const; | 67 size_t shingle_size() const; |
| 65 | 68 |
| 66 protected: | 69 protected: |
| 67 // Most clients should use the factory method. This constructor is | 70 // Most clients should use the factory method. This constructor is |
| 68 // protected (not private) to allow for mock implementations. | 71 // protected (not private) to allow for mock implementations. |
| 69 Scorer(); | 72 Scorer(); |
| 70 | 73 |
| 71 private: | 74 private: |
| 72 friend class PhishingScorerTest; | 75 friend class PhishingScorerTest; |
| 73 | 76 |
| 74 // Computes the score for a given rule and feature map. The score is computed | 77 // Computes the score for a given rule and feature map. The score is computed |
| 75 // by multiplying the rule weight with the product of feature weights for the | 78 // by multiplying the rule weight with the product of feature weights for the |
| 76 // given rule. The feature weights are stored in the feature map. If a | 79 // given rule. The feature weights are stored in the feature map. If a |
| 77 // particular feature does not exist in the feature map we set its weight to | 80 // particular feature does not exist in the feature map we set its weight to |
| 78 // zero. | 81 // zero. |
| 79 double ComputeRuleScore(const ClientSideModel::Rule& rule, | 82 double ComputeRuleScore(const ClientSideModel::Rule& rule, |
| 80 const FeatureMap& features) const; | 83 const FeatureMap& features) const; |
| 81 | 84 |
| 82 ClientSideModel model_; | 85 ClientSideModel model_; |
| 83 base::hash_set<std::string> page_terms_; | 86 base::hash_set<std::string> page_terms_; |
| 84 base::hash_set<uint32> page_words_; | 87 base::hash_set<uint32_t> page_words_; |
| 85 | 88 |
| 86 DISALLOW_COPY_AND_ASSIGN(Scorer); | 89 DISALLOW_COPY_AND_ASSIGN(Scorer); |
| 87 }; | 90 }; |
| 88 } // namespace safe_browsing | 91 } // namespace safe_browsing |
| 89 | 92 |
| 90 #endif // CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ | 93 #endif // CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ |
| OLD | NEW |