OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // This class loads a client-side model and lets you compute a phishing score | 5 // This class loads a client-side model and lets you compute a phishing score |
6 // for a set of previously extracted features. The phishing score corresponds | 6 // for a set of previously extracted features. The phishing score corresponds |
7 // to the probability that the features are indicative of a phishing site. | 7 // to the probability that the features are indicative of a phishing site. |
8 // | 8 // |
9 // For more details on how the score is actually computed for a given model | 9 // For more details on how the score is actually computed for a given model |
10 // and a given set of features read the comments in client_model.proto file. | 10 // and a given set of features read the comments in client_model.proto file. |
11 // | 11 // |
12 // See features.h for a list of features that are currently used. | 12 // See features.h for a list of features that are currently used. |
13 | 13 |
14 #ifndef CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ | 14 #ifndef CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ |
15 #define CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ | 15 #define CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ |
16 | 16 |
| 17 #include <stddef.h> |
| 18 #include <stdint.h> |
| 19 |
17 #include <string> | 20 #include <string> |
18 | 21 |
19 #include "base/basictypes.h" | |
20 #include "base/containers/hash_tables.h" | 22 #include "base/containers/hash_tables.h" |
| 23 #include "base/macros.h" |
21 #include "base/strings/string_piece.h" | 24 #include "base/strings/string_piece.h" |
22 #include "chrome/common/safe_browsing/client_model.pb.h" | 25 #include "chrome/common/safe_browsing/client_model.pb.h" |
23 | 26 |
24 namespace safe_browsing { | 27 namespace safe_browsing { |
25 class FeatureMap; | 28 class FeatureMap; |
26 | 29 |
27 // Scorer methods are virtual to simplify mocking of this class. | 30 // Scorer methods are virtual to simplify mocking of this class. |
28 class Scorer { | 31 class Scorer { |
29 public: | 32 public: |
30 virtual ~Scorer(); | 33 virtual ~Scorer(); |
(...skipping 11 matching lines...) Expand all Loading... |
42 int model_version() const; | 45 int model_version() const; |
43 | 46 |
44 // -- Accessors used by the page feature extractor --------------------------- | 47 // -- Accessors used by the page feature extractor --------------------------- |
45 | 48 |
46 // Returns a set of hashed page terms that appear in the model in binary | 49 // Returns a set of hashed page terms that appear in the model in binary |
47 // format. | 50 // format. |
48 const base::hash_set<std::string>& page_terms() const; | 51 const base::hash_set<std::string>& page_terms() const; |
49 | 52 |
50 // Returns a set of hashed page words that appear in the model in binary | 53 // Returns a set of hashed page words that appear in the model in binary |
51 // format. | 54 // format. |
52 const base::hash_set<uint32>& page_words() const; | 55 const base::hash_set<uint32_t>& page_words() const; |
53 | 56 |
54 // Return the maximum number of words per term for the loaded model. | 57 // Return the maximum number of words per term for the loaded model. |
55 size_t max_words_per_term() const; | 58 size_t max_words_per_term() const; |
56 | 59 |
57 // Returns the murmurhash3 seed for the loaded model. | 60 // Returns the murmurhash3 seed for the loaded model. |
58 uint32 murmurhash3_seed() const; | 61 uint32_t murmurhash3_seed() const; |
59 | 62 |
60 // Return the maximum number of unique shingle hashes per page. | 63 // Return the maximum number of unique shingle hashes per page. |
61 size_t max_shingles_per_page() const; | 64 size_t max_shingles_per_page() const; |
62 | 65 |
63 // Return the number of words in a shingle. | 66 // Return the number of words in a shingle. |
64 size_t shingle_size() const; | 67 size_t shingle_size() const; |
65 | 68 |
66 protected: | 69 protected: |
67 // Most clients should use the factory method. This constructor is protected | 70 // Most clients should use the factory method. This constructor is protected | 
68 // so that mock implementations can subclass Scorer. | 71 // so that mock implementations can subclass Scorer. | 
69 Scorer(); | 72 Scorer(); |
70 | 73 |
71 private: | 74 private: |
72 friend class PhishingScorerTest; | 75 friend class PhishingScorerTest; |
73 | 76 |
74 // Computes the score for a given rule and feature map. The score is computed | 77 // Computes the score for a given rule and feature map. The score is computed |
75 // by multiplying the rule weight with the product of feature weights for the | 78 // by multiplying the rule weight with the product of feature weights for the |
76 // given rule. The feature weights are stored in the feature map. If a | 79 // given rule. The feature weights are stored in the feature map. If a |
77 // particular feature does not exist in the feature map we set its weight to | 80 // particular feature does not exist in the feature map we set its weight to |
78 // zero. | 81 // zero. |
79 double ComputeRuleScore(const ClientSideModel::Rule& rule, | 82 double ComputeRuleScore(const ClientSideModel::Rule& rule, |
80 const FeatureMap& features) const; | 83 const FeatureMap& features) const; |
81 | 84 |
82 ClientSideModel model_; | 85 ClientSideModel model_; |
83 base::hash_set<std::string> page_terms_; | 86 base::hash_set<std::string> page_terms_; |
84 base::hash_set<uint32> page_words_; | 87 base::hash_set<uint32_t> page_words_; |
85 | 88 |
86 DISALLOW_COPY_AND_ASSIGN(Scorer); | 89 DISALLOW_COPY_AND_ASSIGN(Scorer); |
87 }; | 90 }; |
88 } // namespace safe_browsing | 91 } // namespace safe_browsing |
89 | 92 |
90 #endif // CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ | 93 #endif // CHROME_RENDERER_SAFE_BROWSING_SCORER_H_ |
OLD | NEW |