OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // Common types and constants for extracting and evaluating features in the | 5 // Common types and constants for extracting and evaluating features in the |
6 // client-side phishing detection model. A feature is simply a string and an | 6 // client-side phishing detection model. A feature is simply a string and an |
7 // associated floating-point value between 0 and 1. The phishing | 7 // associated floating-point value between 0 and 1. The phishing |
8 // classification model contains rules which give an appropriate weight to each | 8 // classification model contains rules which give an appropriate weight to each |
9 // feature or combination of features. These values can then be summed to | 9 // feature or combination of features. These values can then be summed to |
10 // compute a final phishiness score. | 10 // compute a final phishiness score. |
11 // | 11 // |
12 // Some features are boolean features. If these features are set, they always | 12 // Some features are boolean features. If these features are set, they always |
13 // have a value of 0.0 or 1.0. In practice, the features are only set if the | 13 // have a value of 0.0 or 1.0. In practice, the features are only set if the |
14 // value is true (1.0). | 14 // value is true (1.0). |
15 // | 15 // |
16 // We also use token features. These features have a unique name that is | 16 // We also use token features. These features have a unique name that is |
17 // constructed from the URL or page contents that we are classifying, for | 17 // constructed from the URL or page contents that we are classifying, for |
18 // example, "UrlDomain=chromium". These features are also always set to 1.0 | 18 // example, "UrlDomain=chromium". These features are also always set to 1.0 |
19 // if they are present. | 19 // if they are present. |
20 // | 20 // |
21 // The intermediate storage of the features for a URL is a FeatureMap, which is | 21 // The intermediate storage of the features for a URL is a FeatureMap, which is |
22 // just a thin wrapper around a map of feature name to value. The entire set | 22 // just a thin wrapper around a map of feature name to value. The entire set |
23 // of features for a URL is extracted before we do any scoring. | 23 // of features for a URL is extracted before we do any scoring. |
24 | 24 |
25 #ifndef CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ | 25 #ifndef CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ |
26 #define CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ | 26 #define CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ |
27 | 27 |
28 #include <string> | 28 #include <string> |
29 #include "base/basictypes.h" | 29 #include "base/basictypes.h" |
30 #include "base/hash_tables.h" | 30 #include "base/containers/hash_tables.h" |
31 | 31 |
32 namespace safe_browsing { | 32 namespace safe_browsing { |
33 | 33 |
34 // Container for a map of features to values, which enforces behavior | 34 // Container for a map of features to values, which enforces behavior |
35 // such as a maximum number of features in the map. | 35 // such as a maximum number of features in the map. |
36 class FeatureMap { | 36 class FeatureMap { |
37 public: | 37 public: |
38 FeatureMap(); | 38 FeatureMap(); |
39 ~FeatureMap(); | 39 ~FeatureMap(); |
40 | 40 |
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
168 // Token feature for a term (whitespace-delimited) on a page. Terms can be | 168 // Token feature for a term (whitespace-delimited) on a page. Terms can be |
169 // single words or multi-word n-grams. Rather than adding this feature for | 169 // single words or multi-word n-grams. Rather than adding this feature for |
170 // every possible token on a page, only the terms that are mentioned in the | 170 // every possible token on a page, only the terms that are mentioned in the |
171 // classification model are added. | 171 // classification model are added. |
172 extern const char kPageTerm[]; | 172 extern const char kPageTerm[]; |
173 | 173 |
174 } // namespace features | 174 } // namespace features |
175 } // namespace safe_browsing | 175 } // namespace safe_browsing |
176 | 176 |
177 #endif // CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ | 177 #endif // CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ |
OLD | NEW |