| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // Common types and constants for extracting and evaluating features in the | 5 // Common types and constants for extracting and evaluating features in the |
| 6 // client-side phishing detection model. A feature is simply a string and an | 6 // client-side phishing detection model. A feature is simply a string and an |
| 7 // associated floating-point value between 0 and 1. The phishing | 7 // associated floating-point value between 0 and 1. The phishing |
| 8 // classification model contains rules which give an appropriate weight to each | 8 // classification model contains rules which give an appropriate weight to each |
| 9 // feature or combination of features. These values can then be summed to | 9 // feature or combination of features. These values can then be summed to |
| 10 // compute a final phishiness score. | 10 // compute a final phishiness score. |
| 11 // | 11 // |
| 12 // Some features are boolean features. If these features are set, they always | 12 // Some features are boolean features. If these features are set, they always |
| 13 // have a value of 0.0 or 1.0. In practice, the features are only set if the | 13 // have a value of 0.0 or 1.0. In practice, the features are only set if the |
| 14 // value is true (1.0). | 14 // value is true (1.0). |
| 15 // | 15 // |
| 16 // We also use token features. These features have a unique name that is | 16 // We also use token features. These features have a unique name that is |
| 17 // constructed from the URL or page contents that we are classifying, for | 17 // constructed from the URL or page contents that we are classifying, for |
| 18 // example, "UrlDomain=chromium". These features are also always set to 1.0 | 18 // example, "UrlDomain=chromium". These features are also always set to 1.0 |
| 19 // if they are present. | 19 // if they are present. |
| 20 // | 20 // |
| 21 // The intermediate storage of the features for a URL is a FeatureMap, which is | 21 // The intermediate storage of the features for a URL is a FeatureMap, which is |
| 22 // just a thin wrapper around a map of feature name to value. The entire set | 22 // just a thin wrapper around a map of feature name to value. The entire set |
| 23 // of features for a URL is extracted before we do any scoring. | 23 // of features for a URL is extracted before we do any scoring. |
| 24 | 24 |
| 25 #ifndef CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ | 25 #ifndef CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ |
| 26 #define CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ | 26 #define CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ |
| 27 | 27 |
| 28 #include <string> | 28 #include <string> |
| 29 #include "base/basictypes.h" | 29 #include "base/basictypes.h" |
| 30 #include "base/hash_tables.h" | 30 #include "base/containers/hash_tables.h" |
| 31 | 31 |
| 32 namespace safe_browsing { | 32 namespace safe_browsing { |
| 33 | 33 |
| 34 // Container for a map of features to values, which enforces behavior | 34 // Container for a map of features to values, which enforces behavior |
| 35 // such as a maximum number of features in the map. | 35 // such as a maximum number of features in the map. |
| 36 class FeatureMap { | 36 class FeatureMap { |
| 37 public: | 37 public: |
| 38 FeatureMap(); | 38 FeatureMap(); |
| 39 ~FeatureMap(); | 39 ~FeatureMap(); |
| 40 | 40 |
| (...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 168 // Token feature for a term (whitespace-delimited) on a page. Terms can be | 168 // Token feature for a term (whitespace-delimited) on a page. Terms can be |
| 169 // single words or multi-word n-grams. Rather than adding this feature for | 169 // single words or multi-word n-grams. Rather than adding this feature for |
| 170 // every possible token on a page, only the terms that are mentioned in the | 170 // every possible token on a page, only the terms that are mentioned in the |
| 171 // classification model are added. | 171 // classification model are added. |
| 172 extern const char kPageTerm[]; | 172 extern const char kPageTerm[]; |
| 173 | 173 |
| 174 } // namespace features | 174 } // namespace features |
| 175 } // namespace safe_browsing | 175 } // namespace safe_browsing |
| 176 | 176 |
| 177 #endif // CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ | 177 #endif // CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ |
| OLD | NEW |