OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // Common types and constants for extracting and evaluating features in the | 5 // Common types and constants for extracting and evaluating features in the |
6 // client-side phishing detection model. A feature is simply a string and an | 6 // client-side phishing detection model. A feature is simply a string and an |
7 // associated floating-point value between 0 and 1. The phishing | 7 // associated floating-point value between 0 and 1. The phishing |
8 // classification model contains rules which give an appropriate weight to each | 8 // classification model contains rules which give an appropriate weight to each |
9 // feature or combination of features. These values can then be summed to | 9 // feature or combination of features. These values can then be summed to |
10 // compute a final phishiness score. | 10 // compute a final phishiness score. |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
64 static const size_t kMaxFeatureMapSize; | 64 static const size_t kMaxFeatureMapSize; |
65 | 65 |
66 private: | 66 private: |
67 base::hash_map<std::string, double> features_; | 67 base::hash_map<std::string, double> features_; |
68 | 68 |
69 DISALLOW_COPY_AND_ASSIGN(FeatureMap); | 69 DISALLOW_COPY_AND_ASSIGN(FeatureMap); |
70 }; | 70 }; |
71 | 71 |
72 namespace features { | 72 namespace features { |
73 // Constants for the various feature names that we use. | 73 // Constants for the various feature names that we use. |
| 74 // |
| 75 // IMPORTANT: when adding new features, you must update kAllowedFeatures in |
| 76 // chrome/browser/safe_browsing/client_side_detection_service.cc if the feature |
| 77 // should be sent in sanitized pingbacks. |
74 | 78 |
75 //////////////////////////////////////////////////// | 79 //////////////////////////////////////////////////// |
76 // URL host features | 80 // URL host features |
77 //////////////////////////////////////////////////// | 81 //////////////////////////////////////////////////// |
78 | 82 |
79 // Set if the URL's hostname is an IP address. | 83 // Set if the URL's hostname is an IP address. |
80 extern const char kUrlHostIsIpAddress[]; | 84 extern const char kUrlHostIsIpAddress[]; |
81 // Token feature containing the portion of the hostname controlled by a | 85 // Token feature containing the portion of the hostname controlled by a |
82 // registrar, for example "com" or "co.uk". | 86 // registrar, for example "com" or "co.uk". |
83 extern const char kUrlTldToken[]; | 87 extern const char kUrlTldToken[]; |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
165 // Token feature for a term (whitespace-delimited) on a page. Terms can be | 169 // Token feature for a term (whitespace-delimited) on a page. Terms can be |
166 // single words or multi-word n-grams. Rather than adding this feature for | 170 // single words or multi-word n-grams. Rather than adding this feature for |
167 // every possible token on a page, only the terms that are mentioned in the | 171 // every possible token on a page, only the terms that are mentioned in the |
168 // classification model are added. | 172 // classification model are added. |
169 extern const char kPageTerm[]; | 173 extern const char kPageTerm[]; |
170 | 174 |
171 } // namespace features | 175 } // namespace features |
172 } // namespace safe_browsing | 176 } // namespace safe_browsing |
173 | 177 |
174 #endif // CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ | 178 #endif // CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ |
OLD | NEW |