OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // Common types and constants for extracting and evaluating features in the | 5 // Common types and constants for extracting and evaluating features in the |
6 // client-side phishing detection model. A feature is simply a string and an | 6 // client-side phishing detection model. A feature is simply a string and an |
7 // associated floating-point value between 0 and 1. The phishing | 7 // associated floating-point value between 0 and 1. The phishing |
8 // classification model contains rules which give an appropriate weight to each | 8 // classification model contains rules which give an appropriate weight to each |
9 // feature or combination of features. These values can then be summed to | 9 // feature or combination of features. These values can then be summed to |
10 // compute a final phishiness score. | 10 // compute a final phishiness score. |
11 // | 11 // |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
64 static const size_t kMaxFeatureMapSize; | 64 static const size_t kMaxFeatureMapSize; |
65 | 65 |
66 private: | 66 private: |
67 base::hash_map<std::string, double> features_; | 67 base::hash_map<std::string, double> features_; |
68 | 68 |
69 DISALLOW_COPY_AND_ASSIGN(FeatureMap); | 69 DISALLOW_COPY_AND_ASSIGN(FeatureMap); |
70 }; | 70 }; |
71 | 71 |
72 namespace features { | 72 namespace features { |
73 // Constants for the various feature names that we use. | 73 // Constants for the various feature names that we use. |
74 // | |
75 // IMPORTANT: when adding new features, you must update kAllowedFeatures in | |
76 // chrome/browser/safe_browsing/client_side_detection_service.cc if the feature | |
77 // should be sent in sanitized pingbacks. | |
78 | 74 |
79 //////////////////////////////////////////////////// | 75 //////////////////////////////////////////////////// |
80 // URL host features | 76 // URL host features |
81 //////////////////////////////////////////////////// | 77 //////////////////////////////////////////////////// |
82 | 78 |
83 // Set if the URL's hostname is an IP address. | 79 // Set if the URL's hostname is an IP address. |
84 extern const char kUrlHostIsIpAddress[]; | 80 extern const char kUrlHostIsIpAddress[]; |
85 // Token feature containing the portion of the hostname controlled by a | 81 // Token feature containing the portion of the hostname controlled by a |
86 // registrar, for example "com" or "co.uk". | 82 // registrar, for example "com" or "co.uk". |
87 extern const char kUrlTldToken[]; | 83 extern const char kUrlTldToken[]; |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
169 // Token feature for a term (whitespace-delimited) on a page. Terms can be | 165 // Token feature for a term (whitespace-delimited) on a page. Terms can be |
170 // single words or multi-word n-grams. Rather than adding this feature for | 166 // single words or multi-word n-grams. Rather than adding this feature for |
171 // every possible token on a page, only the terms that are mentioned in the | 167 // every possible token on a page, only the terms that are mentioned in the |
172 // classification model are added. | 168 // classification model are added. |
173 extern const char kPageTerm[]; | 169 extern const char kPageTerm[]; |
174 | 170 |
175 } // namespace features | 171 } // namespace features |
176 } // namespace safe_browsing | 172 } // namespace safe_browsing |
177 | 173 |
178 #endif // CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ | 174 #endif // CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ |
OLD | NEW |