OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // Common types and constants for extracting and evaluating features in the | 5 // Common types and constants for extracting and evaluating features in the |
6 // client-side phishing detection model. A feature is simply a string and an | 6 // client-side phishing detection model. A feature is simply a string and an |
7 // associated floating-point value between 0 and 1. The phishing | 7 // associated floating-point value between 0 and 1. The phishing |
8 // classification model contains rules which give an appropriate weight to each | 8 // classification model contains rules which give an appropriate weight to each |
9 // feature or combination of features. These values can then be summed to | 9 // feature or combination of features. These values can then be summed to |
10 // compute a final phishiness score. | 10 // compute a final phishiness score. |
(...skipping 26 matching lines...) Expand all Loading... |
37 class FeatureMap { | 37 class FeatureMap { |
38 public: | 38 public: |
39 FeatureMap(); | 39 FeatureMap(); |
40 ~FeatureMap(); | 40 ~FeatureMap(); |
41 | 41 |
42 // Adds a boolean feature to a FeatureMap with a value of 1.0. | 42 // Adds a boolean feature to a FeatureMap with a value of 1.0. |
43 // Returns true on success, or false if the feature map exceeds | 43 // Returns true on success, or false if the feature map exceeds |
44 // kMaxFeatureMapSize. | 44 // kMaxFeatureMapSize. |
45 bool AddBooleanFeature(const std::string& name); | 45 bool AddBooleanFeature(const std::string& name); |
46 | 46 |
| 47 // Adds a real-valued feature to a FeatureMap with the given value. |
| 48 // Values must always be in the range [0.0, 1.0]. Returns true on |
| 49 // success, or false if the feature map exceeds kMaxFeatureMapSize |
| 50 // or the value is outside of the allowed range. |
| 51 bool AddRealFeature(const std::string& name, double value); |
| 52 |
47 // Provides read-only access to the current set of features. | 53 // Provides read-only access to the current set of features. |
48 const base::hash_map<std::string, double>& features() const { | 54 const base::hash_map<std::string, double>& features() const { |
49 return features_; | 55 return features_; |
50 } | 56 } |
51 | 57 |
52 // Clears the set of features in the map. | 58 // Clears the set of features in the map. |
53 void Clear(); | 59 void Clear(); |
54 | 60 |
55 // This is an upper bound on the number of features that will be extracted. | 61 // This is an upper bound on the number of features that will be extracted. |
56 // We should never hit this cap; it is intended as a sanity check to prevent | 62 // We should never hit this cap; it is intended as a sanity check to prevent |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
96 | 102 |
97 //////////////////////////////////////////////////// | 103 //////////////////////////////////////////////////// |
98 // URL path token features | 104 // URL path token features |
99 //////////////////////////////////////////////////// | 105 //////////////////////////////////////////////////// |
100 | 106 |
101 // Token feature containing each alphanumeric string in the path that is at | 107 // Token feature containing each alphanumeric string in the path that is at |
102 // least 3 characters long. For example, "/abc/d/efg" would have 2 path | 108 // least 3 characters long. For example, "/abc/d/efg" would have 2 path |
103 // token features, "abc" and "efg". Query parameters are not included. | 109 // token features, "abc" and "efg". Query parameters are not included. |
104 extern const char kUrlPathToken[]; | 110 extern const char kUrlPathToken[]; |
105 | 111 |
| 112 //////////////////////////////////////////////////// |
| 113 // DOM HTML form features |
| 114 //////////////////////////////////////////////////// |
| 115 |
| 116 // Set if the page has any <form> elements. |
| 117 extern const char kPageHasForms[]; |
| 118 // The fraction of form elements whose |action| attribute points to a |
| 119 // URL on a different domain from the document URL. |
| 120 extern const char kPageActionOtherDomainFreq[]; |
| 121 |
| 122 // Set if the page has any <input type="text"> elements |
| 123 // (includes inputs with missing or unknown types). |
| 124 extern const char kPageHasTextInputs[]; |
| 125 // Set if the page has any <input type="password"> elements. |
| 126 extern const char kPageHasPswdInputs[]; |
| 127 // Set if the page has any <input type="radio"> elements. |
| 128 extern const char kPageHasRadioInputs[]; |
| 129 // Set if the page has any <input type="checkbox"> elements. |
| 130 extern const char kPageHasCheckInputs[]; |
| 131 |
| 132 //////////////////////////////////////////////////// |
| 133 // DOM HTML link features |
| 134 //////////////////////////////////////////////////// |
| 135 |
| 136 // The fraction of links in the page which point to a domain other than the |
| 137 // domain of the document. See "URL host features" above for a discussion |
| 138 // of how the domain is computed. |
| 139 extern const char kPageExternalLinksFreq[]; |
| 140 // Token feature containing each external domain that is linked to. |
| 141 extern const char kPageLinkDomain[]; |
| 142 // Fraction of links in the page that use https. |
| 143 extern const char kPageSecureLinksFreq[]; |
| 144 |
| 145 //////////////////////////////////////////////////// |
| 146 // DOM HTML script features |
| 147 //////////////////////////////////////////////////// |
| 148 |
| 149 // Set if the number of <script> elements in the page is greater than 1. |
| 150 extern const char kPageNumScriptTagsGTOne[]; |
| 151 // Set if the number of <script> elements in the page is greater than 6. |
| 152 extern const char kPageNumScriptTagsGTSix[]; |
| 153 |
| 154 //////////////////////////////////////////////////// |
| 155 // Other DOM HTML features |
| 156 //////////////////////////////////////////////////// |
| 157 |
| 158 // The fraction of images whose src attribute points to an external domain. |
| 159 extern const char kPageImgOtherDomainFreq[]; |
| 160 |
106 } // namespace features | 161 } // namespace features |
107 } // namespace safe_browsing | 162 } // namespace safe_browsing |
108 | 163 |
109 #endif // CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ | 164 #endif // CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ |
OLD | NEW |