| OLD | NEW |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // Common types and constants for extracting and evaluating features in the | 5 // Common types and constants for extracting and evaluating features in the |
| 6 // client-side phishing detection model. A feature is simply a string and an | 6 // client-side phishing detection model. A feature is simply a string and an |
| 7 // associated floating-point value between 0 and 1. The phishing | 7 // associated floating-point value between 0 and 1. The phishing |
| 8 // classification model contains rules which give an appropriate weight to each | 8 // classification model contains rules which give an appropriate weight to each |
| 9 // feature or combination of features. These values can then be summed to | 9 // feature or combination of features. These values can then be summed to |
| 10 // compute a final phishiness score. | 10 // compute a final phishiness score. |
| (...skipping 26 matching lines...) Expand all Loading... |
| 37 class FeatureMap { | 37 class FeatureMap { |
| 38 public: | 38 public: |
| 39 FeatureMap(); | 39 FeatureMap(); |
| 40 ~FeatureMap(); | 40 ~FeatureMap(); |
| 41 | 41 |
| 42 // Adds a boolean feature to a FeatureMap with a value of 1.0. | 42 // Adds a boolean feature to a FeatureMap with a value of 1.0. |
| 43 // Returns true on success, or false if the feature map exceeds | 43 // Returns true on success, or false if the feature map exceeds |
| 44 // kMaxFeatureMapSize. | 44 // kMaxFeatureMapSize. |
| 45 bool AddBooleanFeature(const std::string& name); | 45 bool AddBooleanFeature(const std::string& name); |
| 46 | 46 |
| 47 // Adds a real-valued feature to a FeatureMap with the given value. |
| 48 // Values must always be in the range [0.0, 1.0]. Returns true on |
| 49 // success, or false if the feature map exceeds kMaxFeatureMapSize |
| 50 // or the value is outside of the allowed range. |
| 51 bool AddRealFeature(const std::string& name, double value); |
| 52 |
| 47 // Provides read-only access to the current set of features. | 53 // Provides read-only access to the current set of features. |
| 48 const base::hash_map<std::string, double>& features() const { | 54 const base::hash_map<std::string, double>& features() const { |
| 49 return features_; | 55 return features_; |
| 50 } | 56 } |
| 51 | 57 |
| 52 // Clears the set of features in the map. | 58 // Clears the set of features in the map. |
| 53 void Clear(); | 59 void Clear(); |
| 54 | 60 |
| 55 // This is an upper bound on the number of features that will be extracted. | 61 // This is an upper bound on the number of features that will be extracted. |
| 56 // We should never hit this cap; it is intended as a sanity check to prevent | 62 // We should never hit this cap; it is intended as a sanity check to prevent |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 96 | 102 |
| 97 //////////////////////////////////////////////////// | 103 //////////////////////////////////////////////////// |
| 98 // URL path token features | 104 // URL path token features |
| 99 //////////////////////////////////////////////////// | 105 //////////////////////////////////////////////////// |
| 100 | 106 |
| 101 // Token feature containing each alphanumeric string in the path that is at | 107 // Token feature containing each alphanumeric string in the path that is at |
| 102 // least 3 characters long. For example, "/abc/d/efg" would have 2 path | 108 // least 3 characters long. For example, "/abc/d/efg" would have 2 path |
| 103 // token features, "abc" and "efg". Query parameters are not included. | 109 // token features, "abc" and "efg". Query parameters are not included. |
| 104 extern const char kUrlPathToken[]; | 110 extern const char kUrlPathToken[]; |
| 105 | 111 |
| 112 //////////////////////////////////////////////////// |
| 113 // DOM HTML form features |
| 114 //////////////////////////////////////////////////// |
| 115 |
| 116 // Set if the page has any <form> elements. |
| 117 extern const char kPageHasForms[]; |
| 118 // The fraction of form elements whose |action| attribute points to a |
| 119 // URL on a different domain from the document URL. |
| 120 extern const char kPageActionOtherDomainFreq[]; |
| 121 |
| 122 // Set if the page has any <input type="text"> elements |
| 123 // (includes inputs with missing or unknown types). |
| 124 extern const char kPageHasTextInputs[]; |
| 125 // Set if the page has any <input type="password"> elements. |
| 126 extern const char kPageHasPswdInputs[]; |
| 127 // Set if the page has any <input type="radio"> elements. |
| 128 extern const char kPageHasRadioInputs[]; |
| 129 // Set if the page has any <input type="checkbox"> elements. |
| 130 extern const char kPageHasCheckInputs[]; |
| 131 |
| 132 //////////////////////////////////////////////////// |
| 133 // DOM HTML link features |
| 134 //////////////////////////////////////////////////// |
| 135 |
| 136 // The fraction of links in the page which point to a domain other than the |
| 137 // domain of the document. See "URL host features" above for a discussion |
| 138 // of how the domain is computed. |
| 139 extern const char kPageExternalLinksFreq[]; |
| 140 // Token feature containing each external domain that is linked to. |
| 141 extern const char kPageLinkDomain[]; |
| 142 // Fraction of links in the page that use https. |
| 143 extern const char kPageSecureLinksFreq[]; |
| 144 |
| 145 //////////////////////////////////////////////////// |
| 146 // DOM HTML script features |
| 147 //////////////////////////////////////////////////// |
| 148 |
| 149 // Set if the number of <script> elements in the page is greater than 1. |
| 150 extern const char kPageNumScriptTagsGTOne[]; |
| 151 // Set if the number of <script> elements in the page is greater than 6. |
| 152 extern const char kPageNumScriptTagsGTSix[]; |
| 153 |
| 154 //////////////////////////////////////////////////// |
| 155 // Other DOM HTML features |
| 156 //////////////////////////////////////////////////// |
| 157 |
| 158 // The fraction of images whose src attribute points to an external domain. |
| 159 extern const char kPageImgOtherDomainFreq[]; |
| 160 |
| 106 } // namespace features | 161 } // namespace features |
| 107 } // namespace safe_browsing | 162 } // namespace safe_browsing |
| 108 | 163 |
| 109 #endif // CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ | 164 #endif // CHROME_RENDERER_SAFE_BROWSING_FEATURES_H_ |
| OLD | NEW |