OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <string> | 8 #include <string> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
(...skipping 12 matching lines...) Expand all Loading... |
23 | 23 |
// Destructor. The body is empty: no explicit cleanup is performed here.
24 PhishingUrlFeatureExtractor::~PhishingUrlFeatureExtractor() {} | 24 PhishingUrlFeatureExtractor::~PhishingUrlFeatureExtractor() {} |
25 | 25 |
26 bool PhishingUrlFeatureExtractor::ExtractFeatures(const GURL& url, | 26 bool PhishingUrlFeatureExtractor::ExtractFeatures(const GURL& url, |
27 FeatureMap* features) { | 27 FeatureMap* features) { |
28 base::ElapsedTimer timer; | 28 base::ElapsedTimer timer; |
29 if (url.HostIsIPAddress()) { | 29 if (url.HostIsIPAddress()) { |
30 if (!features->AddBooleanFeature(features::kUrlHostIsIpAddress)) | 30 if (!features->AddBooleanFeature(features::kUrlHostIsIpAddress)) |
31 return false; | 31 return false; |
32 } else { | 32 } else { |
| 33 // Remove any leading/trailing dots. |
33 std::string host; | 34 std::string host; |
34 TrimString(url.host(), ".", &host); // Remove any leading/trailing dots. | 35 base::TrimString(url.host(), ".", &host); |
35 | 36 |
36 // TODO(bryner): Ensure that the url encoding is consistent with | 37 // TODO(bryner): Ensure that the url encoding is consistent with |
37 // the features in the model. | 38 // the features in the model. |
38 | 39 |
39 // Disallow unknown registries so that we don't classify | 40 // Disallow unknown registries so that we don't classify |
40 // partial hostnames (e.g. "www.subdomain"). | 41 // partial hostnames (e.g. "www.subdomain"). |
41 size_t registry_length = | 42 size_t registry_length = |
42 net::registry_controlled_domains::GetRegistryLength( | 43 net::registry_controlled_domains::GetRegistryLength( |
43 host, | 44 host, |
44 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, | 45 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
115 // Copy over only the splits that are 3 or more chars long. | 116 // Copy over only the splits that are 3 or more chars long. |
116 // TODO(bryner): Determine a meaningful min size. | 117 // TODO(bryner): Determine a meaningful min size. |
117 for (std::vector<std::string>::iterator it = raw_splits.begin(); | 118 for (std::vector<std::string>::iterator it = raw_splits.begin(); |
118 it != raw_splits.end(); ++it) { | 119 it != raw_splits.end(); ++it) { |
119 if (it->length() >= kMinPathComponentLength) | 120 if (it->length() >= kMinPathComponentLength) |
120 tokens->push_back(*it); | 121 tokens->push_back(*it); |
121 } | 122 } |
122 } | 123 } |
123 | 124 |
124 } // namespace safe_browsing | 125 } // namespace safe_browsing |
OLD | NEW |