OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <string> | 8 #include <string> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
(...skipping 21 matching lines...) Expand all Loading... |
32 } else { | 32 } else { |
33 std::string host; | 33 std::string host; |
34 TrimString(url.host(), ".", &host); // Remove any leading/trailing dots. | 34 TrimString(url.host(), ".", &host); // Remove any leading/trailing dots. |
35 | 35 |
36 // TODO(bryner): Ensure that the url encoding is consistent with | 36 // TODO(bryner): Ensure that the url encoding is consistent with |
37 // the features in the model. | 37 // the features in the model. |
38 | 38 |
39 // Disallow unknown registries so that we don't classify | 39 // Disallow unknown registries so that we don't classify |
40 // partial hostnames (e.g. "www.subdomain"). | 40 // partial hostnames (e.g. "www.subdomain"). |
41 size_t registry_length = | 41 size_t registry_length = |
42 net::RegistryControlledDomainService::GetRegistryLength(host, false); | 42 net::RegistryControlledDomainService::GetRegistryLength( |
| 43 host, |
| 44 net::RCDS::EXCLUDE_UNKNOWN_REGISTRIES, |
| 45 net::RCDS::EXCLUDE_PRIVATE_REGISTRIES); |
43 | 46 |
44 if (registry_length == 0 || registry_length == std::string::npos) { | 47 if (registry_length == 0 || registry_length == std::string::npos) { |
45 DVLOG(1) << "Could not find TLD for host: " << host; | 48 DVLOG(1) << "Could not find TLD for host: " << host; |
46 return false; | 49 return false; |
47 } | 50 } |
48 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but " | 51 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but " |
49 "host is only a TLD: " << host; | 52 "host is only a TLD: " << host; |
50 size_t tld_start = host.size() - registry_length; | 53 size_t tld_start = host.size() - registry_length; |
51 if (!features->AddBooleanFeature(features::kUrlTldToken + | 54 if (!features->AddBooleanFeature(features::kUrlTldToken + |
52 host.substr(tld_start))) | 55 host.substr(tld_start))) |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
112 // Copy over only the splits that are 3 or more chars long. | 115 // Copy over only the splits that are 3 or more chars long. |
113 // TODO(bryner): Determine a meaningful min size. | 116 // TODO(bryner): Determine a meaningful min size. |
114 for (std::vector<std::string>::iterator it = raw_splits.begin(); | 117 for (std::vector<std::string>::iterator it = raw_splits.begin(); |
115 it != raw_splits.end(); ++it) { | 118 it != raw_splits.end(); ++it) { |
116 if (it->length() >= kMinPathComponentLength) | 119 if (it->length() >= kMinPathComponentLength) |
117 tokens->push_back(*it); | 120 tokens->push_back(*it); |
118 } | 121 } |
119 } | 122 } |
120 | 123 |
121 } // namespace safe_browsing | 124 } // namespace safe_browsing |
OLD | NEW |