OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <string> | 8 #include <string> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
(...skipping 22 matching lines...) Expand all Loading... |
33 // Remove any leading/trailing dots. | 33 // Remove any leading/trailing dots. |
34 std::string host; | 34 std::string host; |
35 base::TrimString(url.host(), ".", &host); | 35 base::TrimString(url.host(), ".", &host); |
36 | 36 |
37 // TODO(bryner): Ensure that the url encoding is consistent with | 37 // TODO(bryner): Ensure that the url encoding is consistent with |
38 // the features in the model. | 38 // the features in the model. |
39 | 39 |
40 // Disallow unknown registries so that we don't classify | 40 // Disallow unknown registries so that we don't classify |
41 // partial hostnames (e.g. "www.subdomain"). | 41 // partial hostnames (e.g. "www.subdomain"). |
42 size_t registry_length = | 42 size_t registry_length = |
43 net::registry_controlled_domains::GetCanonicalHostRegistryLength( | 43 net::registry_controlled_domains::GetRegistryLength( |
44 host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, | 44 host, |
| 45 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
45 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | 46 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
46 | 47 |
47 if (registry_length == 0 || registry_length == std::string::npos) { | 48 if (registry_length == 0 || registry_length == std::string::npos) { |
48 DVLOG(1) << "Could not find TLD for host: " << host; | 49 DVLOG(1) << "Could not find TLD for host: " << host; |
49 return false; | 50 return false; |
50 } | 51 } |
51 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but " | 52 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but " |
52 "host is only a TLD: " << host; | 53 "host is only a TLD: " << host; |
53 size_t tld_start = host.size() - registry_length; | 54 size_t tld_start = host.size() - registry_length; |
54 if (!features->AddBooleanFeature(features::kUrlTldToken + | 55 if (!features->AddBooleanFeature(features::kUrlTldToken + |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
108 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE, | 109 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE, |
109 base::SPLIT_WANT_NONEMPTY)) { | 110 base::SPLIT_WANT_NONEMPTY)) { |
110 // Copy over only the splits that are 3 or more chars long. | 111 // Copy over only the splits that are 3 or more chars long. |
111 // TODO(bryner): Determine a meaningful min size. | 112 // TODO(bryner): Determine a meaningful min size. |
112 if (token.length() >= kMinPathComponentLength) | 113 if (token.length() >= kMinPathComponentLength) |
113 tokens->push_back(token.as_string()); | 114 tokens->push_back(token.as_string()); |
114 } | 115 } |
115 } | 116 } |
116 | 117 |
117 } // namespace safe_browsing | 118 } // namespace safe_browsing |
OLD | NEW |