OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <string> | 8 #include <string> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
(...skipping 22 matching lines...) Expand all Loading... |
33 // Remove any leading/trailing dots. | 33 // Remove any leading/trailing dots. |
34 std::string host; | 34 std::string host; |
35 base::TrimString(url.host(), ".", &host); | 35 base::TrimString(url.host(), ".", &host); |
36 | 36 |
37 // TODO(bryner): Ensure that the url encoding is consistent with | 37 // TODO(bryner): Ensure that the url encoding is consistent with |
38 // the features in the model. | 38 // the features in the model. |
39 | 39 |
40 // Disallow unknown registries so that we don't classify | 40 // Disallow unknown registries so that we don't classify |
41 // partial hostnames (e.g. "www.subdomain"). | 41 // partial hostnames (e.g. "www.subdomain"). |
42 size_t registry_length = | 42 size_t registry_length = |
43 net::registry_controlled_domains::GetRegistryLength( | 43 net::registry_controlled_domains::GetCanonicalHostRegistryLength( |
44 host, | 44 host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
45 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, | |
46 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | 45 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
47 | 46 |
48 if (registry_length == 0 || registry_length == std::string::npos) { | 47 if (registry_length == 0 || registry_length == std::string::npos) { |
49 DVLOG(1) << "Could not find TLD for host: " << host; | 48 DVLOG(1) << "Could not find TLD for host: " << host; |
50 return false; | 49 return false; |
51 } | 50 } |
52 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but " | 51 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but " |
53 "host is only a TLD: " << host; | 52 "host is only a TLD: " << host; |
54 size_t tld_start = host.size() - registry_length; | 53 size_t tld_start = host.size() - registry_length; |
55 if (!features->AddBooleanFeature(features::kUrlTldToken + | 54 if (!features->AddBooleanFeature(features::kUrlTldToken + |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
109 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE, | 108 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE, |
110 base::SPLIT_WANT_NONEMPTY)) { | 109 base::SPLIT_WANT_NONEMPTY)) { |
111 // Copy over only the splits that are 3 or more chars long. | 110 // Copy over only the splits that are 3 or more chars long. |
112 // TODO(bryner): Determine a meaningful min size. | 111 // TODO(bryner): Determine a meaningful min size. |
113 if (token.length() >= kMinPathComponentLength) | 112 if (token.length() >= kMinPathComponentLength) |
114 tokens->push_back(token.as_string()); | 113 tokens->push_back(token.as_string()); |
115 } | 114 } |
116 } | 115 } |
117 | 116 |
118 } // namespace safe_browsing | 117 } // namespace safe_browsing |
OLD | NEW |