Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(736)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc

Issue 2451353002: Reduce buggy usage of the registry controlled domain service. (Closed)
Patch Set: Fix (created 4 years, 1 month ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <string> 8 #include <string>
9 #include <vector> 9 #include <vector>
10 10
(...skipping 22 matching lines...) Expand all
33 // Remove any leading/trailing dots. 33 // Remove any leading/trailing dots.
34 std::string host; 34 std::string host;
35 base::TrimString(url.host(), ".", &host); 35 base::TrimString(url.host(), ".", &host);
36 36
37 // TODO(bryner): Ensure that the url encoding is consistent with 37 // TODO(bryner): Ensure that the url encoding is consistent with
38 // the features in the model. 38 // the features in the model.
39 39
40 // Disallow unknown registries so that we don't classify 40 // Disallow unknown registries so that we don't classify
41 // partial hostnames (e.g. "www.subdomain"). 41 // partial hostnames (e.g. "www.subdomain").
42 size_t registry_length = 42 size_t registry_length =
43 net::registry_controlled_domains::GetRegistryLength( 43 net::registry_controlled_domains::GetCanonicalHostRegistryLength(
44 host, 44 host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
45 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
46 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 45 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
47 46
48 if (registry_length == 0 || registry_length == std::string::npos) { 47 if (registry_length == 0 || registry_length == std::string::npos) {
49 DVLOG(1) << "Could not find TLD for host: " << host; 48 DVLOG(1) << "Could not find TLD for host: " << host;
50 return false; 49 return false;
51 } 50 }
52 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but " 51 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but "
53 "host is only a TLD: " << host; 52 "host is only a TLD: " << host;
54 size_t tld_start = host.size() - registry_length; 53 size_t tld_start = host.size() - registry_length;
55 if (!features->AddBooleanFeature(features::kUrlTldToken + 54 if (!features->AddBooleanFeature(features::kUrlTldToken +
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
109 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE, 108 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE,
110 base::SPLIT_WANT_NONEMPTY)) { 109 base::SPLIT_WANT_NONEMPTY)) {
111 // Copy over only the splits that are 3 or more chars long. 110 // Copy over only the splits that are 3 or more chars long.
112 // TODO(bryner): Determine a meaningful min size. 111 // TODO(bryner): Determine a meaningful min size.
113 if (token.length() >= kMinPathComponentLength) 112 if (token.length() >= kMinPathComponentLength)
114 tokens->push_back(token.as_string()); 113 tokens->push_back(token.as_string());
115 } 114 }
116 } 115 }
117 116
118 } // namespace safe_browsing 117 } // namespace safe_browsing
OLD | NEW
« no previous file with comments | « chrome/browser/supervised_user/supervised_user_url_filter.cc ('k') | components/google/core/browser/google_util.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698