Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(86)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc

Issue 13979002: Add support for split PSL list distinctions. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fixed chrome_frame compilation issue Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <string> 8 #include <string>
9 #include <vector> 9 #include <vector>
10 10
(...skipping 21 matching lines...) Expand all
32 } else { 32 } else {
33 std::string host; 33 std::string host;
34 TrimString(url.host(), ".", &host); // Remove any leading/trailing dots. 34 TrimString(url.host(), ".", &host); // Remove any leading/trailing dots.
35 35
36 // TODO(bryner): Ensure that the url encoding is consistent with 36 // TODO(bryner): Ensure that the url encoding is consistent with
37 // the features in the model. 37 // the features in the model.
38 38
39 // Disallow unknown registries so that we don't classify 39 // Disallow unknown registries so that we don't classify
40 // partial hostnames (e.g. "www.subdomain"). 40 // partial hostnames (e.g. "www.subdomain").
41 size_t registry_length = 41 size_t registry_length =
42 net::RegistryControlledDomainService::GetRegistryLength(host, false); 42 net::RegistryControlledDomainService::GetRegistryLength(
43 host,
44 net::RCDS::EXCLUDE_UNKNOWN_REGISTRIES,
45 net::RCDS::EXCLUDE_PRIVATE_REGISTRIES);
43 46
44 if (registry_length == 0 || registry_length == std::string::npos) { 47 if (registry_length == 0 || registry_length == std::string::npos) {
45 DVLOG(1) << "Could not find TLD for host: " << host; 48 DVLOG(1) << "Could not find TLD for host: " << host;
46 return false; 49 return false;
47 } 50 }
48 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but " 51 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but "
49 "host is only a TLD: " << host; 52 "host is only a TLD: " << host;
50 size_t tld_start = host.size() - registry_length; 53 size_t tld_start = host.size() - registry_length;
51 if (!features->AddBooleanFeature(features::kUrlTldToken + 54 if (!features->AddBooleanFeature(features::kUrlTldToken +
52 host.substr(tld_start))) 55 host.substr(tld_start)))
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
112 // Copy over only the splits that are 3 or more chars long. 115 // Copy over only the splits that are 3 or more chars long.
113 // TODO(bryner): Determine a meaningful min size. 116 // TODO(bryner): Determine a meaningful min size.
114 for (std::vector<std::string>::iterator it = raw_splits.begin(); 117 for (std::vector<std::string>::iterator it = raw_splits.begin();
115 it != raw_splits.end(); ++it) { 118 it != raw_splits.end(); ++it) {
116 if (it->length() >= kMinPathComponentLength) 119 if (it->length() >= kMinPathComponentLength)
117 tokens->push_back(*it); 120 tokens->push_back(*it);
118 } 121 }
119 } 122 }
120 123
121 } // namespace safe_browsing 124 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698