OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <string> | 8 #include <string> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
51 } | 51 } |
52 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but " | 52 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but " |
53 "host is only a TLD: " << host; | 53 "host is only a TLD: " << host; |
54 size_t tld_start = host.size() - registry_length; | 54 size_t tld_start = host.size() - registry_length; |
55 if (!features->AddBooleanFeature(features::kUrlTldToken + | 55 if (!features->AddBooleanFeature(features::kUrlTldToken + |
56 host.substr(tld_start))) | 56 host.substr(tld_start))) |
57 return false; | 57 return false; |
58 | 58 |
59 // Pull off the TLD and the preceding dot. | 59 // Pull off the TLD and the preceding dot. |
60 host.erase(tld_start - 1); | 60 host.erase(tld_start - 1); |
61 std::vector<std::string> host_tokens; | 61 std::vector<std::string> host_tokens = base::SplitString( |
62 base::SplitStringDontTrim(host, '.', &host_tokens); | 62 host, ".", base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); |
63 // Get rid of any empty components. | |
64 std::vector<std::string>::iterator new_end = | |
65 std::remove(host_tokens.begin(), host_tokens.end(), ""); | |
66 host_tokens.erase(new_end, host_tokens.end()); | |
67 if (host_tokens.empty()) { | 63 if (host_tokens.empty()) { |
68 DVLOG(1) << "Could not find domain for host: " << host; | 64 DVLOG(1) << "Could not find domain for host: " << host; |
69 return false; | 65 return false; |
70 } | 66 } |
71 if (!features->AddBooleanFeature(features::kUrlDomainToken + | 67 if (!features->AddBooleanFeature(features::kUrlDomainToken + |
72 host_tokens.back())) | 68 host_tokens.back())) |
73 return false; | 69 return false; |
74 host_tokens.pop_back(); | 70 host_tokens.pop_back(); |
75 | 71 |
76 // Now we're just left with the "other" host tokens. | 72 // Now we're just left with the "other" host tokens. |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
113 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE, | 109 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE, |
114 base::SPLIT_WANT_NONEMPTY)) { | 110 base::SPLIT_WANT_NONEMPTY)) { |
115 // Copy over only the splits that are 3 or more chars long. | 111 // Copy over only the splits that are 3 or more chars long. |
116 // TODO(bryner): Determine a meaningful min size. | 112 // TODO(bryner): Determine a meaningful min size. |
117 if (token.length() >= kMinPathComponentLength) | 113 if (token.length() >= kMinPathComponentLength) |
118 tokens->push_back(token.as_string()); | 114 tokens->push_back(token.as_string()); |
119 } | 115 } |
120 } | 116 } |
121 | 117 |
122 } // namespace safe_browsing | 118 } // namespace safe_browsing |
OLD | NEW |