| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <string> | 8 #include <string> |
| 9 #include <vector> | 9 #include <vector> |
| 10 | 10 |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 51 } | 51 } |
| 52 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but " | 52 DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but " |
| 53 "host is only a TLD: " << host; | 53 "host is only a TLD: " << host; |
| 54 size_t tld_start = host.size() - registry_length; | 54 size_t tld_start = host.size() - registry_length; |
| 55 if (!features->AddBooleanFeature(features::kUrlTldToken + | 55 if (!features->AddBooleanFeature(features::kUrlTldToken + |
| 56 host.substr(tld_start))) | 56 host.substr(tld_start))) |
| 57 return false; | 57 return false; |
| 58 | 58 |
| 59 // Pull off the TLD and the preceding dot. | 59 // Pull off the TLD and the preceding dot. |
| 60 host.erase(tld_start - 1); | 60 host.erase(tld_start - 1); |
| 61 std::vector<std::string> host_tokens; | 61 std::vector<std::string> host_tokens = base::SplitString( |
| 62 base::SplitStringDontTrim(host, '.', &host_tokens); | 62 host, ".", base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); |
| 63 // Get rid of any empty components. | |
| 64 std::vector<std::string>::iterator new_end = | |
| 65 std::remove(host_tokens.begin(), host_tokens.end(), ""); | |
| 66 host_tokens.erase(new_end, host_tokens.end()); | |
| 67 if (host_tokens.empty()) { | 63 if (host_tokens.empty()) { |
| 68 DVLOG(1) << "Could not find domain for host: " << host; | 64 DVLOG(1) << "Could not find domain for host: " << host; |
| 69 return false; | 65 return false; |
| 70 } | 66 } |
| 71 if (!features->AddBooleanFeature(features::kUrlDomainToken + | 67 if (!features->AddBooleanFeature(features::kUrlDomainToken + |
| 72 host_tokens.back())) | 68 host_tokens.back())) |
| 73 return false; | 69 return false; |
| 74 host_tokens.pop_back(); | 70 host_tokens.pop_back(); |
| 75 | 71 |
| 76 // Now we're just left with the "other" host tokens. | 72 // Now we're just left with the "other" host tokens. |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 113 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE, | 109 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE, |
| 114 base::SPLIT_WANT_NONEMPTY)) { | 110 base::SPLIT_WANT_NONEMPTY)) { |
| 115 // Copy over only the splits that are 3 or more chars long. | 111 // Copy over only the splits that are 3 or more chars long. |
| 116 // TODO(bryner): Determine a meaningful min size. | 112 // TODO(bryner): Determine a meaningful min size. |
| 117 if (token.length() >= kMinPathComponentLength) | 113 if (token.length() >= kMinPathComponentLength) |
| 118 tokens->push_back(token.as_string()); | 114 tokens->push_back(token.as_string()); |
| 119 } | 115 } |
| 120 } | 116 } |
| 121 | 117 |
| 122 } // namespace safe_browsing | 118 } // namespace safe_browsing |
| OLD | NEW |