| OLD | NEW |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <string> | 8 #include <string> |
| 9 #include <vector> | 9 #include <vector> |
| 10 |
| 10 #include "base/histogram.h" | 11 #include "base/histogram.h" |
| 11 #include "base/logging.h" | 12 #include "base/logging.h" |
| 12 #include "base/perftimer.h" | 13 #include "base/perftimer.h" |
| 14 #include "base/string_split.h" |
| 13 #include "base/string_util.h" | 15 #include "base/string_util.h" |
| 14 #include "chrome/renderer/safe_browsing/features.h" | 16 #include "chrome/renderer/safe_browsing/features.h" |
| 15 #include "googleurl/src/gurl.h" | 17 #include "googleurl/src/gurl.h" |
| 16 #include "net/base/registry_controlled_domain.h" | 18 #include "net/base/registry_controlled_domain.h" |
| 17 | 19 |
| 18 namespace safe_browsing { | 20 namespace safe_browsing { |
| 19 | 21 |
| 20 PhishingUrlFeatureExtractor::PhishingUrlFeatureExtractor() {} | 22 PhishingUrlFeatureExtractor::PhishingUrlFeatureExtractor() {} |
| 21 | 23 |
| 22 PhishingUrlFeatureExtractor::~PhishingUrlFeatureExtractor() {} | 24 PhishingUrlFeatureExtractor::~PhishingUrlFeatureExtractor() {} |
| (...skipping 24 matching lines...) |
| 47 << "Non-zero registry length, but host is only a TLD: " << host; | 49 << "Non-zero registry length, but host is only a TLD: " << host; |
| 48 size_t tld_start = host.size() - registry_length; | 50 size_t tld_start = host.size() - registry_length; |
| 49 if (!features->AddBooleanFeature(features::kUrlTldToken + | 51 if (!features->AddBooleanFeature(features::kUrlTldToken + |
| 50 host.substr(tld_start))) { | 52 host.substr(tld_start))) { |
| 51 return false; | 53 return false; |
| 52 } | 54 } |
| 53 | 55 |
| 54 // Pull off the TLD and the preceding dot. | 56 // Pull off the TLD and the preceding dot. |
| 55 host.erase(tld_start - 1); | 57 host.erase(tld_start - 1); |
| 56 std::vector<std::string> host_tokens; | 58 std::vector<std::string> host_tokens; |
| 57 SplitStringDontTrim(host, '.', &host_tokens); | 59 base::SplitStringDontTrim(host, '.', &host_tokens); |
| 58 // Get rid of any empty components. | 60 // Get rid of any empty components. |
| 59 std::vector<std::string>::iterator new_end = | 61 std::vector<std::string>::iterator new_end = |
| 60 std::remove(host_tokens.begin(), host_tokens.end(), ""); | 62 std::remove(host_tokens.begin(), host_tokens.end(), ""); |
| 61 host_tokens.erase(new_end, host_tokens.end()); | 63 host_tokens.erase(new_end, host_tokens.end()); |
| 62 if (host_tokens.empty()) { | 64 if (host_tokens.empty()) { |
| 63 LOG(ERROR) << "Could not find domain for host: " << host; | 65 LOG(ERROR) << "Could not find domain for host: " << host; |
| 64 return false; | 66 return false; |
| 65 } | 67 } |
| 66 if (!features->AddBooleanFeature(features::kUrlDomainToken + | 68 if (!features->AddBooleanFeature(features::kUrlDomainToken + |
| 67 host_tokens.back())) { | 69 host_tokens.back())) { |
| (...skipping 50 matching lines...) |
| 118 // TODO(bryner): Determine a meaningful min size. | 120 // TODO(bryner): Determine a meaningful min size. |
| 119 for (std::vector<std::string>::iterator it = raw_splits.begin(); | 121 for (std::vector<std::string>::iterator it = raw_splits.begin(); |
| 120 it != raw_splits.end(); ++it) { | 122 it != raw_splits.end(); ++it) { |
| 121 if (it->length() >= kMinPathComponentLength) { | 123 if (it->length() >= kMinPathComponentLength) { |
| 122 tokens->push_back(*it); | 124 tokens->push_back(*it); |
| 123 } | 125 } |
| 124 } | 126 } |
| 125 } | 127 } |
| 126 | 128 |
| 127 } // namespace safe_browsing | 129 } // namespace safe_browsing |
| OLD | NEW |