OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <string> | 8 #include <string> |
9 #include <vector> | 9 #include <vector> |
| 10 |
10 #include "base/histogram.h" | 11 #include "base/histogram.h" |
11 #include "base/logging.h" | 12 #include "base/logging.h" |
12 #include "base/perftimer.h" | 13 #include "base/perftimer.h" |
| 14 #include "base/string_split.h" |
13 #include "base/string_util.h" | 15 #include "base/string_util.h" |
14 #include "chrome/renderer/safe_browsing/features.h" | 16 #include "chrome/renderer/safe_browsing/features.h" |
15 #include "googleurl/src/gurl.h" | 17 #include "googleurl/src/gurl.h" |
16 #include "net/base/registry_controlled_domain.h" | 18 #include "net/base/registry_controlled_domain.h" |
17 | 19 |
18 namespace safe_browsing { | 20 namespace safe_browsing { |
19 | 21 |
20 PhishingUrlFeatureExtractor::PhishingUrlFeatureExtractor() {} | 22 PhishingUrlFeatureExtractor::PhishingUrlFeatureExtractor() {} |
21 | 23 |
22 PhishingUrlFeatureExtractor::~PhishingUrlFeatureExtractor() {} | 24 PhishingUrlFeatureExtractor::~PhishingUrlFeatureExtractor() {} |
(...skipping 24 matching lines...) Expand all Loading... |
47 << "Non-zero registry length, but host is only a TLD: " << host; | 49 << "Non-zero registry length, but host is only a TLD: " << host; |
48 size_t tld_start = host.size() - registry_length; | 50 size_t tld_start = host.size() - registry_length; |
49 if (!features->AddBooleanFeature(features::kUrlTldToken + | 51 if (!features->AddBooleanFeature(features::kUrlTldToken + |
50 host.substr(tld_start))) { | 52 host.substr(tld_start))) { |
51 return false; | 53 return false; |
52 } | 54 } |
53 | 55 |
54 // Pull off the TLD and the preceding dot. | 56 // Pull off the TLD and the preceding dot. |
55 host.erase(tld_start - 1); | 57 host.erase(tld_start - 1); |
56 std::vector<std::string> host_tokens; | 58 std::vector<std::string> host_tokens; |
57 SplitStringDontTrim(host, '.', &host_tokens); | 59 base::SplitStringDontTrim(host, '.', &host_tokens); |
58 // Get rid of any empty components. | 60 // Get rid of any empty components. |
59 std::vector<std::string>::iterator new_end = | 61 std::vector<std::string>::iterator new_end = |
60 std::remove(host_tokens.begin(), host_tokens.end(), ""); | 62 std::remove(host_tokens.begin(), host_tokens.end(), ""); |
61 host_tokens.erase(new_end, host_tokens.end()); | 63 host_tokens.erase(new_end, host_tokens.end()); |
62 if (host_tokens.empty()) { | 64 if (host_tokens.empty()) { |
63 LOG(ERROR) << "Could not find domain for host: " << host; | 65 LOG(ERROR) << "Could not find domain for host: " << host; |
64 return false; | 66 return false; |
65 } | 67 } |
66 if (!features->AddBooleanFeature(features::kUrlDomainToken + | 68 if (!features->AddBooleanFeature(features::kUrlDomainToken + |
67 host_tokens.back())) { | 69 host_tokens.back())) { |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
118 // TODO(bryner): Determine a meaningful min size. | 120 // TODO(bryner): Determine a meaningful min size. |
119 for (std::vector<std::string>::iterator it = raw_splits.begin(); | 121 for (std::vector<std::string>::iterator it = raw_splits.begin(); |
120 it != raw_splits.end(); ++it) { | 122 it != raw_splits.end(); ++it) { |
121 if (it->length() >= kMinPathComponentLength) { | 123 if (it->length() >= kMinPathComponentLength) { |
122 tokens->push_back(*it); | 124 tokens->push_back(*it); |
123 } | 125 } |
124 } | 126 } |
125 } | 127 } |
126 | 128 |
127 } // namespace safe_browsing | 129 } // namespace safe_browsing |
OLD | NEW |