| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <string> | 8 #include <string> |
| 9 #include <vector> | 9 #include <vector> |
| 10 | 10 |
| (...skipping 69 matching lines...) | (...skipping 69 matching lines...) |
| 80 return false; | 80 return false; |
| 81 if (host_tokens.size() > 3) { | 81 if (host_tokens.size() > 3) { |
| 82 if (!features->AddBooleanFeature( | 82 if (!features->AddBooleanFeature( |
| 83 features::kUrlNumOtherHostTokensGTThree)) | 83 features::kUrlNumOtherHostTokensGTThree)) |
| 84 return false; | 84 return false; |
| 85 } | 85 } |
| 86 } | 86 } |
| 87 } | 87 } |
| 88 | 88 |
| 89 std::vector<std::string> long_tokens; | 89 std::vector<std::string> long_tokens; |
| 90 SplitStringIntoLongAlphanumTokens(url.path(), &long_tokens); | 90 SplitStringIntoLongAlphanumTokens(url.path().as_string(), &long_tokens); |
| 91 for (const std::string& token : long_tokens) { | 91 for (const std::string& token : long_tokens) { |
| 92 if (!features->AddBooleanFeature(features::kUrlPathToken + token)) | 92 if (!features->AddBooleanFeature(features::kUrlPathToken + token)) |
| 93 return false; | 93 return false; |
| 94 } | 94 } |
| 95 | 95 |
| 96 UMA_HISTOGRAM_TIMES("SBClientPhishing.URLFeatureTime", timer.Elapsed()); | 96 UMA_HISTOGRAM_TIMES("SBClientPhishing.URLFeatureTime", timer.Elapsed()); |
| 97 return true; | 97 return true; |
| 98 } | 98 } |
| 99 | 99 |
| 100 // static: splits full on the characters in kTokenSeparators and appends to tokens a std::string copy of every piece whose length is at least kMinPathComponentLength; tokens is appended to, not cleared. | 100 // static: splits full on the characters in kTokenSeparators and appends to tokens a std::string copy of every piece whose length is at least kMinPathComponentLength; tokens is appended to, not cleared. |
| 101 void PhishingUrlFeatureExtractor::SplitStringIntoLongAlphanumTokens( | 101 void PhishingUrlFeatureExtractor::SplitStringIntoLongAlphanumTokens( |
| 102 const std::string& full, | 102 const std::string& full, |
| 103 std::vector<std::string>* tokens) { | 103 std::vector<std::string>* tokens) { |
| 104 // Split on common non-alphanumerics. SPLIT_WANT_NONEMPTY is requested, so empty pieces (e.g. between adjacent separators) are presumably not returned, and KEEP_WHITESPACE presumably leaves whitespace untrimmed -- confirm against base/strings/string_split.h. | 104 // Split on common non-alphanumerics. SPLIT_WANT_NONEMPTY is requested, so empty pieces (e.g. between adjacent separators) are presumably not returned, and KEEP_WHITESPACE presumably leaves whitespace untrimmed -- confirm against base/strings/string_split.h. |
| 105 // TODO(bryner): Split on all(?) non-alphanumerics and handle %XX properly ('%' is currently just another separator, so an escape is split at the '%'). | 105 // TODO(bryner): Split on all(?) non-alphanumerics and handle %XX properly ('%' is currently just another separator, so an escape is split at the '%'). |
| 106 static const char kTokenSeparators[] = ".,\\/_-|=%:!&"; | 106 static const char kTokenSeparators[] = ".,\\/_-|=%:!&"; |
| 107 for (const base::StringPiece& token : | 107 for (const base::StringPiece& token : |
| 108 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE, | 108 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE, |
| 109 base::SPLIT_WANT_NONEMPTY)) { | 109 base::SPLIT_WANT_NONEMPTY)) { |
| 110 // Copy over only the splits that are at least kMinPathComponentLength chars long (3 when this comment was first written; verify against the constant's definition). | 110 // Copy over only the splits that are at least kMinPathComponentLength chars long (3 when this comment was first written; verify against the constant's definition). |
| 111 // TODO(bryner): Determine a meaningful min size. | 111 // TODO(bryner): Determine a meaningful min size. |
| 112 if (token.length() >= kMinPathComponentLength) | 112 if (token.length() >= kMinPathComponentLength) |
| 113 tokens->push_back(token.as_string()); | 113 tokens->push_back(token.as_string()); |
| 114 } | 114 } |
| 115 } | 115 } |
| 116 | 116 |
| 117 } // namespace safe_browsing | 117 } // namespace safe_browsing |
| OLD | NEW |