Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(232)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc

Issue 2481923002: [WIP] make GURL::path() return a StringPiece (Closed)
Patch Set: thanks asan Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « chrome/renderer/extensions/webstore_bindings.cc ('k') | chrome/renderer/searchbox/searchbox.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" 5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <string> 8 #include <string>
9 #include <vector> 9 #include <vector>
10 10
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
80 return false; 80 return false;
81 if (host_tokens.size() > 3) { 81 if (host_tokens.size() > 3) {
82 if (!features->AddBooleanFeature( 82 if (!features->AddBooleanFeature(
83 features::kUrlNumOtherHostTokensGTThree)) 83 features::kUrlNumOtherHostTokensGTThree))
84 return false; 84 return false;
85 } 85 }
86 } 86 }
87 } 87 }
88 88
89 std::vector<std::string> long_tokens; 89 std::vector<std::string> long_tokens;
90 SplitStringIntoLongAlphanumTokens(url.path(), &long_tokens); 90 SplitStringIntoLongAlphanumTokens(url.path().as_string(), &long_tokens);
91 for (const std::string& token : long_tokens) { 91 for (const std::string& token : long_tokens) {
92 if (!features->AddBooleanFeature(features::kUrlPathToken + token)) 92 if (!features->AddBooleanFeature(features::kUrlPathToken + token))
93 return false; 93 return false;
94 } 94 }
95 95
96 UMA_HISTOGRAM_TIMES("SBClientPhishing.URLFeatureTime", timer.Elapsed()); 96 UMA_HISTOGRAM_TIMES("SBClientPhishing.URLFeatureTime", timer.Elapsed());
97 return true; 97 return true;
98 } 98 }
99 99
100 // static 100 // static
101 void PhishingUrlFeatureExtractor::SplitStringIntoLongAlphanumTokens( 101 void PhishingUrlFeatureExtractor::SplitStringIntoLongAlphanumTokens(
102 const std::string& full, 102 const std::string& full,
103 std::vector<std::string>* tokens) { 103 std::vector<std::string>* tokens) {
104 // Split on common non-alphanumerics. 104 // Split on common non-alphanumerics.
105 // TODO(bryner): Split on all(?) non-alphanumerics and handle %XX properly. 105 // TODO(bryner): Split on all(?) non-alphanumerics and handle %XX properly.
106 static const char kTokenSeparators[] = ".,\\/_-|=%:!&"; 106 static const char kTokenSeparators[] = ".,\\/_-|=%:!&";
107 for (const base::StringPiece& token : 107 for (const base::StringPiece& token :
108 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE, 108 base::SplitStringPiece(full, kTokenSeparators, base::KEEP_WHITESPACE,
109 base::SPLIT_WANT_NONEMPTY)) { 109 base::SPLIT_WANT_NONEMPTY)) {
110 // Copy over only the splits that are 3 or more chars long. 110 // Copy over only the splits that are 3 or more chars long.
111 // TODO(bryner): Determine a meaningful min size. 111 // TODO(bryner): Determine a meaningful min size.
112 if (token.length() >= kMinPathComponentLength) 112 if (token.length() >= kMinPathComponentLength)
113 tokens->push_back(token.as_string()); 113 tokens->push_back(token.as_string());
114 } 114 }
115 } 115 }
116 116
117 } // namespace safe_browsing 117 } // namespace safe_browsing
OLD | NEW
« no previous file with comments | « chrome/renderer/extensions/webstore_bindings.cc ('k') | chrome/renderer/searchbox/searchbox.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698