Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(267)

Side by Side Diff: chrome/browser/safe_browsing/browser_feature_extractor.cc

Issue 42553002: Mostly integrate new malware IP blacklist with the csd client. When (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Address more comments Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h" 5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
6 6
7 #include <map> 7 #include <map>
8 #include <utility> 8 #include <utility>
9 9
10 #include "base/bind.h" 10 #include "base/bind.h"
11 #include "base/bind_helpers.h" 11 #include "base/bind_helpers.h"
12 #include "base/format_macros.h" 12 #include "base/format_macros.h"
13 #include "base/stl_util.h" 13 #include "base/stl_util.h"
14 #include "base/strings/stringprintf.h" 14 #include "base/strings/stringprintf.h"
15 #include "base/time/time.h" 15 #include "base/time/time.h"
16 #include "chrome/browser/common/cancelable_request.h" 16 #include "chrome/browser/common/cancelable_request.h"
17 #include "chrome/browser/history/history_service.h" 17 #include "chrome/browser/history/history_service.h"
18 #include "chrome/browser/history/history_service_factory.h" 18 #include "chrome/browser/history/history_service_factory.h"
19 #include "chrome/browser/history/history_types.h" 19 #include "chrome/browser/history/history_types.h"
20 #include "chrome/browser/profiles/profile.h" 20 #include "chrome/browser/profiles/profile.h"
21 #include "chrome/browser/safe_browsing/browser_features.h" 21 #include "chrome/browser/safe_browsing/browser_features.h"
22 #include "chrome/browser/safe_browsing/client_side_detection_service.h" 22 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
23 #include "chrome/browser/safe_browsing/database_manager.h"
23 #include "chrome/common/safe_browsing/csd.pb.h" 24 #include "chrome/common/safe_browsing/csd.pb.h"
24 #include "content/public/browser/browser_thread.h" 25 #include "content/public/browser/browser_thread.h"
25 #include "content/public/browser/navigation_controller.h" 26 #include "content/public/browser/navigation_controller.h"
26 #include "content/public/browser/navigation_entry.h" 27 #include "content/public/browser/navigation_entry.h"
27 #include "content/public/browser/web_contents.h" 28 #include "content/public/browser/web_contents.h"
28 #include "content/public/common/page_transition_types.h" 29 #include "content/public/common/page_transition_types.h"
29 #include "url/gurl.h" 30 #include "url/gurl.h"
30 31
31 using content::BrowserThread; 32 using content::BrowserThread;
32 using content::NavigationController; 33 using content::NavigationController;
33 using content::NavigationEntry; 34 using content::NavigationEntry;
34 using content::WebContents; 35 using content::WebContents;
35 36
36 namespace safe_browsing { 37 namespace safe_browsing {
37 38
38 const int BrowserFeatureExtractor::kMaxMalwareIPPerRequest = 5; 39 namespace {
40
41 const int kMaxMalwareIPPerRequest = 5;
42
43 void FilterBenignIpsOnIOThread(
44 scoped_refptr<SafeBrowsingDatabaseManager> database_manager,
45 IPUrlMap* ips) {
mattm 2013/10/31 05:26:40 could add DCHECK(BrowserThread::CurrentlyOn(Brows
noé 2013/10/31 20:39:58 Done.
46 std::vector<std::string> to_delete;
47 for (IPUrlMap::const_iterator it = ips->begin();
48 it != ips->end(); ++it) {
49 if (!database_manager.get() ||
50 !database_manager->MatchMalwareIP(it->first)) {
51 to_delete.push_back(it->first);
mattm 2013/10/31 05:26:40 It should be possible to do this without the secon
noé 2013/10/31 20:39:58 Actually, we don't even need a copy iterator. Don
52 }
53 }
54 for (std::vector<std::string>::const_iterator it = to_delete.begin();
55 it != to_delete.end(); ++it) {
56 ips->erase(*it);
57 }
58 }
59 } // namespace
39 60
40 BrowseInfo::BrowseInfo() : http_status_code(0) {} 61 BrowseInfo::BrowseInfo() : http_status_code(0) {}
41 62
42 BrowseInfo::~BrowseInfo() {} 63 BrowseInfo::~BrowseInfo() {}
43 64
44 static void AddFeature(const std::string& feature_name, 65 static void AddFeature(const std::string& feature_name,
45 double feature_value, 66 double feature_value,
46 ClientPhishingRequest* request) { 67 ClientPhishingRequest* request) {
47 DCHECK(request); 68 DCHECK(request);
48 ClientPhishingRequest::Feature* feature = 69 ClientPhishingRequest::Feature* feature =
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
125 features::kRedirect, 146 features::kRedirect,
126 i, 147 i,
127 printable_redirect.c_str()), 148 printable_redirect.c_str()),
128 1.0, 149 1.0,
129 request); 150 request);
130 } 151 }
131 } 152 }
132 153
133 BrowserFeatureExtractor::BrowserFeatureExtractor( 154 BrowserFeatureExtractor::BrowserFeatureExtractor(
134 WebContents* tab, 155 WebContents* tab,
135 ClientSideDetectionService* service) 156 ClientSideDetectionHost* host)
136 : tab_(tab), 157 : tab_(tab),
137 service_(service), 158 host_(host),
138 weak_factory_(this) { 159 weak_factory_(this) {
139 DCHECK(tab); 160 DCHECK(tab);
140 } 161 }
141 162
142 BrowserFeatureExtractor::~BrowserFeatureExtractor() { 163 BrowserFeatureExtractor::~BrowserFeatureExtractor() {
143 weak_factory_.InvalidateWeakPtrs(); 164 weak_factory_.InvalidateWeakPtrs();
144 // Delete all the pending extractions (delete callback and request objects). 165 // Delete all the pending extractions (delete callback and request objects).
145 STLDeleteContainerPairFirstPointers(pending_extractions_.begin(), 166 STLDeleteContainerPairFirstPointers(pending_extractions_.begin(),
146 pending_extractions_.end()); 167 pending_extractions_.end());
147 168
(...skipping 14 matching lines...) Expand all
162 } 183 }
163 184
164 void BrowserFeatureExtractor::ExtractFeatures(const BrowseInfo* info, 185 void BrowserFeatureExtractor::ExtractFeatures(const BrowseInfo* info,
165 ClientPhishingRequest* request, 186 ClientPhishingRequest* request,
166 const DoneCallback& callback) { 187 const DoneCallback& callback) {
167 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 188 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
168 DCHECK(request); 189 DCHECK(request);
169 DCHECK(info); 190 DCHECK(info);
170 DCHECK_EQ(0U, request->url().find("http:")); 191 DCHECK_EQ(0U, request->url().find("http:"));
171 DCHECK(!callback.is_null()); 192 DCHECK(!callback.is_null());
172 if (callback.is_null()) {
173 DLOG(ERROR) << "ExtractFeatures called without a callback object";
174 return;
175 }
176
177 // Extract features pertaining to this navigation. 193 // Extract features pertaining to this navigation.
178 const NavigationController& controller = tab_->GetController(); 194 const NavigationController& controller = tab_->GetController();
179 int url_index = -1; 195 int url_index = -1;
180 int first_host_index = -1; 196 int first_host_index = -1;
181 197
182 GURL request_url(request->url()); 198 GURL request_url(request->url());
183 int index = controller.GetCurrentEntryIndex(); 199 int index = controller.GetCurrentEntryIndex();
184 // The url that we are extracting features for should already be committed. 200 // The url that we are extracting features for should already be committed.
185 DCHECK_NE(index, -1); 201 DCHECK_NE(index, -1);
186 for (; index >= 0; index--) { 202 for (; index >= 0; index--) {
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
223 239
224 ExtractBrowseInfoFeatures(*info, request); 240 ExtractBrowseInfoFeatures(*info, request);
225 pending_extractions_[request] = callback; 241 pending_extractions_[request] = callback;
226 base::MessageLoop::current()->PostTask( 242 base::MessageLoop::current()->PostTask(
227 FROM_HERE, 243 FROM_HERE,
228 base::Bind(&BrowserFeatureExtractor::StartExtractFeatures, 244 base::Bind(&BrowserFeatureExtractor::StartExtractFeatures,
229 weak_factory_.GetWeakPtr(), request, callback)); 245 weak_factory_.GetWeakPtr(), request, callback));
230 } 246 }
231 247
232 void BrowserFeatureExtractor::ExtractMalwareFeatures( 248 void BrowserFeatureExtractor::ExtractMalwareFeatures(
233 const BrowseInfo* info, 249 BrowseInfo* info,
234 ClientMalwareRequest* request) { 250 ClientMalwareRequest* request,
251 const MalwareDoneCallback& callback) {
235 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 252 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
236 DCHECK(request);
237 DCHECK(info);
238 DCHECK_EQ(0U, request->url().find("http:")); 253 DCHECK_EQ(0U, request->url().find("http:"));
239 // get the IPs and urls that match the malware blacklisted IP list. 254 DCHECK(!callback.is_null());
240 if (service_) { 255
241 int matched_bad_ips = 0; 256 // Copy the IPs because they might go away before we're done
mattm 2013/10/31 05:26:40 s/Copy/Grab/ (or something)
noé 2013/10/31 20:39:58 Done.
242 for (IPUrlMap::const_iterator it = info->ips.begin(); 257 // checking them against the IP blacklist on the IO thread.
243 it != info->ips.end(); ++it) { 258 scoped_ptr<IPUrlMap> ips(new IPUrlMap);
244 if (service_->IsBadIpAddress(it->first)) { 259 ips->swap(info->ips);
245 AddMalwareFeature(features::kBadIpFetch + it->first, 260
246 it->second, 1.0, request); 261 IPUrlMap* ips_ptr = ips.get();
247 ++matched_bad_ips; 262
248 // Limit the number of matched bad IPs in one request to control 263 // The API doesn't take a scoped_ptr because the API gets mocked and we
249 // the request's size 264 // cannot mock an API that takes scoped_ptr as arguments.
250 if (matched_bad_ips >= kMaxMalwareIPPerRequest) { 265 scoped_ptr<ClientMalwareRequest> req(request);
251 return; 266
252 } 267 // IP blacklist lookups have to happen on the IO thread.
253 } 268 BrowserThread::PostTaskAndReply(
254 } 269 BrowserThread::IO,
255 } 270 FROM_HERE,
271 base::Bind(&FilterBenignIpsOnIOThread,
272 host_->database_manager(),
273 ips_ptr),
274 base::Bind(&BrowserFeatureExtractor::FinishExtractMalwareFeatures,
275 weak_factory_.GetWeakPtr(),
276 base::Passed(&ips), callback, base::Passed(&req)));
256 } 277 }
257 278
258 void BrowserFeatureExtractor::ExtractBrowseInfoFeatures( 279 void BrowserFeatureExtractor::ExtractBrowseInfoFeatures(
259 const BrowseInfo& info, 280 const BrowseInfo& info,
260 ClientPhishingRequest* request) { 281 ClientPhishingRequest* request) {
261 if (service_) {
262 for (IPUrlMap::const_iterator it = info.ips.begin();
263 it != info.ips.end(); ++it) {
264 if (service_->IsBadIpAddress(it->first)) {
265 AddFeature(features::kBadIpFetch + it->first, 1.0, request);
266 }
267 }
268 }
269 if (info.unsafe_resource.get()) { 282 if (info.unsafe_resource.get()) {
270 // A SafeBrowsing interstitial was shown for the current URL. 283 // A SafeBrowsing interstitial was shown for the current URL.
271 AddFeature(features::kSafeBrowsingMaliciousUrl + 284 AddFeature(features::kSafeBrowsingMaliciousUrl +
272 info.unsafe_resource->url.spec(), 285 info.unsafe_resource->url.spec(),
273 1.0, 286 1.0,
274 request); 287 request);
275 AddFeature(features::kSafeBrowsingOriginalUrl + 288 AddFeature(features::kSafeBrowsingOriginalUrl +
276 info.unsafe_resource->original_url.spec(), 289 info.unsafe_resource->original_url.spec(),
277 1.0, 290 1.0,
278 request); 291 request);
(...skipping 209 matching lines...) Expand 10 before | Expand all | Expand 10 after
488 *history = HistoryServiceFactory::GetForProfile(profile, 501 *history = HistoryServiceFactory::GetForProfile(profile,
489 Profile::EXPLICIT_ACCESS); 502 Profile::EXPLICIT_ACCESS);
490 if (*history) { 503 if (*history) {
491 return true; 504 return true;
492 } 505 }
493 } 506 }
494 VLOG(2) << "Unable to query history. No history service available."; 507 VLOG(2) << "Unable to query history. No history service available.";
495 return false; 508 return false;
496 } 509 }
497 510
511 void BrowserFeatureExtractor::FinishExtractMalwareFeatures(
512 scoped_ptr<IPUrlMap> bad_ips,
513 MalwareDoneCallback callback,
514 scoped_ptr<ClientMalwareRequest> request) {
mattm 2013/10/31 05:26:40 could add DCHECK(BrowserThread::CurrentlyOn(Browse
noé 2013/10/31 20:39:58 Done.
515 int matched_bad_ips = 0;
516 for (IPUrlMap::const_iterator it = bad_ips->begin();
517 it != bad_ips->end(); ++it) {
518 AddMalwareFeature(features::kBadIpFetch + it->first,
519 it->second, 1.0, request.get());
520 ++matched_bad_ips;
521 // Limit the number of matched bad IPs in one request to control
522 // the request's size
523 if (matched_bad_ips >= kMaxMalwareIPPerRequest) {
524 break;
525 }
526 }
527 callback.Run(true, request.Pass());
528 }
529
498 } // namespace safe_browsing 530 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698