Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(889)

Side by Side Diff: chrome/browser/safe_browsing/browser_feature_extractor.cc

Issue 42553002: Mostly integrate new malware IP blacklist with the csd client. When (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Remove inline accessor Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h" 5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
6 6
7 #include <map> 7 #include <map>
8 #include <utility> 8 #include <utility>
9 9
10 #include "base/bind.h" 10 #include "base/bind.h"
11 #include "base/bind_helpers.h" 11 #include "base/bind_helpers.h"
12 #include "base/format_macros.h" 12 #include "base/format_macros.h"
13 #include "base/stl_util.h" 13 #include "base/stl_util.h"
14 #include "base/strings/stringprintf.h" 14 #include "base/strings/stringprintf.h"
15 #include "base/time/time.h" 15 #include "base/time/time.h"
16 #include "chrome/browser/common/cancelable_request.h" 16 #include "chrome/browser/common/cancelable_request.h"
17 #include "chrome/browser/history/history_service.h" 17 #include "chrome/browser/history/history_service.h"
18 #include "chrome/browser/history/history_service_factory.h" 18 #include "chrome/browser/history/history_service_factory.h"
19 #include "chrome/browser/history/history_types.h" 19 #include "chrome/browser/history/history_types.h"
20 #include "chrome/browser/profiles/profile.h" 20 #include "chrome/browser/profiles/profile.h"
21 #include "chrome/browser/safe_browsing/browser_features.h" 21 #include "chrome/browser/safe_browsing/browser_features.h"
22 #include "chrome/browser/safe_browsing/client_side_detection_service.h" 22 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
23 #include "chrome/browser/safe_browsing/database_manager.h"
23 #include "chrome/common/safe_browsing/csd.pb.h" 24 #include "chrome/common/safe_browsing/csd.pb.h"
24 #include "content/public/browser/browser_thread.h" 25 #include "content/public/browser/browser_thread.h"
25 #include "content/public/browser/navigation_controller.h" 26 #include "content/public/browser/navigation_controller.h"
26 #include "content/public/browser/navigation_entry.h" 27 #include "content/public/browser/navigation_entry.h"
27 #include "content/public/browser/web_contents.h" 28 #include "content/public/browser/web_contents.h"
28 #include "content/public/common/page_transition_types.h" 29 #include "content/public/common/page_transition_types.h"
29 #include "url/gurl.h" 30 #include "url/gurl.h"
30 31
31 using content::BrowserThread; 32 using content::BrowserThread;
32 using content::NavigationController; 33 using content::NavigationController;
33 using content::NavigationEntry; 34 using content::NavigationEntry;
34 using content::WebContents; 35 using content::WebContents;
35 36
36 namespace safe_browsing { 37 namespace safe_browsing {
37 38
38 const int BrowserFeatureExtractor::kMaxMalwareIPPerRequest = 5;
39
40 BrowseInfo::BrowseInfo() : http_status_code(0) {} 39 BrowseInfo::BrowseInfo() : http_status_code(0) {}
41 40
42 BrowseInfo::~BrowseInfo() {} 41 BrowseInfo::~BrowseInfo() {}
43 42
44 static void AddFeature(const std::string& feature_name, 43 static void AddFeature(const std::string& feature_name,
45 double feature_value, 44 double feature_value,
46 ClientPhishingRequest* request) { 45 ClientPhishingRequest* request) {
47 DCHECK(request); 46 DCHECK(request);
48 ClientPhishingRequest::Feature* feature = 47 ClientPhishingRequest::Feature* feature =
49 request->add_non_model_feature_map(); 48 request->add_non_model_feature_map();
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
125 features::kRedirect, 124 features::kRedirect,
126 i, 125 i,
127 printable_redirect.c_str()), 126 printable_redirect.c_str()),
128 1.0, 127 1.0,
129 request); 128 request);
130 } 129 }
131 } 130 }
132 131
133 BrowserFeatureExtractor::BrowserFeatureExtractor( 132 BrowserFeatureExtractor::BrowserFeatureExtractor(
134 WebContents* tab, 133 WebContents* tab,
135 ClientSideDetectionService* service) 134 ClientSideDetectionHost* host)
136 : tab_(tab), 135 : tab_(tab),
137 service_(service), 136 host_(host),
138 weak_factory_(this) { 137 weak_factory_(this) {
139 DCHECK(tab); 138 DCHECK(tab);
140 } 139 }
141 140
142 BrowserFeatureExtractor::~BrowserFeatureExtractor() { 141 BrowserFeatureExtractor::~BrowserFeatureExtractor() {
143 weak_factory_.InvalidateWeakPtrs(); 142 weak_factory_.InvalidateWeakPtrs();
144 // Delete all the pending extractions (delete callback and request objects). 143 // Delete all the pending extractions (delete callback and request objects).
145 STLDeleteContainerPairFirstPointers(pending_extractions_.begin(), 144 STLDeleteContainerPairFirstPointers(pending_extractions_.begin(),
146 pending_extractions_.end()); 145 pending_extractions_.end());
147 146
(...skipping 14 matching lines...) Expand all
162 } 161 }
163 162
164 void BrowserFeatureExtractor::ExtractFeatures(const BrowseInfo* info, 163 void BrowserFeatureExtractor::ExtractFeatures(const BrowseInfo* info,
165 ClientPhishingRequest* request, 164 ClientPhishingRequest* request,
166 const DoneCallback& callback) { 165 const DoneCallback& callback) {
167 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 166 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
168 DCHECK(request); 167 DCHECK(request);
169 DCHECK(info); 168 DCHECK(info);
170 DCHECK_EQ(0U, request->url().find("http:")); 169 DCHECK_EQ(0U, request->url().find("http:"));
171 DCHECK(!callback.is_null()); 170 DCHECK(!callback.is_null());
172 if (callback.is_null()) {
173 DLOG(ERROR) << "ExtractFeatures called without a callback object";
174 return;
175 }
176
177 // Extract features pertaining to this navigation. 171 // Extract features pertaining to this navigation.
178 const NavigationController& controller = tab_->GetController(); 172 const NavigationController& controller = tab_->GetController();
179 int url_index = -1; 173 int url_index = -1;
180 int first_host_index = -1; 174 int first_host_index = -1;
181 175
182 GURL request_url(request->url()); 176 GURL request_url(request->url());
183 int index = controller.GetCurrentEntryIndex(); 177 int index = controller.GetCurrentEntryIndex();
184 // The url that we are extracting features for should already be committed. 178 // The url that we are extracting features for should already be committed.
185 DCHECK_NE(index, -1); 179 DCHECK_NE(index, -1);
186 for (; index >= 0; index--) { 180 for (; index >= 0; index--) {
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
222 } 216 }
223 217
224 ExtractBrowseInfoFeatures(*info, request); 218 ExtractBrowseInfoFeatures(*info, request);
225 pending_extractions_[request] = callback; 219 pending_extractions_[request] = callback;
226 base::MessageLoop::current()->PostTask( 220 base::MessageLoop::current()->PostTask(
227 FROM_HERE, 221 FROM_HERE,
228 base::Bind(&BrowserFeatureExtractor::StartExtractFeatures, 222 base::Bind(&BrowserFeatureExtractor::StartExtractFeatures,
229 weak_factory_.GetWeakPtr(), request, callback)); 223 weak_factory_.GetWeakPtr(), request, callback));
230 } 224 }
231 225
232 void BrowserFeatureExtractor::ExtractMalwareFeatures( 226 namespace {
233 const BrowseInfo* info, 227
234 ClientMalwareRequest* request) { 228 const int kMaxMalwareIPPerRequest = 5;
235 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 229
236 DCHECK(request); 230 void FilterBenignIpsOnOnIOThread(
mattm 2013/10/29 01:11:47 OnOn
noé 2013/10/31 02:41:12 DoneDone.
noé 2013/10/31 02:41:12 DoneDone.
237 DCHECK(info); 231 scoped_refptr<RefCountedIPUrlMap> ips,
238 DCHECK_EQ(0U, request->url().find("http:")); 232 scoped_refptr<SafeBrowsingDatabaseManager> database_manager) {
239 // get the IPs and urls that match the malware blacklisted IP list. 233 IPUrlMap& ip_map = *(ips->data);
240 if (service_) { 234 std::vector<std::string> to_delete;
241 int matched_bad_ips = 0; 235 for (IPUrlMap::const_iterator it = ip_map.begin();
242 for (IPUrlMap::const_iterator it = info->ips.begin(); 236 it != ip_map.end(); ++it) {
243 it != info->ips.end(); ++it) { 237 if (!database_manager.get() ||
244 if (service_->IsBadIpAddress(it->first)) { 238 !database_manager->MatchMalwareIP(it->first)) {
245 AddMalwareFeature(features::kBadIpFetch + it->first, 239 to_delete.push_back(it->first);
246 it->second, 1.0, request);
247 ++matched_bad_ips;
248 // Limit the number of matched bad IPs in one request to control
249 // the request's size
250 if (matched_bad_ips >= kMaxMalwareIPPerRequest) {
251 return;
252 }
253 }
254 } 240 }
255 } 241 }
242 for (std::vector<std::string>::const_iterator it = to_delete.begin();
243 it != to_delete.end(); ++it) {
244 ip_map.erase(*it);
245 }
246 }
247 } // namespace
mattm 2013/10/29 01:11:47 The chromium style seems to be to have the anonymous… [comment truncated in this view]
noé 2013/10/31 02:41:12 Done.
noé 2013/10/31 02:41:12 Done.
248
249 void BrowserFeatureExtractor::ExtractMalwareFeatures(
250 BrowseInfo* info,
251 ClientMalwareRequest* request,
252 const MalwareDoneCallback& callback) {
253 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
254 DCHECK_EQ(0U, request->url().find("http:"));
255 DCHECK(!callback.is_null());
256 if (callback.is_null()) {
257 DLOG(ERROR) << "ExtractMalwareFeatures called without a callback object";
258 return;
259 }
260 // Copy the IPs because they might go away before we're done
261 // checking them against the IP blacklist on the IO thread.
262 scoped_refptr<RefCountedIPUrlMap> ips(new RefCountedIPUrlMap(new IPUrlMap));
263 ips->data->swap(info->ips);
264
265 // The API doesn't take a scoped_ptr because the API gets mocked and we
266 // cannot mock an API that takes scoped_ptr as arguments.
267 scoped_ptr<ClientMalwareRequest> req(request);
268
269 // IP blacklist lookups have to happen on the IO thread.
270 BrowserThread::PostTaskAndReply(
mattm 2013/10/29 01:11:47 It should be possible to do this with ips still as… [comment truncated in this view]
271 BrowserThread::IO,
272 FROM_HERE,
273 base::Bind(&FilterBenignIpsOnOnIOThread,
274 ips,
275 host_->database_manager()),
276 base::Bind(&BrowserFeatureExtractor::FinishExtractMalwareFeatures,
277 weak_factory_.GetWeakPtr(),
278 ips, callback, base::Passed(&req)));
256 } 279 }
257 280
258 void BrowserFeatureExtractor::ExtractBrowseInfoFeatures( 281 void BrowserFeatureExtractor::ExtractBrowseInfoFeatures(
259 const BrowseInfo& info, 282 const BrowseInfo& info,
260 ClientPhishingRequest* request) { 283 ClientPhishingRequest* request) {
261 if (service_) {
262 for (IPUrlMap::const_iterator it = info.ips.begin();
263 it != info.ips.end(); ++it) {
264 if (service_->IsBadIpAddress(it->first)) {
265 AddFeature(features::kBadIpFetch + it->first, 1.0, request);
266 }
267 }
268 }
269 if (info.unsafe_resource.get()) { 284 if (info.unsafe_resource.get()) {
270 // A SafeBrowsing interstitial was shown for the current URL. 285 // A SafeBrowsing interstitial was shown for the current URL.
271 AddFeature(features::kSafeBrowsingMaliciousUrl + 286 AddFeature(features::kSafeBrowsingMaliciousUrl +
272 info.unsafe_resource->url.spec(), 287 info.unsafe_resource->url.spec(),
273 1.0, 288 1.0,
274 request); 289 request);
275 AddFeature(features::kSafeBrowsingOriginalUrl + 290 AddFeature(features::kSafeBrowsingOriginalUrl +
276 info.unsafe_resource->original_url.spec(), 291 info.unsafe_resource->original_url.spec(),
277 1.0, 292 1.0,
278 request); 293 request);
(...skipping 209 matching lines...) Expand 10 before | Expand all | Expand 10 after
488 *history = HistoryServiceFactory::GetForProfile(profile, 503 *history = HistoryServiceFactory::GetForProfile(profile,
489 Profile::EXPLICIT_ACCESS); 504 Profile::EXPLICIT_ACCESS);
490 if (*history) { 505 if (*history) {
491 return true; 506 return true;
492 } 507 }
493 } 508 }
494 VLOG(2) << "Unable to query history. No history service available."; 509 VLOG(2) << "Unable to query history. No history service available.";
495 return false; 510 return false;
496 } 511 }
497 512
513 void BrowserFeatureExtractor::FinishExtractMalwareFeatures(
514 scoped_refptr<RefCountedIPUrlMap> bad_ips,
515 MalwareDoneCallback callback,
516 scoped_ptr<ClientMalwareRequest> request) {
517 int matched_bad_ips = 0;
518 const IPUrlMap& bad_ips_map = *(bad_ips->data);
519 for (IPUrlMap::const_iterator it = bad_ips_map.begin();
520 it != bad_ips_map.end(); ++it) {
521 AddMalwareFeature(features::kBadIpFetch + it->first,
522 it->second, 1.0, request.get());
523 ++matched_bad_ips;
524 // Limit the number of matched bad IPs in one request to control
525 // the request's size
526 if (matched_bad_ips >= kMaxMalwareIPPerRequest) {
527 break;
528 }
529 }
530 bool success = true;
mattm 2013/10/29 01:11:47 unnecessary?
noé 2013/10/31 02:41:12 Done.
531 callback.Run(success, request.Pass());
532 }
533
498 } // namespace safe_browsing 534 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698