Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(673)

Side by Side Diff: chrome/browser/safe_browsing/client_side_detection_host.cc

Issue 173133004: Separate pre-classification checks for client-side malware and phishing (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix the service unit-test. Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/client_side_detection_host.h" 5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
6 6
7 #include <vector> 7 #include <vector>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/ref_counted.h" 10 #include "base/memory/ref_counted.h"
11 #include "base/memory/scoped_ptr.h" 11 #include "base/memory/scoped_ptr.h"
12 #include "base/metrics/histogram.h" 12 #include "base/metrics/histogram.h"
13 #include "base/prefs/pref_service.h" 13 #include "base/prefs/pref_service.h"
14 #include "base/sequenced_task_runner_helpers.h" 14 #include "base/sequenced_task_runner_helpers.h"
15 #include "base/strings/utf_string_conversions.h" 15 #include "base/strings/utf_string_conversions.h"
16 #include "chrome/browser/browser_process.h" 16 #include "chrome/browser/browser_process.h"
17 #include "chrome/browser/profiles/profile.h" 17 #include "chrome/browser/profiles/profile.h"
18 #include "chrome/browser/safe_browsing/browser_feature_extractor.h" 18 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
19 #include "chrome/browser/safe_browsing/client_side_detection_service.h" 19 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
20 #include "chrome/browser/safe_browsing/database_manager.h" 20 #include "chrome/browser/safe_browsing/database_manager.h"
21 #include "chrome/browser/safe_browsing/safe_browsing_service.h" 21 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
22 #include "chrome/common/chrome_switches.h" 22 #include "chrome/common/chrome_switches.h"
mattm 2014/03/18 02:19:06 unused?
noé 2014/03/20 17:01:45 Done.
23 #include "chrome/common/chrome_version_info.h" 23 #include "chrome/common/chrome_version_info.h"
mattm 2014/03/18 02:19:06 unused?
noé 2014/03/20 17:01:45 Done.
24 #include "chrome/common/pref_names.h" 24 #include "chrome/common/pref_names.h"
25 #include "chrome/common/safe_browsing/csd.pb.h" 25 #include "chrome/common/safe_browsing/csd.pb.h"
26 #include "chrome/common/safe_browsing/safebrowsing_messages.h" 26 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
27 #include "content/public/browser/browser_thread.h" 27 #include "content/public/browser/browser_thread.h"
28 #include "content/public/browser/navigation_controller.h" 28 #include "content/public/browser/navigation_controller.h"
29 #include "content/public/browser/navigation_details.h" 29 #include "content/public/browser/navigation_details.h"
30 #include "content/public/browser/navigation_entry.h" 30 #include "content/public/browser/navigation_entry.h"
31 #include "content/public/browser/notification_details.h" 31 #include "content/public/browser/notification_details.h"
32 #include "content/public/browser/notification_source.h" 32 #include "content/public/browser/notification_source.h"
33 #include "content/public/browser/notification_types.h" 33 #include "content/public/browser/notification_types.h"
34 #include "content/public/browser/render_process_host.h" 34 #include "content/public/browser/render_process_host.h"
35 #include "content/public/browser/render_view_host.h" 35 #include "content/public/browser/render_view_host.h"
36 #include "content/public/browser/resource_request_details.h" 36 #include "content/public/browser/resource_request_details.h"
37 #include "content/public/browser/web_contents.h" 37 #include "content/public/browser/web_contents.h"
38 #include "content/public/common/frame_navigate_params.h" 38 #include "content/public/common/frame_navigate_params.h"
39 #include "url/gurl.h" 39 #include "url/gurl.h"
40 40
41 using content::BrowserThread; 41 using content::BrowserThread;
42 using content::NavigationEntry; 42 using content::NavigationEntry;
43 using content::ResourceRequestDetails; 43 using content::ResourceRequestDetails;
44 using content::WebContents; 44 using content::WebContents;
45 45
46 namespace safe_browsing { 46 namespace safe_browsing {
47 47
48 const int ClientSideDetectionHost::kMaxUrlsPerIP = 20; 48 const int ClientSideDetectionHost::kMaxUrlsPerIP = 20;
49 const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200; 49 const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200;
50 50
51 const char kSafeBrowsingMatchKey[] = "safe_browsing_match"; 51 const char kSafeBrowsingMatchKey[] = "safe_browsing_match";
52 52
53 typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;
54
53 // This class is instantiated each time a new toplevel URL loads, and 55 // This class is instantiated each time a new toplevel URL loads, and
54 // asynchronously checks whether the phishing classifier should run for this 56 // asynchronously checks whether the malware and phishing classifiers should run
55 // URL. If so, it notifies the renderer with a StartPhishingDetection IPC. 57 // for this URL. If so, it notifies the host class by calling the provided
56 // Objects of this class are ref-counted and will be destroyed once nobody 58 // callback from the UI thread. Objects of this class are ref-counted and will
57 // uses it anymore. If |web_contents|, |csd_service| or |host| go away you need 59 // be destroyed once nobody uses it anymore. If |web_contents|, |csd_service|
58 // to call Cancel(). We keep the |database_manager| alive in a ref pointer for 60 // or |host| go away you need to call Cancel(). We keep the |database_manager|
59 // as long as it takes. 61 // alive in a ref pointer for as long as it takes.
60 class ClientSideDetectionHost::ShouldClassifyUrlRequest 62 class ClientSideDetectionHost::ShouldClassifyUrlRequest
61 : public base::RefCountedThreadSafe< 63 : public base::RefCountedThreadSafe<
62 ClientSideDetectionHost::ShouldClassifyUrlRequest> { 64 ClientSideDetectionHost::ShouldClassifyUrlRequest> {
63 public: 65 public:
64 ShouldClassifyUrlRequest(const content::FrameNavigateParams& params, 66 ShouldClassifyUrlRequest(
65 WebContents* web_contents, 67 const content::FrameNavigateParams& params,
66 ClientSideDetectionService* csd_service, 68 const ShouldClassifyUrlCallback& start_phishing_classification,
67 SafeBrowsingDatabaseManager* database_manager, 69 const ShouldClassifyUrlCallback& start_malware_classification,
68 ClientSideDetectionHost* host) 70 WebContents* web_contents,
69 : canceled_(false), 71 ClientSideDetectionService* csd_service,
70 params_(params), 72 SafeBrowsingDatabaseManager* database_manager,
73 ClientSideDetectionHost* host)
74 : params_(params),
71 web_contents_(web_contents), 75 web_contents_(web_contents),
72 csd_service_(csd_service), 76 csd_service_(csd_service),
73 database_manager_(database_manager), 77 database_manager_(database_manager),
74 host_(host) { 78 host_(host),
79 start_phishing_classification_cb_(start_phishing_classification),
80 start_malware_classification_cb_(start_malware_classification) {
75 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 81 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
76 DCHECK(web_contents_); 82 DCHECK(web_contents_);
77 DCHECK(csd_service_); 83 DCHECK(csd_service_);
78 DCHECK(database_manager_.get()); 84 DCHECK(database_manager_.get());
79 DCHECK(host_); 85 DCHECK(host_);
80 } 86 }
81 87
82 void Start() { 88 void Start() {
83 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 89 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
84 90
85 // We start by doing some simple checks that can run on the UI thread. 91 // We start by doing some simple checks that can run on the UI thread.
86 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1); 92 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);
93 UMA_HISTOGRAM_COUNTS("SBClientMalware.ClassificationStart", 1);
87 94
88 // Only classify [X]HTML documents. 95 // Only classify [X]HTML documents.
89 if (params_.contents_mime_type != "text/html" && 96 if (params_.contents_mime_type != "text/html" &&
90 params_.contents_mime_type != "application/xhtml+xml") { 97 params_.contents_mime_type != "application/xhtml+xml") {
91 VLOG(1) << "Skipping phishing classification for URL: " << params_.url 98 VLOG(1) << "Skipping phishing classification for URL: " << params_.url
92 << " because it has an unsupported MIME type: " 99 << " because it has an unsupported MIME type: "
93 << params_.contents_mime_type; 100 << params_.contents_mime_type;
94 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 101 DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);
95 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
96 NO_CLASSIFY_MAX);
97 return;
98 } 102 }
99 103
100 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) { 104 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
101 VLOG(1) << "Skipping phishing classification for URL: " << params_.url 105 VLOG(1) << "Skipping phishing classification for URL: " << params_.url
102 << " because of hosting on private IP: " 106 << " because of hosting on private IP: "
103 << params_.socket_address.host(); 107 << params_.socket_address.host();
104 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 108 DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);
105 NO_CLASSIFY_PRIVATE_IP, 109 DontClassifyForMalware(NO_CLASSIFY_PRIVATE_IP);
106 NO_CLASSIFY_MAX);
107 return;
108 } 110 }
109 111
110 // Don't run the phishing classifier if the tab is incognito. 112 // Don't run any classifier if the tab is incognito.
111 if (web_contents_->GetBrowserContext()->IsOffTheRecord()) { 113 if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
112 VLOG(1) << "Skipping phishing classification for URL: " << params_.url 114 VLOG(1) << "Skipping phishing and malware classification for URL: "
113 << " because we're browsing incognito."; 115 << params_.url << " because we're browsing incognito.";
114 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 116 DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);
115 NO_CLASSIFY_OFF_THE_RECORD, 117 DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);
116 NO_CLASSIFY_MAX);
117
118 return;
119 } 118 }
120 119
121 // We lookup the csd-whitelist before we lookup the cache because 120 // We lookup the csd-whitelist before we lookup the cache because
122 // a URL may have recently been whitelisted. If the URL matches 121 // a URL may have recently been whitelisted. If the URL matches
123 // the csd-whitelist we won't start classification. The 122 // the csd-whitelist we won't phishing start classification. The
mattm 2014/03/18 02:19:06 word order?
noé 2014/03/20 17:01:45 Done.
124 // csd-whitelist check has to be done on the IO thread because it 123 // csd-whitelist check has to be done on the IO thread because it
125 // uses the SafeBrowsing service class. 124 // uses the SafeBrowsing service class.
126 BrowserThread::PostTask( 125 if (MaybeClassifyForPhishing() || MaybeClassifyForMalware()) {
127 BrowserThread::IO, 126 BrowserThread::PostTask(
128 FROM_HERE, 127 BrowserThread::IO,
129 base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist, 128 FROM_HERE,
130 this, params_.url)); 129 base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,
130 this, params_.url));
131 }
131 } 132 }
132 133
133 void Cancel() { 134 void Cancel() {
134 canceled_ = true; 135 DontClassifyForPhishing(NO_CLASSIFY_CANCEL);
136 DontClassifyForMalware(NO_CLASSIFY_CANCEL);
135 // Just to make sure we don't do anything stupid we reset all these 137 // Just to make sure we don't do anything stupid we reset all these
136 // pointers except for the safebrowsing service class which may be 138 // pointers except for the safebrowsing service class which may be
137 // accessed by CheckCsdWhitelist(). 139 // accessed by CheckCsdWhitelist().
138 web_contents_ = NULL; 140 web_contents_ = NULL;
139 csd_service_ = NULL; 141 csd_service_ = NULL;
140 host_ = NULL; 142 host_ = NULL;
141 } 143 }
142 144
143 private: 145 private:
144 friend class base::RefCountedThreadSafe< 146 friend class base::RefCountedThreadSafe<
145 ClientSideDetectionHost::ShouldClassifyUrlRequest>; 147 ClientSideDetectionHost::ShouldClassifyUrlRequest>;
146 148
147 // Enum used to keep stats about why the pre-classification check failed. 149 // Enum used to keep stats about why the pre-classification check failed.
148 enum PreClassificationCheckFailures { 150 enum PreClassificationCheckFailures {
149 OBSOLETE_NO_CLASSIFY_PROXY_FETCH, 151 OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
150 NO_CLASSIFY_PRIVATE_IP, 152 NO_CLASSIFY_PRIVATE_IP,
151 NO_CLASSIFY_OFF_THE_RECORD, 153 NO_CLASSIFY_OFF_THE_RECORD,
152 NO_CLASSIFY_MATCH_CSD_WHITELIST, 154 NO_CLASSIFY_MATCH_CSD_WHITELIST,
153 NO_CLASSIFY_TOO_MANY_REPORTS, 155 NO_CLASSIFY_TOO_MANY_REPORTS,
154 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE, 156 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
157 NO_CLASSIFY_NO_DATABASE_MANAGER,
158 NO_CLASSIFY_KILLSWITCH,
159 NO_CLASSIFY_CANCEL,
160 NO_CLASSIFY_RESULT_FROM_CACHE,
155 161
156 NO_CLASSIFY_MAX // Always add new values before this one. 162 NO_CLASSIFY_MAX // Always add new values before this one.
157 }; 163 };
158 164
159 // The destructor can be called either from the UI or the IO thread. 165 // The destructor can be called either from the UI or the IO thread.
160 virtual ~ShouldClassifyUrlRequest() { } 166 virtual ~ShouldClassifyUrlRequest() { }
161 167
168 bool MaybeClassifyForPhishing() const {
mattm 2014/03/18 02:19:06 Calling these MaybeFoo is a bit confusing, usually
noé 2014/03/20 17:01:45 Done.
169 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
170 return !start_phishing_classification_cb_.is_null();
171 }
172
173 bool MaybeClassifyForMalware() const {
174 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
175 return !start_malware_classification_cb_.is_null();
176 }
177
178 void DontClassifyForPhishing(PreClassificationCheckFailures reason) {
179 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
180 if (MaybeClassifyForPhishing()) {
181 // Track the first reason why we stopped classifying for phishing.
182 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
183 reason, NO_CLASSIFY_MAX);
184 VLOG(2) << "Failed phishing pre-classification checks. Reason: "
mattm 2014/03/18 02:19:06 prefer DVLOG instead of VLOG, unless there is a re
noé 2014/03/20 17:01:45 Done.
185 << reason;
186 start_phishing_classification_cb_.Run(false);
187 }
188 start_phishing_classification_cb_.Reset();
189 }
190
191 void DontClassifyForMalware(PreClassificationCheckFailures reason) {
192 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
193 if (MaybeClassifyForMalware()) {
194 // Track the first reason why we stopped classifying for malware.
195 UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",
196 reason, NO_CLASSIFY_MAX);
197 VLOG(2) << "Failed malware pre-classification checks. Reason: "
198 << reason;
199 start_malware_classification_cb_.Run(false);
200 }
201 start_malware_classification_cb_.Reset();
202 }
203
162 void CheckCsdWhitelist(const GURL& url) { 204 void CheckCsdWhitelist(const GURL& url) {
mattm 2014/03/18 02:19:06 update name
noé 2014/03/20 17:01:45 Done.
163 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 205 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
164 if (!database_manager_.get() || 206 // We don't want to call the classification callbacks from the IO
165 database_manager_->MatchCsdWhitelistUrl(url)) { 207 // thread so we simply pass the results of this method to CheckCache()
166 // We're done. There is no point in going back to the UI thread. 208 // which is called on the UI thread.
167 VLOG(1) << "Skipping phishing classification for URL: " << url 209 PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;
168 << " because it matches the csd whitelist"; 210 PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;
169 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 211 if (!database_manager_.get()) {
170 NO_CLASSIFY_MATCH_CSD_WHITELIST, 212 // We cannot check the Safe Browsing whitelists so we stop here
171 NO_CLASSIFY_MAX); 213 // for safety.
172 return; 214 malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;
215 } else {
216 if (database_manager_->MatchCsdWhitelistUrl(url)) {
217 VLOG(1) << "Skipping phishing classification for URL: " << url
218 << " because it matches the csd whitelist";
219 phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;
220 }
221 if (database_manager_->IsMalwareKillSwitchOn()) {
222 malware_reason = NO_CLASSIFY_KILLSWITCH;
223 }
173 } 224 }
174
175 bool malware_killswitch_on = database_manager_->IsMalwareKillSwitchOn();
176
177 BrowserThread::PostTask( 225 BrowserThread::PostTask(
178 BrowserThread::UI, 226 BrowserThread::UI,
179 FROM_HERE, 227 FROM_HERE,
180 base::Bind(&ShouldClassifyUrlRequest::CheckCache, this, 228 base::Bind(&ShouldClassifyUrlRequest::CheckCache,
181 malware_killswitch_on)); 229 this,
230 phishing_reason,
231 malware_reason));
182 } 232 }
183 233
184 void CheckCache(bool malware_killswitch_on) { 234 void CheckCache(PreClassificationCheckFailures phishing_reason,
235 PreClassificationCheckFailures malware_reason) {
185 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 236 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
186 if (canceled_) { 237 if (phishing_reason != NO_CLASSIFY_MAX)
187 return; 238 DontClassifyForPhishing(phishing_reason);
239 if (malware_reason != NO_CLASSIFY_MAX)
240 DontClassifyForMalware(malware_reason);
241 if (!MaybeClassifyForMalware() && !MaybeClassifyForPhishing()) {
242 return; // No point in doing anything else.
188 } 243 }
189 244 // If result is cached, we don't want to run classification again.
190 host_->SetMalwareKillSwitch(malware_killswitch_on); 245 // In that case we're just trying to show the warning.
191 // If result is cached, we don't want to run classification again
192 bool is_phishing; 246 bool is_phishing;
193 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) { 247 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
194 VLOG(1) << "Satisfying request for " << params_.url << " from cache"; 248 VLOG(1) << "Satisfying request for " << params_.url << " from cache";
195 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1); 249 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
196 // Since we are already on the UI thread, this is safe. 250 // Since we are already on the UI thread, this is safe.
197 host_->MaybeShowPhishingWarning(params_.url, is_phishing); 251 host_->MaybeShowPhishingWarning(params_.url, is_phishing);
198 return; 252 DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);
199 } 253 }
200 254
201 // We want to limit the number of requests, though we will ignore the 255 // We want to limit the number of requests, though we will ignore the
202 // limit for urls in the cache. We don't want to start classifying 256 // limit for urls in the cache. We don't want to start classifying
203 // too many pages as phishing, but for those that we already think are 257 // too many pages as phishing, but for those that we already think are
204 // phishing we want to give ourselves a chance to fix false positives. 258 // phishing we want to send a request to the server to give ourselves
259 // a chance to fix misclassifications.
205 if (csd_service_->IsInCache(params_.url)) { 260 if (csd_service_->IsInCache(params_.url)) {
206 VLOG(1) << "Reporting limit skipped for " << params_.url 261 VLOG(1) << "Reporting limit skipped for " << params_.url
207 << " as it was in the cache."; 262 << " as it was in the cache.";
208 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1); 263 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);
209 } else if (csd_service_->OverPhishingReportLimit()) { 264 } else if (csd_service_->OverPhishingReportLimit()) {
210 VLOG(1) << "Too many report phishing requests sent recently, " 265 VLOG(1) << "Too many report phishing requests sent recently, "
211 << "not running classification for " << params_.url; 266 << "not running classification for " << params_.url;
212 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 267 DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);
213 NO_CLASSIFY_TOO_MANY_REPORTS, 268 }
214 NO_CLASSIFY_MAX); 269 if (csd_service_->OverMalwareReportLimit()) {
215 return; 270 DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);
216 } 271 }
217 272
218 // Everything checks out, so start classification. 273 // Everything checks out, so start classification.
219 // |web_contents_| is safe to call as we will be destructed 274 // |web_contents_| is safe to call as we will be destructed
220 // before it is. 275 // before it is.
221 VLOG(1) << "Instruct renderer to start phishing detection for URL: " 276 if (MaybeClassifyForPhishing())
222 << params_.url; 277 start_phishing_classification_cb_.Run(true);
223 content::RenderViewHost* rvh = web_contents_->GetRenderViewHost(); 278 if (MaybeClassifyForMalware())
224 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection( 279 start_malware_classification_cb_.Run(true);
225 rvh->GetRoutingID(), params_.url));
226 } 280 }
227 281
228 // No need to protect |canceled_| with a lock because it is only read and
229 // written by the UI thread.
230 bool canceled_;
231 content::FrameNavigateParams params_; 282 content::FrameNavigateParams params_;
232 WebContents* web_contents_; 283 WebContents* web_contents_;
233 ClientSideDetectionService* csd_service_; 284 ClientSideDetectionService* csd_service_;
234 // We keep a ref pointer here just to make sure the safe browsing 285 // We keep a ref pointer here just to make sure the safe browsing
235 // database manager stays alive long enough. 286 // database manager stays alive long enough.
236 scoped_refptr<SafeBrowsingDatabaseManager> database_manager_; 287 scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
237 ClientSideDetectionHost* host_; 288 ClientSideDetectionHost* host_;
238 289
290 ShouldClassifyUrlCallback start_phishing_classification_cb_;
291 ShouldClassifyUrlCallback start_malware_classification_cb_;
292
239 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest); 293 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
240 }; 294 };
241 295
242 // static 296 // static
243 ClientSideDetectionHost* ClientSideDetectionHost::Create( 297 ClientSideDetectionHost* ClientSideDetectionHost::Create(
244 WebContents* tab) { 298 WebContents* tab) {
245 return new ClientSideDetectionHost(tab); 299 return new ClientSideDetectionHost(tab);
246 } 300 }
247 301
248 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab) 302 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
249 : content::WebContentsObserver(tab), 303 : content::WebContentsObserver(tab),
250 csd_service_(NULL), 304 csd_service_(NULL),
305 classification_request_(NULL),
306 should_extract_malware_features_(true),
307 onload_complete_(false),
251 weak_factory_(this), 308 weak_factory_(this),
252 unsafe_unique_page_id_(-1), 309 unsafe_unique_page_id_(-1) {
253 malware_killswitch_on_(false),
254 malware_report_enabled_(false) {
255 DCHECK(tab); 310 DCHECK(tab);
256 // Note: csd_service_ and sb_service will be NULL here in testing. 311 // Note: csd_service_ and sb_service will be NULL here in testing.
257 csd_service_ = g_browser_process->safe_browsing_detection_service(); 312 csd_service_ = g_browser_process->safe_browsing_detection_service();
258 feature_extractor_.reset(new BrowserFeatureExtractor(tab, this)); 313 feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));
259 registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED, 314 registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,
260 content::Source<WebContents>(tab)); 315 content::Source<WebContents>(tab));
261 316
262 scoped_refptr<SafeBrowsingService> sb_service = 317 scoped_refptr<SafeBrowsingService> sb_service =
263 g_browser_process->safe_browsing_service(); 318 g_browser_process->safe_browsing_service();
264 if (sb_service.get()) { 319 if (sb_service.get()) {
265 ui_manager_ = sb_service->ui_manager(); 320 ui_manager_ = sb_service->ui_manager();
266 database_manager_ = sb_service->database_manager(); 321 database_manager_ = sb_service->database_manager();
267 ui_manager_->AddObserver(this); 322 ui_manager_->AddObserver(this);
268 } 323 }
269
270 // Only enable the malware bad IP matching and report feature for canary
271 // and dev channel.
272 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
273 malware_report_enabled_ = (
274 channel == chrome::VersionInfo::CHANNEL_DEV ||
275 channel == chrome::VersionInfo::CHANNEL_CANARY);
mattm 2014/03/18 02:19:06 CL description should mention that it is removing
noé 2014/03/20 17:01:45 Done.
276 } 324 }
277 325
278 ClientSideDetectionHost::~ClientSideDetectionHost() { 326 ClientSideDetectionHost::~ClientSideDetectionHost() {
279 if (ui_manager_.get()) 327 if (ui_manager_.get())
280 ui_manager_->RemoveObserver(this); 328 ui_manager_->RemoveObserver(this);
281 } 329 }
282 330
283 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) { 331 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
284 bool handled = true; 332 bool handled = true;
285 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message) 333 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
286 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone, 334 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
287 OnPhishingDetectionDone) 335 OnPhishingDetectionDone)
288 IPC_MESSAGE_UNHANDLED(handled = false) 336 IPC_MESSAGE_UNHANDLED(handled = false)
289 IPC_END_MESSAGE_MAP() 337 IPC_END_MESSAGE_MAP()
290 return handled; 338 return handled;
291 } 339 }
292 340
293 void ClientSideDetectionHost::DidNavigateMainFrame( 341 void ClientSideDetectionHost::DidNavigateMainFrame(
294 const content::LoadCommittedDetails& details, 342 const content::LoadCommittedDetails& details,
295 const content::FrameNavigateParams& params) { 343 const content::FrameNavigateParams& params) {
296 // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests 344 // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
297 // that don't call this method on the UI thread. 345 // that don't call this method on the UI thread.
298 // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 346 // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
299 if (details.is_in_page) { 347 if (details.is_in_page) {
300 // If the navigation is within the same page, the user isn't really 348 // If the navigation is within the same page, the user isn't really
301 // navigating away. We don't need to cancel a pending callback or 349 // navigating away. We don't need to cancel a pending callback or
302 // begin a new classification. 350 // begin a new classification.
303 return; 351 return;
304 } 352 }
353 // Cancel any pending classification request.
354 if (classification_request_.get()) {
355 classification_request_->Cancel();
356 }
305 // If we navigate away and there currently is a pending phishing 357 // If we navigate away and there currently is a pending phishing
306 // report request we have to cancel it to make sure we don't display 358 // report request we have to cancel it to make sure we don't display
307 // an interstitial for the wrong page. Note that this won't cancel 359 // an interstitial for the wrong page. Note that this won't cancel
308 // the server ping back but only cancel the showing of the 360 // the server ping back but only cancel the showing of the
309 // interstitial. 361 // interstitial.
310 weak_factory_.InvalidateWeakPtrs(); 362 weak_factory_.InvalidateWeakPtrs();
311 363
312 if (!csd_service_) { 364 if (!csd_service_) {
313 return; 365 return;
314 } 366 }
315
316 // Cancel any pending classification request.
317 if (classification_request_.get()) {
318 classification_request_->Cancel();
319 }
320 browse_info_.reset(new BrowseInfo); 367 browse_info_.reset(new BrowseInfo);
321 368
322 // Store redirect chain information. 369 // Store redirect chain information.
323 if (params.url.host() != cur_host_) { 370 if (params.url.host() != cur_host_) {
324 cur_host_ = params.url.host(); 371 cur_host_ = params.url.host();
325 cur_host_redirects_ = params.redirects; 372 cur_host_redirects_ = params.redirects;
326 } 373 }
374 browse_info_->url = params.url;
327 browse_info_->host_redirects = cur_host_redirects_; 375 browse_info_->host_redirects = cur_host_redirects_;
328 browse_info_->url_redirects = params.redirects; 376 browse_info_->url_redirects = params.redirects;
329 browse_info_->referrer = params.referrer.url; 377 browse_info_->referrer = params.referrer.url;
330 browse_info_->http_status_code = details.http_status_code; 378 browse_info_->http_status_code = details.http_status_code;
379 browse_info_->page_id = params.page_id;
331 380
332 // Notify the renderer if it should classify this URL. 381 should_extract_malware_features_ = true;
382 should_classify_for_malware_.reset();
383 onload_complete_ = false;
384
385 // Check whether we can classify the current URL for phishing or malware.
333 classification_request_ = new ShouldClassifyUrlRequest( 386 classification_request_ = new ShouldClassifyUrlRequest(
334 params, web_contents(), csd_service_, database_manager_.get(), this); 387 params,
388 base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,
389 weak_factory_.GetWeakPtr()),
390 base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,
391 weak_factory_.GetWeakPtr()),
392 web_contents(), csd_service_, database_manager_.get(), this);
335 classification_request_->Start(); 393 classification_request_->Start();
336 } 394 }
337 395
338 void ClientSideDetectionHost::OnSafeBrowsingHit( 396 void ClientSideDetectionHost::OnSafeBrowsingHit(
339 const SafeBrowsingUIManager::UnsafeResource& resource) { 397 const SafeBrowsingUIManager::UnsafeResource& resource) {
340 if (!web_contents() || !web_contents()->GetController().GetActiveEntry()) 398 if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
341 return; 399 return;
342 400
343 // Check that the hit is either malware or phishing. 401 // Check that the hit is either malware or phishing.
344 if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING && 402 if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
407 void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) { 465 void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) {
408 DCHECK(tab); 466 DCHECK(tab);
409 // Tell any pending classification request that it is being canceled. 467 // Tell any pending classification request that it is being canceled.
410 if (classification_request_.get()) { 468 if (classification_request_.get()) {
411 classification_request_->Cancel(); 469 classification_request_->Cancel();
412 } 470 }
413 // Cancel all pending feature extractions. 471 // Cancel all pending feature extractions.
414 feature_extractor_.reset(); 472 feature_extractor_.reset();
415 } 473 }
416 474
475 void ClientSideDetectionHost::OnPhishingPreClassificationDone(
476 bool should_classify) {
477 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
478 if (browse_info_.get() && should_classify) {
479 VLOG(1) << "Instruct renderer to start phishing detection for URL: "
480 << browse_info_->url;
481 content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();
482 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
483 rvh->GetRoutingID(), browse_info_->url));
484 }
485 }
486
487 void ClientSideDetectionHost::OnMalwarePreClassificationDone(
488 bool should_classify) {
489 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
490 // If classification checks failed we should stop extracting malware features.
491 VLOG(2) << "Malware pre-classification checks done. Should classify: "
492 << should_classify;
493 should_extract_malware_features_ = should_classify;
494 should_classify_for_malware_.reset(new bool(should_classify));
495 MaybeStartMalwareFeatureExtraction();
496 }
497
498 void ClientSideDetectionHost::DocumentOnLoadCompletedInMainFrame(
499 int32 page_id) {
500 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
501 if (!csd_service_ || !browse_info_.get())
502 return;
503 VLOG(2) << "Main frame onload hander called.";
504 if (browse_info_->page_id != page_id) {
505 // Something weird is happening here. The BrowseInfo page ID
506 // should always be the same as the most recent load.
507 UMA_HISTOGRAM_COUNTS("SBClientMalware.UnexpectedPageId", 1);
508 return;
509 }
510 onload_complete_ = true;
511 MaybeStartMalwareFeatureExtraction();
512 }
513
514 void ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {
515 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
516 if (csd_service_ && browse_info_.get() &&
517 should_classify_for_malware_.get() &&
518 *should_classify_for_malware_ &&
519 onload_complete_) {
520 scoped_ptr<ClientMalwareRequest> malware_verdict(
mattm 2014/03/18 02:19:06 malware_request?
noé 2014/03/20 17:01:45 Done.
521 new ClientMalwareRequest);
522 // Start browser-side malware feature extraction. Once we're done it will
523 // send the malware client verdict request.
524 malware_verdict->set_url(browse_info_->url.spec());
525 const GURL& referrer = browse_info_->referrer;
526 if (referrer.SchemeIs("http")) { // Only send http urls.
527 malware_verdict->set_referrer_url(referrer.spec());
528 }
529 // This function doesn't expect browse_info_ to stay around after this
530 // function returns.
531 feature_extractor_->ExtractMalwareFeatures(
532 browse_info_.get(),
533 malware_verdict.release(),
534 base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,
535 weak_factory_.GetWeakPtr()));
536 }
537 }
538
417 void ClientSideDetectionHost::OnPhishingDetectionDone( 539 void ClientSideDetectionHost::OnPhishingDetectionDone(
418 const std::string& verdict_str) { 540 const std::string& verdict_str) {
419 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 541 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
420 // There is something seriously wrong if there is no service class but 542 // There is something seriously wrong if there is no service class but
421 // this method is called. The renderer should not start phishing detection 543 // this method is called. The renderer should not start phishing detection
422 // if there isn't any service class in the browser. 544 // if there isn't any service class in the browser.
423 DCHECK(csd_service_); 545 DCHECK(csd_service_);
424 // There shouldn't be any pending requests because we revoke them every time
425 // we navigate away.
426 DCHECK(!weak_factory_.HasWeakPtrs());
427 DCHECK(browse_info_.get()); 546 DCHECK(browse_info_.get());
428 547
429 // We parse the protocol buffer here. If we're unable to parse it we won't 548 // We parse the protocol buffer here. If we're unable to parse it we won't
430 // send the verdict further. 549 // send the verdict further.
431 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest); 550 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
432 if (csd_service_ && 551 if (csd_service_ &&
433 !weak_factory_.HasWeakPtrs() &&
434 browse_info_.get() && 552 browse_info_.get() &&
435 verdict->ParseFromString(verdict_str) && 553 verdict->ParseFromString(verdict_str) &&
436 verdict->IsInitialized()) { 554 verdict->IsInitialized()) {
437 // We do the malware IP matching and request sending if the feature
438 // is enabled.
439 if (malware_report_enabled_ && !MalwareKillSwitchIsOn()) {
440 scoped_ptr<ClientMalwareRequest> malware_verdict(
441 new ClientMalwareRequest);
442 // Start browser-side malware feature extraction. Once we're done it will
443 // send the malware client verdict request.
444 malware_verdict->set_url(verdict->url());
445 const GURL& referrer = browse_info_->referrer;
446 if (referrer.SchemeIs("http")) { // Only send http urls.
447 malware_verdict->set_referrer_url(referrer.spec());
448 }
449 // This function doesn't expect browse_info_ to stay around after this
450 // function returns.
451 feature_extractor_->ExtractMalwareFeatures(
452 browse_info_.get(),
453 malware_verdict.release(),
454 base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,
455 weak_factory_.GetWeakPtr()));
456 }
457
458 // We only send phishing verdict to the server if the verdict is phishing or 555 // We only send phishing verdict to the server if the verdict is phishing or
459 // if a SafeBrowsing interstitial was already shown for this site. E.g., a 556 // if a SafeBrowsing interstitial was already shown for this site. E.g., a
460 // malware or phishing interstitial was shown but the user clicked 557 // malware or phishing interstitial was shown but the user clicked
461 // through. 558 // through.
462 if (verdict->is_phishing() || DidShowSBInterstitial()) { 559 if (verdict->is_phishing() || DidShowSBInterstitial()) {
463 if (DidShowSBInterstitial()) { 560 if (DidShowSBInterstitial()) {
464 browse_info_->unsafe_resource.reset(unsafe_resource_.release()); 561 browse_info_->unsafe_resource.reset(unsafe_resource_.release());
465 } 562 }
466 // Start browser-side feature extraction. Once we're done it will send 563 // Start browser-side feature extraction. Once we're done it will send
467 // the client verdict request. 564 // the client verdict request.
468 feature_extractor_->ExtractFeatures( 565 feature_extractor_->ExtractFeatures(
469 browse_info_.get(), 566 browse_info_.get(),
470 verdict.release(), 567 verdict.release(),
471 base::Bind(&ClientSideDetectionHost::FeatureExtractionDone, 568 base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,
472 weak_factory_.GetWeakPtr())); 569 weak_factory_.GetWeakPtr()));
473 } 570 }
474 } 571 }
475 browse_info_.reset();
476 } 572 }
477 573
478 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url, 574 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
479 bool is_phishing) { 575 bool is_phishing) {
480 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 576 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
481 VLOG(2) << "Received server phishing verdict for URL:" << phishing_url 577 VLOG(2) << "Received server phishing verdict for URL:" << phishing_url
482 << " is_phishing:" << is_phishing; 578 << " is_phishing:" << is_phishing;
483 if (is_phishing) { 579 if (is_phishing) {
484 DCHECK(web_contents()); 580 DCHECK(web_contents());
485 if (ui_manager_.get()) { 581 if (ui_manager_.get()) {
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
594 } 690 }
595 691
596 void ClientSideDetectionHost::Observe( 692 void ClientSideDetectionHost::Observe(
597 int type, 693 int type,
598 const content::NotificationSource& source, 694 const content::NotificationSource& source,
599 const content::NotificationDetails& details) { 695 const content::NotificationDetails& details) {
600 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 696 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
601 DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED); 697 DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);
602 const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>( 698 const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(
603 details).ptr(); 699 details).ptr();
604 if (req && browse_info_.get() && malware_report_enabled_ && 700 if (req && browse_info_.get() &&
605 !MalwareKillSwitchIsOn()) { 701 should_extract_malware_features_ && req->url.is_valid()) {
606 if (req->url.is_valid()) { 702 UpdateIPUrlMap(req->socket_address.host() /* ip */,
607 UpdateIPUrlMap(req->socket_address.host() /* ip */, 703 req->url.spec() /* url */,
608 req->url.spec() /* url */, 704 req->method,
609 req->method, 705 req->referrer,
610 req->referrer, 706 req->resource_type);
611 req->resource_type);
612 }
613 } 707 }
614 } 708 }
615 709
616 bool ClientSideDetectionHost::DidShowSBInterstitial() const { 710 bool ClientSideDetectionHost::DidShowSBInterstitial() const {
617 if (unsafe_unique_page_id_ <= 0 || !web_contents()) { 711 if (unsafe_unique_page_id_ <= 0 || !web_contents()) {
618 return false; 712 return false;
619 } 713 }
620 const NavigationEntry* nav_entry = 714 const NavigationEntry* nav_entry =
621 web_contents()->GetController().GetActiveEntry(); 715 web_contents()->GetController().GetActiveEntry();
622 return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_); 716 return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);
(...skipping 10 matching lines...) Expand all
633 if (ui_manager_.get()) 727 if (ui_manager_.get())
634 ui_manager_->RemoveObserver(this); 728 ui_manager_->RemoveObserver(this);
635 729
636 ui_manager_ = ui_manager; 730 ui_manager_ = ui_manager;
637 if (ui_manager) 731 if (ui_manager)
638 ui_manager_->AddObserver(this); 732 ui_manager_->AddObserver(this);
639 733
640 database_manager_ = database_manager; 734 database_manager_ = database_manager;
641 } 735 }
642 736
643 bool ClientSideDetectionHost::MalwareKillSwitchIsOn() {
644 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
645 return malware_killswitch_on_;
646 }
647
648 void ClientSideDetectionHost::SetMalwareKillSwitch(bool killswitch_on) {
649 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
650 malware_killswitch_on_ = killswitch_on;
651 }
652
653 } // namespace safe_browsing 737 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698