chrome/browser/safe_browsing/client_side_detection_host.cc - Issue 173133004: Separate pre-classification checks for client-side malware and phishing

Side by Side Diff: chrome/browser/safe_browsing/client_side_detection_host.cc

Issue 173133004: Separate pre-classification checks for client-side malware and phishing (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fix the service unit-test. Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« chrome/browser/safe_browsing/browser_feature_extractor.cc ('K') | « chrome/browser/safe_browsing/client_side_detection_host.h ('k') | chrome/browser/safe_browsing/client_side_detection_host_unittest.cc » ('j') | chrome/browser/safe_browsing/client_side_detection_host_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"	5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"

6	6

7 #include <vector>	7 #include <vector>

8	8

9 #include "base/logging.h"	9 #include "base/logging.h"

10 #include "base/memory/ref_counted.h"	10 #include "base/memory/ref_counted.h"

11 #include "base/memory/scoped_ptr.h"	11 #include "base/memory/scoped_ptr.h"

12 #include "base/metrics/histogram.h"	12 #include "base/metrics/histogram.h"

13 #include "base/prefs/pref_service.h"	13 #include "base/prefs/pref_service.h"

14 #include "base/sequenced_task_runner_helpers.h"	14 #include "base/sequenced_task_runner_helpers.h"

15 #include "base/strings/utf_string_conversions.h"	15 #include "base/strings/utf_string_conversions.h"

16 #include "chrome/browser/browser_process.h"	16 #include "chrome/browser/browser_process.h"

17 #include "chrome/browser/profiles/profile.h"	17 #include "chrome/browser/profiles/profile.h"

18 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"	18 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"

19 #include "chrome/browser/safe_browsing/client_side_detection_service.h"	19 #include "chrome/browser/safe_browsing/client_side_detection_service.h"

20 #include "chrome/browser/safe_browsing/database_manager.h"	20 #include "chrome/browser/safe_browsing/database_manager.h"

21 #include "chrome/browser/safe_browsing/safe_browsing_service.h"	21 #include "chrome/browser/safe_browsing/safe_browsing_service.h"

22 #include "chrome/common/chrome_switches.h"	22 #include "chrome/common/chrome_switches.h"
	mattm 2014/03/18 02:19:06: unused? — noé 2014/03/20 17:01:45 (in reply): Done.
23 #include "chrome/common/chrome_version_info.h"	23 #include "chrome/common/chrome_version_info.h"
	mattm 2014/03/18 02:19:06: unused? — noé 2014/03/20 17:01:45 (in reply): Done.
24 #include "chrome/common/pref_names.h"	24 #include "chrome/common/pref_names.h"

25 #include "chrome/common/safe_browsing/csd.pb.h"	25 #include "chrome/common/safe_browsing/csd.pb.h"

26 #include "chrome/common/safe_browsing/safebrowsing_messages.h"	26 #include "chrome/common/safe_browsing/safebrowsing_messages.h"

27 #include "content/public/browser/browser_thread.h"	27 #include "content/public/browser/browser_thread.h"

28 #include "content/public/browser/navigation_controller.h"	28 #include "content/public/browser/navigation_controller.h"

29 #include "content/public/browser/navigation_details.h"	29 #include "content/public/browser/navigation_details.h"

30 #include "content/public/browser/navigation_entry.h"	30 #include "content/public/browser/navigation_entry.h"

31 #include "content/public/browser/notification_details.h"	31 #include "content/public/browser/notification_details.h"

32 #include "content/public/browser/notification_source.h"	32 #include "content/public/browser/notification_source.h"

33 #include "content/public/browser/notification_types.h"	33 #include "content/public/browser/notification_types.h"

34 #include "content/public/browser/render_process_host.h"	34 #include "content/public/browser/render_process_host.h"

35 #include "content/public/browser/render_view_host.h"	35 #include "content/public/browser/render_view_host.h"

36 #include "content/public/browser/resource_request_details.h"	36 #include "content/public/browser/resource_request_details.h"

37 #include "content/public/browser/web_contents.h"	37 #include "content/public/browser/web_contents.h"

38 #include "content/public/common/frame_navigate_params.h"	38 #include "content/public/common/frame_navigate_params.h"

39 #include "url/gurl.h"	39 #include "url/gurl.h"

40	40

41 using content::BrowserThread;	41 using content::BrowserThread;

42 using content::NavigationEntry;	42 using content::NavigationEntry;

43 using content::ResourceRequestDetails;	43 using content::ResourceRequestDetails;

44 using content::WebContents;	44 using content::WebContents;

45	45

46 namespace safe_browsing {	46 namespace safe_browsing {

47	47

48 const int ClientSideDetectionHost::kMaxUrlsPerIP = 20;	48 const int ClientSideDetectionHost::kMaxUrlsPerIP = 20;

49 const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200;	49 const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200;

50	50

51 const char kSafeBrowsingMatchKey[] = "safe_browsing_match";	51 const char kSafeBrowsingMatchKey[] = "safe_browsing_match";

52	52

	53 typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;

	54

53 // This class is instantiated each time a new toplevel URL loads, and	55 // This class is instantiated each time a new toplevel URL loads, and

54 // asynchronously checks whether the phishing classifier should run for this	56 // asynchronously checks whether the malware and phishing classifiers should run

55 // URL. If so, it notifies the renderer with a StartPhishingDetection IPC.	57 // for this URL. If so, it notifies the host class by calling the provided

56 // Objects of this class are ref-counted and will be destroyed once nobody	58 // callback from the UI thread. Objects of this class are ref-counted and will

57 // uses it anymore. If \|web_contents\|, \|csd_service\| or \|host\| go away you need	59 // be destroyed once nobody uses it anymore. If \|web_contents\|, \|csd_service\|

58 // to call Cancel(). We keep the \|database_manager\| alive in a ref pointer for	60 // or \|host\| go away you need to call Cancel(). We keep the \|database_manager\|

59 // as long as it takes.	61 // alive in a ref pointer for as long as it takes.

60 class ClientSideDetectionHost::ShouldClassifyUrlRequest	62 class ClientSideDetectionHost::ShouldClassifyUrlRequest

61 : public base::RefCountedThreadSafe<	63 : public base::RefCountedThreadSafe<

62 ClientSideDetectionHost::ShouldClassifyUrlRequest> {	64 ClientSideDetectionHost::ShouldClassifyUrlRequest> {

63 public:	65 public:

64 ShouldClassifyUrlRequest(const content::FrameNavigateParams& params,	66 ShouldClassifyUrlRequest(

65 WebContents* web_contents,	67 const content::FrameNavigateParams& params,

66 ClientSideDetectionService* csd_service,	68 const ShouldClassifyUrlCallback& start_phishing_classification,

67 SafeBrowsingDatabaseManager* database_manager,	69 const ShouldClassifyUrlCallback& start_malware_classification,

68 ClientSideDetectionHost* host)	70 WebContents* web_contents,

69 : canceled_(false),	71 ClientSideDetectionService* csd_service,

70 params_(params),	72 SafeBrowsingDatabaseManager* database_manager,

	73 ClientSideDetectionHost* host)

	74 : params_(params),

71 web_contents_(web_contents),	75 web_contents_(web_contents),

72 csd_service_(csd_service),	76 csd_service_(csd_service),

73 database_manager_(database_manager),	77 database_manager_(database_manager),

74 host_(host) {	78 host_(host),

	79 start_phishing_classification_cb_(start_phishing_classification),

	80 start_malware_classification_cb_(start_malware_classification) {

75 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	81 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

76 DCHECK(web_contents_);	82 DCHECK(web_contents_);

77 DCHECK(csd_service_);	83 DCHECK(csd_service_);

78 DCHECK(database_manager_.get());	84 DCHECK(database_manager_.get());

79 DCHECK(host_);	85 DCHECK(host_);

80 }	86 }

81	87

82 void Start() {	88 void Start() {

83 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	89 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

84	90

85 // We start by doing some simple checks that can run on the UI thread.	91 // We start by doing some simple checks that can run on the UI thread.

86 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);	92 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);

	93 UMA_HISTOGRAM_COUNTS("SBClientMalware.ClassificationStart", 1);

87	94

88 // Only classify [X]HTML documents.	95 // Only classify [X]HTML documents.

89 if (params_.contents_mime_type != "text/html" &&	96 if (params_.contents_mime_type != "text/html" &&

90 params_.contents_mime_type != "application/xhtml+xml") {	97 params_.contents_mime_type != "application/xhtml+xml") {

91 VLOG(1) << "Skipping phishing classification for URL: " << params_.url	98 VLOG(1) << "Skipping phishing classification for URL: " << params_.url

92 << " because it has an unsupported MIME type: "	99 << " because it has an unsupported MIME type: "

93 << params_.contents_mime_type;	100 << params_.contents_mime_type;

94 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	101 DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);

95 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,

96 NO_CLASSIFY_MAX);

97 return;

98 }	102 }

99	103

100 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {	104 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {

101 VLOG(1) << "Skipping phishing classification for URL: " << params_.url	105 VLOG(1) << "Skipping phishing classification for URL: " << params_.url

102 << " because of hosting on private IP: "	106 << " because of hosting on private IP: "

103 << params_.socket_address.host();	107 << params_.socket_address.host();

104 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	108 DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);

105 NO_CLASSIFY_PRIVATE_IP,	109 DontClassifyForMalware(NO_CLASSIFY_PRIVATE_IP);

106 NO_CLASSIFY_MAX);

107 return;

108 }	110 }

109	111

110 // Don't run the phishing classifier if the tab is incognito.	112 // Don't run any classifier if the tab is incognito.

111 if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {	113 if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {

112 VLOG(1) << "Skipping phishing classification for URL: " << params_.url	114 VLOG(1) << "Skipping phishing and malware classification for URL: "

113 << " because we're browsing incognito.";	115 << params_.url << " because we're browsing incognito.";

114 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	116 DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);

115 NO_CLASSIFY_OFF_THE_RECORD,	117 DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);

116 NO_CLASSIFY_MAX);

117

118 return;

119 }	118 }

120	119

121 // We lookup the csd-whitelist before we lookup the cache because	120 // We lookup the csd-whitelist before we lookup the cache because

122 // a URL may have recently been whitelisted. If the URL matches	121 // a URL may have recently been whitelisted. If the URL matches

123 // the csd-whitelist we won't start classification. The	122 // the csd-whitelist we won't start phishing classification. The
	mattm 2014/03/18 02:19:06: word order? — noé 2014/03/20 17:01:45 (in reply): Done.
124 // csd-whitelist check has to be done on the IO thread because it	123 // csd-whitelist check has to be done on the IO thread because it

125 // uses the SafeBrowsing service class.	124 // uses the SafeBrowsing service class.

126 BrowserThread::PostTask(	125 if (MaybeClassifyForPhishing() \|\| MaybeClassifyForMalware()) {

127 BrowserThread::IO,	126 BrowserThread::PostTask(

128 FROM_HERE,	127 BrowserThread::IO,

129 base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,	128 FROM_HERE,

130 this, params_.url));	129 base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,

	130 this, params_.url));

	131 }

131 }	132 }

132	133

133 void Cancel() {	134 void Cancel() {

134 canceled_ = true;	135 DontClassifyForPhishing(NO_CLASSIFY_CANCEL);

	136 DontClassifyForMalware(NO_CLASSIFY_CANCEL);

135 // Just to make sure we don't do anything stupid we reset all these	137 // Just to make sure we don't do anything stupid we reset all these

136 // pointers except for the safebrowsing service class which may be	138 // pointers except for the safebrowsing service class which may be

137 // accessed by CheckCsdWhitelist().	139 // accessed by CheckCsdWhitelist().

138 web_contents_ = NULL;	140 web_contents_ = NULL;

139 csd_service_ = NULL;	141 csd_service_ = NULL;

140 host_ = NULL;	142 host_ = NULL;

141 }	143 }

142	144

143 private:	145 private:

144 friend class base::RefCountedThreadSafe<	146 friend class base::RefCountedThreadSafe<

145 ClientSideDetectionHost::ShouldClassifyUrlRequest>;	147 ClientSideDetectionHost::ShouldClassifyUrlRequest>;

146	148

147 // Enum used to keep stats about why the pre-classification check failed.	149 // Enum used to keep stats about why the pre-classification check failed.

148 enum PreClassificationCheckFailures {	150 enum PreClassificationCheckFailures {

149 OBSOLETE_NO_CLASSIFY_PROXY_FETCH,	151 OBSOLETE_NO_CLASSIFY_PROXY_FETCH,

150 NO_CLASSIFY_PRIVATE_IP,	152 NO_CLASSIFY_PRIVATE_IP,

151 NO_CLASSIFY_OFF_THE_RECORD,	153 NO_CLASSIFY_OFF_THE_RECORD,

152 NO_CLASSIFY_MATCH_CSD_WHITELIST,	154 NO_CLASSIFY_MATCH_CSD_WHITELIST,

153 NO_CLASSIFY_TOO_MANY_REPORTS,	155 NO_CLASSIFY_TOO_MANY_REPORTS,

154 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,	156 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,

	157 NO_CLASSIFY_NO_DATABASE_MANAGER,

	158 NO_CLASSIFY_KILLSWITCH,

	159 NO_CLASSIFY_CANCEL,

	160 NO_CLASSIFY_RESULT_FROM_CACHE,

155	161

156 NO_CLASSIFY_MAX // Always add new values before this one.	162 NO_CLASSIFY_MAX // Always add new values before this one.

157 };	163 };

158	164

159 // The destructor can be called either from the UI or the IO thread.	165 // The destructor can be called either from the UI or the IO thread.

160 virtual ~ShouldClassifyUrlRequest() { }	166 virtual ~ShouldClassifyUrlRequest() { }

161	167

	168 bool MaybeClassifyForPhishing() const {
	mattm 2014/03/18 02:19:06: Calling these MaybeFoo is a bit confusing, usually that means the function method might do something itself. — noé 2014/03/20 17:01:45 (in reply): Done.
	169 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	170 return !start_phishing_classification_cb_.is_null();

	171 }

	172

	173 bool MaybeClassifyForMalware() const {

	174 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	175 return !start_malware_classification_cb_.is_null();

	176 }

	177

	178 void DontClassifyForPhishing(PreClassificationCheckFailures reason) {

	179 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	180 if (MaybeClassifyForPhishing()) {

	181 // Track the first reason why we stopped classifying for phishing.

	182 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",

	183 reason, NO_CLASSIFY_MAX);

	184 VLOG(2) << "Failed phishing pre-classification checks. Reason: "
	mattm 2014/03/18 02:19:06: prefer DVLOG instead of VLOG, unless there is a reason for VLOG — noé 2014/03/20 17:01:45 (in reply): Done.
	185 << reason;

	186 start_phishing_classification_cb_.Run(false);

	187 }

	188 start_phishing_classification_cb_.Reset();

	189 }

	190

	191 void DontClassifyForMalware(PreClassificationCheckFailures reason) {

	192 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	193 if (MaybeClassifyForMalware()) {

	194 // Track the first reason why we stopped classifying for malware.

	195 UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",

	196 reason, NO_CLASSIFY_MAX);

	197 VLOG(2) << "Failed malware pre-classification checks. Reason: "

	198 << reason;

	199 start_malware_classification_cb_.Run(false);

	200 }

	201 start_malware_classification_cb_.Reset();

	202 }

	203

162 void CheckCsdWhitelist(const GURL& url) {	204 void CheckCsdWhitelist(const GURL& url) {
	mattm 2014/03/18 02:19:06: update name — noé 2014/03/20 17:01:45 (in reply): Done.
163 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));	205 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

164 if (!database_manager_.get() \|\|	206 // We don't want to call the classification callbacks from the IO

165 database_manager_->MatchCsdWhitelistUrl(url)) {	207 // thread so we simply pass the results of this method to CheckCache()

166 // We're done. There is no point in going back to the UI thread.	208 // which is called on the UI thread;

167 VLOG(1) << "Skipping phishing classification for URL: " << url	209 PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;

168 << " because it matches the csd whitelist";	210 PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;

169 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	211 if (!database_manager_.get()) {

170 NO_CLASSIFY_MATCH_CSD_WHITELIST,	212 // We cannot check the Safe Browsing whitelists so we stop here

171 NO_CLASSIFY_MAX);	213 // for safety.

172 return;	214 malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;

	215 } else {

	216 if (database_manager_->MatchCsdWhitelistUrl(url)) {

	217 VLOG(1) << "Skipping phishing classification for URL: " << url

	218 << " because it matches the csd whitelist";

	219 phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;

	220 }

	221 if (database_manager_->IsMalwareKillSwitchOn()) {

	222 malware_reason = NO_CLASSIFY_KILLSWITCH;

	223 }

173 }	224 }

174

175 bool malware_killswitch_on = database_manager_->IsMalwareKillSwitchOn();

176

177 BrowserThread::PostTask(	225 BrowserThread::PostTask(

178 BrowserThread::UI,	226 BrowserThread::UI,

179 FROM_HERE,	227 FROM_HERE,

180 base::Bind(&ShouldClassifyUrlRequest::CheckCache, this,	228 base::Bind(&ShouldClassifyUrlRequest::CheckCache,

181 malware_killswitch_on));	229 this,

	230 phishing_reason,

	231 malware_reason));

182 }	232 }

183	233

184 void CheckCache(bool malware_killswitch_on) {	234 void CheckCache(PreClassificationCheckFailures phishing_reason,

	235 PreClassificationCheckFailures malware_reason) {

185 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	236 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

186 if (canceled_) {	237 if (phishing_reason != NO_CLASSIFY_MAX)

187 return;	238 DontClassifyForPhishing(phishing_reason);

	239 if (malware_reason != NO_CLASSIFY_MAX)

	240 DontClassifyForMalware(malware_reason);

	241 if (!MaybeClassifyForMalware() && !MaybeClassifyForPhishing()) {

	242 return; // No point in doing anything else.

188 }	243 }

189	244 // If result is cached, we don't want to run classification again.

190 host_->SetMalwareKillSwitch(malware_killswitch_on);	245 // In that case we're just trying to show the warning.

191 // If result is cached, we don't want to run classification again

192 bool is_phishing;	246 bool is_phishing;

193 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {	247 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {

194 VLOG(1) << "Satisfying request for " << params_.url << " from cache";	248 VLOG(1) << "Satisfying request for " << params_.url << " from cache";

195 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);	249 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);

196 // Since we are already on the UI thread, this is safe.	250 // Since we are already on the UI thread, this is safe.

197 host_->MaybeShowPhishingWarning(params_.url, is_phishing);	251 host_->MaybeShowPhishingWarning(params_.url, is_phishing);

198 return;	252 DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);

199 }	253 }

200	254

201 // We want to limit the number of requests, though we will ignore the	255 // We want to limit the number of requests, though we will ignore the

202 // limit for urls in the cache. We don't want to start classifying	256 // limit for urls in the cache. We don't want to start classifying

203 // too many pages as phishing, but for those that we already think are	257 // too many pages as phishing, but for those that we already think are

204 // phishing we want to give ourselves a chance to fix false positives.	258 // phishing we want to send a request to the server to give ourselves

	259 // a chance to fix misclassifications.

205 if (csd_service_->IsInCache(params_.url)) {	260 if (csd_service_->IsInCache(params_.url)) {

206 VLOG(1) << "Reporting limit skipped for " << params_.url	261 VLOG(1) << "Reporting limit skipped for " << params_.url

207 << " as it was in the cache.";	262 << " as it was in the cache.";

208 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);	263 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);

209 } else if (csd_service_->OverPhishingReportLimit()) {	264 } else if (csd_service_->OverPhishingReportLimit()) {

210 VLOG(1) << "Too many report phishing requests sent recently, "	265 VLOG(1) << "Too many report phishing requests sent recently, "

211 << "not running classification for " << params_.url;	266 << "not running classification for " << params_.url;

212 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	267 DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);

213 NO_CLASSIFY_TOO_MANY_REPORTS,	268 }

214 NO_CLASSIFY_MAX);	269 if (csd_service_->OverMalwareReportLimit()) {

215 return;	270 DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);

216 }	271 }

217	272

218 // Everything checks out, so start classification.	273 // Everything checks out, so start classification.

219 // \|web_contents_\| is safe to call as we will be destructed	274 // \|web_contents_\| is safe to call as we will be destructed

220 // before it is.	275 // before it is.

221 VLOG(1) << "Instruct renderer to start phishing detection for URL: "	276 if (MaybeClassifyForPhishing())

222 << params_.url;	277 start_phishing_classification_cb_.Run(true);

223 content::RenderViewHost* rvh = web_contents_->GetRenderViewHost();	278 if (MaybeClassifyForMalware())

224 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(	279 start_malware_classification_cb_.Run(true);

225 rvh->GetRoutingID(), params_.url));

226 }	280 }

227	281

228 // No need to protect \|canceled_\| with a lock because it is only read and

229 // written by the UI thread.

230 bool canceled_;

231 content::FrameNavigateParams params_;	282 content::FrameNavigateParams params_;

232 WebContents* web_contents_;	283 WebContents* web_contents_;

233 ClientSideDetectionService* csd_service_;	284 ClientSideDetectionService* csd_service_;

234 // We keep a ref pointer here just to make sure the safe browsing	285 // We keep a ref pointer here just to make sure the safe browsing

235 // database manager stays alive long enough.	286 // database manager stays alive long enough.

236 scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;	287 scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;

237 ClientSideDetectionHost* host_;	288 ClientSideDetectionHost* host_;

238	289

	290 ShouldClassifyUrlCallback start_phishing_classification_cb_;

	291 ShouldClassifyUrlCallback start_malware_classification_cb_;

	292

239 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);	293 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);

240 };	294 };

241	295

242 // static	296 // static

243 ClientSideDetectionHost* ClientSideDetectionHost::Create(	297 ClientSideDetectionHost* ClientSideDetectionHost::Create(

244 WebContents* tab) {	298 WebContents* tab) {

245 return new ClientSideDetectionHost(tab);	299 return new ClientSideDetectionHost(tab);

246 }	300 }

247	301

248 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)	302 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)

249 : content::WebContentsObserver(tab),	303 : content::WebContentsObserver(tab),

250 csd_service_(NULL),	304 csd_service_(NULL),

	305 classification_request_(NULL),

	306 should_extract_malware_features_(true),

	307 onload_complete_(false),

251 weak_factory_(this),	308 weak_factory_(this),

252 unsafe_unique_page_id_(-1),	309 unsafe_unique_page_id_(-1) {

253 malware_killswitch_on_(false),

254 malware_report_enabled_(false) {

255 DCHECK(tab);	310 DCHECK(tab);

256 // Note: csd_service_ and sb_service will be NULL here in testing.	311 // Note: csd_service_ and sb_service will be NULL here in testing.

257 csd_service_ = g_browser_process->safe_browsing_detection_service();	312 csd_service_ = g_browser_process->safe_browsing_detection_service();

258 feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));	313 feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));

259 registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,	314 registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,

260 content::Source<WebContents>(tab));	315 content::Source<WebContents>(tab));

261	316

262 scoped_refptr<SafeBrowsingService> sb_service =	317 scoped_refptr<SafeBrowsingService> sb_service =

263 g_browser_process->safe_browsing_service();	318 g_browser_process->safe_browsing_service();

264 if (sb_service.get()) {	319 if (sb_service.get()) {

265 ui_manager_ = sb_service->ui_manager();	320 ui_manager_ = sb_service->ui_manager();

266 database_manager_ = sb_service->database_manager();	321 database_manager_ = sb_service->database_manager();

267 ui_manager_->AddObserver(this);	322 ui_manager_->AddObserver(this);

268 }	323 }

269

270 // Only enable the malware bad IP matching and report feature for canary

271 // and dev channel.

272 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();

273 malware_report_enabled_ = (

274 channel == chrome::VersionInfo::CHANNEL_DEV \|\|

275 channel == chrome::VersionInfo::CHANNEL_CANARY);
mattm 2014/03/18 02:19:06: CL description should mention that it is removing the channel restriction. — noé 2014/03/20 17:01:45 (in reply): Done.
276 }	324 }

277	325

278 ClientSideDetectionHost::~ClientSideDetectionHost() {	326 ClientSideDetectionHost::~ClientSideDetectionHost() {

279 if (ui_manager_.get())	327 if (ui_manager_.get())

280 ui_manager_->RemoveObserver(this);	328 ui_manager_->RemoveObserver(this);

281 }	329 }

282	330

283 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {	331 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {

284 bool handled = true;	332 bool handled = true;

285 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)	333 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)

286 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,	334 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,

287 OnPhishingDetectionDone)	335 OnPhishingDetectionDone)

288 IPC_MESSAGE_UNHANDLED(handled = false)	336 IPC_MESSAGE_UNHANDLED(handled = false)

289 IPC_END_MESSAGE_MAP()	337 IPC_END_MESSAGE_MAP()

290 return handled;	338 return handled;

291 }	339 }

292	340

293 void ClientSideDetectionHost::DidNavigateMainFrame(	341 void ClientSideDetectionHost::DidNavigateMainFrame(

294 const content::LoadCommittedDetails& details,	342 const content::LoadCommittedDetails& details,

295 const content::FrameNavigateParams& params) {	343 const content::FrameNavigateParams& params) {

296 // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests	344 // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests

297 // that don't call this method on the UI thread.	345 // that don't call this method on the UI thread.

298 // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	346 // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

299 if (details.is_in_page) {	347 if (details.is_in_page) {

300 // If the navigation is within the same page, the user isn't really	348 // If the navigation is within the same page, the user isn't really

301 // navigating away. We don't need to cancel a pending callback or	349 // navigating away. We don't need to cancel a pending callback or

302 // begin a new classification.	350 // begin a new classification.

303 return;	351 return;

304 }	352 }

	353 // Cancel any pending classification request.

	354 if (classification_request_.get()) {

	355 classification_request_->Cancel();

	356 }

305 // If we navigate away and there currently is a pending phishing	357 // If we navigate away and there currently is a pending phishing

306 // report request we have to cancel it to make sure we don't display	358 // report request we have to cancel it to make sure we don't display

307 // an interstitial for the wrong page. Note that this won't cancel	359 // an interstitial for the wrong page. Note that this won't cancel

308 // the server ping back but only cancel the showing of the	360 // the server ping back but only cancel the showing of the

309 // interstitial.	361 // interstitial.

310 weak_factory_.InvalidateWeakPtrs();	362 weak_factory_.InvalidateWeakPtrs();

311	363

312 if (!csd_service_) {	364 if (!csd_service_) {

313 return;	365 return;

314 }	366 }

315

316 // Cancel any pending classification request.

317 if (classification_request_.get()) {

318 classification_request_->Cancel();

319 }

320 browse_info_.reset(new BrowseInfo);	367 browse_info_.reset(new BrowseInfo);

321	368

322 // Store redirect chain information.	369 // Store redirect chain information.

323 if (params.url.host() != cur_host_) {	370 if (params.url.host() != cur_host_) {

324 cur_host_ = params.url.host();	371 cur_host_ = params.url.host();

325 cur_host_redirects_ = params.redirects;	372 cur_host_redirects_ = params.redirects;

326 }	373 }

	374 browse_info_->url = params.url;

327 browse_info_->host_redirects = cur_host_redirects_;	375 browse_info_->host_redirects = cur_host_redirects_;

328 browse_info_->url_redirects = params.redirects;	376 browse_info_->url_redirects = params.redirects;

329 browse_info_->referrer = params.referrer.url;	377 browse_info_->referrer = params.referrer.url;

330 browse_info_->http_status_code = details.http_status_code;	378 browse_info_->http_status_code = details.http_status_code;

	379 browse_info_->page_id = params.page_id;

331	380

332 // Notify the renderer if it should classify this URL.	381 should_extract_malware_features_ = true;

	382 should_classify_for_malware_.reset();

	383 onload_complete_ = false;

	384

	385 // Check whether we can classify the current URL for phishing or malware.

333 classification_request_ = new ShouldClassifyUrlRequest(	386 classification_request_ = new ShouldClassifyUrlRequest(

334 params, web_contents(), csd_service_, database_manager_.get(), this);	387 params,

	388 base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,

	389 weak_factory_.GetWeakPtr()),

	390 base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,

	391 weak_factory_.GetWeakPtr()),

	392 web_contents(), csd_service_, database_manager_.get(), this);

335 classification_request_->Start();	393 classification_request_->Start();

336 }	394 }

337	395

338 void ClientSideDetectionHost::OnSafeBrowsingHit(	396 void ClientSideDetectionHost::OnSafeBrowsingHit(

339 const SafeBrowsingUIManager::UnsafeResource& resource) {	397 const SafeBrowsingUIManager::UnsafeResource& resource) {

340 if (!web_contents() \|\| !web_contents()->GetController().GetActiveEntry())	398 if (!web_contents() \|\| !web_contents()->GetController().GetActiveEntry())

341 return;	399 return;

342	400

343 // Check that the hit is either malware or phishing.	401 // Check that the hit is either malware or phishing.

344 if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&	402 if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&

(...skipping 62 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
407 void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) {	465 void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) {

408 DCHECK(tab);	466 DCHECK(tab);

409 // Tell any pending classification request that it is being canceled.	467 // Tell any pending classification request that it is being canceled.

410 if (classification_request_.get()) {	468 if (classification_request_.get()) {

411 classification_request_->Cancel();	469 classification_request_->Cancel();

412 }	470 }

413 // Cancel all pending feature extractions.	471 // Cancel all pending feature extractions.

414 feature_extractor_.reset();	472 feature_extractor_.reset();

415 }	473 }

416	474

	475 void ClientSideDetectionHost::OnPhishingPreClassificationDone(

	476 bool should_classify) {

	477 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	478 if (browse_info_.get() && should_classify) {

	479 VLOG(1) << "Instruct renderer to start phishing detection for URL: "

	480 << browse_info_->url;

	481 content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();

	482 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(

	483 rvh->GetRoutingID(), browse_info_->url));

	484 }

	485 }

	486

	487 void ClientSideDetectionHost::OnMalwarePreClassificationDone(

	488 bool should_classify) {

	489 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	490 // If classification checks failed we should stop extracting malware features.

	491 VLOG(2) << "Malware pre-classification checks done. Should classify: "

	492 << should_classify;

	493 should_extract_malware_features_ = should_classify;

	494 should_classify_for_malware_.reset(new bool(should_classify));

	495 MaybeStartMalwareFeatureExtraction();

	496 }

	497

	498 void ClientSideDetectionHost::DocumentOnLoadCompletedInMainFrame(

	499 int32 page_id) {

	500 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	501 if (!csd_service_ \|\| !browse_info_.get())

	502 return;

	503 VLOG(2) << "Main frame onload hander called.";

	504 if (browse_info_->page_id != page_id) {

	505 // Something weird is happening here. The BrowseInfo page ID

	506 // should always be the same as the most recent load.

	507 UMA_HISTOGRAM_COUNTS("SBClientMalware.UnexpectedPageId", 1);

	508 return;

	509 }

	510 onload_complete_ = true;

	511 MaybeStartMalwareFeatureExtraction();

	512 }

	513

	514 void ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {

	515 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	516 if (csd_service_ && browse_info_.get() &&

	517 should_classify_for_malware_.get() &&

	518 *should_classify_for_malware_ &&

	519 onload_complete_) {

	520 scoped_ptr<ClientMalwareRequest> malware_verdict(
	[Review comment] mattm (2014/03/18 02:19:06): malware_request? -- noé (2014/03/20 17:01:45), replying to mattm: Done.
	521 new ClientMalwareRequest);

	522 // Start browser-side malware feature extraction. Once we're done it will

	523 // send the malware client verdict request.

	524 malware_verdict->set_url(browse_info_->url.spec());

	525 const GURL& referrer = browse_info_->referrer;

	526 if (referrer.SchemeIs("http")) { // Only send http urls.

	527 malware_verdict->set_referrer_url(referrer.spec());

	528 }

	529 // This function doesn't expect browse_info_ to stay around after this

	530 // function returns.

	531 feature_extractor_->ExtractMalwareFeatures(

	532 browse_info_.get(),

	533 malware_verdict.release(),

	534 base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,

	535 weak_factory_.GetWeakPtr()));

	536 }

	537 }

	538

417 void ClientSideDetectionHost::OnPhishingDetectionDone(	539 void ClientSideDetectionHost::OnPhishingDetectionDone(

418 const std::string& verdict_str) {	540 const std::string& verdict_str) {

419 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	541 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

420 // There is something seriously wrong if there is no service class but	542 // There is something seriously wrong if there is no service class but

421 // this method is called. The renderer should not start phishing detection	543 // this method is called. The renderer should not start phishing detection

422 // if there isn't any service class in the browser.	544 // if there isn't any service class in the browser.

423 DCHECK(csd_service_);	545 DCHECK(csd_service_);

424 // There shouldn't be any pending requests because we revoke them everytime

425 // we navigate away.

426 DCHECK(!weak_factory_.HasWeakPtrs());

427 DCHECK(browse_info_.get());	546 DCHECK(browse_info_.get());

428	547

429 // We parse the protocol buffer here. If we're unable to parse it we won't	548 // We parse the protocol buffer here. If we're unable to parse it we won't

430 // send the verdict further.	549 // send the verdict further.

431 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);	550 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);

432 if (csd_service_ &&	551 if (csd_service_ &&

433 !weak_factory_.HasWeakPtrs() &&

434 browse_info_.get() &&	552 browse_info_.get() &&

435 verdict->ParseFromString(verdict_str) &&	553 verdict->ParseFromString(verdict_str) &&

436 verdict->IsInitialized()) {	554 verdict->IsInitialized()) {

437 // We do the malware IP matching and request sending if the feature

438 // is enabled.

439 if (malware_report_enabled_ && !MalwareKillSwitchIsOn()) {

440 scoped_ptr<ClientMalwareRequest> malware_verdict(

441 new ClientMalwareRequest);

442 // Start browser-side malware feature extraction. Once we're done it will

443 // send the malware client verdict request.

444 malware_verdict->set_url(verdict->url());

445 const GURL& referrer = browse_info_->referrer;

446 if (referrer.SchemeIs("http")) { // Only send http urls.

447 malware_verdict->set_referrer_url(referrer.spec());

448 }

449 // This function doesn't expect browse_info_ to stay around after this

450 // function returns.

451 feature_extractor_->ExtractMalwareFeatures(

452 browse_info_.get(),

453 malware_verdict.release(),

454 base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,

455 weak_factory_.GetWeakPtr()));

456 }

457

458 // We only send phishing verdict to the server if the verdict is phishing or	555 // We only send phishing verdict to the server if the verdict is phishing or

459 // if a SafeBrowsing interstitial was already shown for this site. E.g., a	556 // if a SafeBrowsing interstitial was already shown for this site. E.g., a

460 // malware or phishing interstitial was shown but the user clicked	557 // malware or phishing interstitial was shown but the user clicked

461 // through.	558 // through.

462 if (verdict->is_phishing() \|\| DidShowSBInterstitial()) {	559 if (verdict->is_phishing() \|\| DidShowSBInterstitial()) {

463 if (DidShowSBInterstitial()) {	560 if (DidShowSBInterstitial()) {

464 browse_info_->unsafe_resource.reset(unsafe_resource_.release());	561 browse_info_->unsafe_resource.reset(unsafe_resource_.release());

465 }	562 }

466 // Start browser-side feature extraction. Once we're done it will send	563 // Start browser-side feature extraction. Once we're done it will send

467 // the client verdict request.	564 // the client verdict request.

468 feature_extractor_->ExtractFeatures(	565 feature_extractor_->ExtractFeatures(

469 browse_info_.get(),	566 browse_info_.get(),

470 verdict.release(),	567 verdict.release(),

471 base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,	568 base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,

472 weak_factory_.GetWeakPtr()));	569 weak_factory_.GetWeakPtr()));

473 }	570 }

474 }	571 }

475 browse_info_.reset();

476 }	572 }

477	573

478 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,	574 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,

479 bool is_phishing) {	575 bool is_phishing) {

480 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	576 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

481 VLOG(2) << "Received server phishing verdict for URL:" << phishing_url	577 VLOG(2) << "Received server phishing verdict for URL:" << phishing_url

482 << " is_phishing:" << is_phishing;	578 << " is_phishing:" << is_phishing;

483 if (is_phishing) {	579 if (is_phishing) {

484 DCHECK(web_contents());	580 DCHECK(web_contents());

485 if (ui_manager_.get()) {	581 if (ui_manager_.get()) {

(...skipping 108 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
594 }	690 }

595	691

596 void ClientSideDetectionHost::Observe(	692 void ClientSideDetectionHost::Observe(

597 int type,	693 int type,

598 const content::NotificationSource& source,	694 const content::NotificationSource& source,

599 const content::NotificationDetails& details) {	695 const content::NotificationDetails& details) {

600 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	696 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

601 DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);	697 DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);

602 const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(	698 const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(

603 details).ptr();	699 details).ptr();

604 if (req && browse_info_.get() && malware_report_enabled_ &&	700 if (req && browse_info_.get() &&

605 !MalwareKillSwitchIsOn()) {	701 should_extract_malware_features_ && req->url.is_valid()) {

606 if (req->url.is_valid()) {	702 UpdateIPUrlMap(req->socket_address.host() /* ip */,

607 UpdateIPUrlMap(req->socket_address.host() /* ip */,	703 req->url.spec() /* url */,

608 req->url.spec() /* url */,	704 req->method,

609 req->method,	705 req->referrer,

610 req->referrer,	706 req->resource_type);

611 req->resource_type);

612 }

613 }	707 }

614 }	708 }

615	709

616 bool ClientSideDetectionHost::DidShowSBInterstitial() const {	710 bool ClientSideDetectionHost::DidShowSBInterstitial() const {

617 if (unsafe_unique_page_id_ <= 0 \|\| !web_contents()) {	711 if (unsafe_unique_page_id_ <= 0 \|\| !web_contents()) {

618 return false;	712 return false;

619 }	713 }

620 const NavigationEntry* nav_entry =	714 const NavigationEntry* nav_entry =

621 web_contents()->GetController().GetActiveEntry();	715 web_contents()->GetController().GetActiveEntry();

622 return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);	716 return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);

(...skipping 10 matching lines...) Expand all Loading...
633 if (ui_manager_.get())	727 if (ui_manager_.get())

634 ui_manager_->RemoveObserver(this);	728 ui_manager_->RemoveObserver(this);

635	729

636 ui_manager_ = ui_manager;	730 ui_manager_ = ui_manager;

637 if (ui_manager)	731 if (ui_manager)

638 ui_manager_->AddObserver(this);	732 ui_manager_->AddObserver(this);

639	733

640 database_manager_ = database_manager;	734 database_manager_ = database_manager;

641 }	735 }

642	736

643 bool ClientSideDetectionHost::MalwareKillSwitchIsOn() {

644 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

645 return malware_killswitch_on_;

646 }

647

648 void ClientSideDetectionHost::SetMalwareKillSwitch(bool killswitch_on) {

649 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

650 malware_killswitch_on_ = killswitch_on;

651 }

652

653 } // namespace safe_browsing	737 } // namespace safe_browsing

OLD	NEW