chrome/browser/safe_browsing/client_side_detection_host.cc - Issue 173133004: Separate pre-classification checks for client-side malware and phishing

Side by Side Diff: chrome/browser/safe_browsing/client_side_detection_host.cc

Issue 173133004: Separate pre-classification checks for client-side malware and phishing (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« chrome/browser/safe_browsing/client_side_detection_host.h ('K') | « chrome/browser/safe_browsing/client_side_detection_host.h ('k') | chrome/browser/safe_browsing/client_side_detection_service.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"	5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"

6	6

7 #include <vector>	7 #include <vector>

8	8

9 #include "base/logging.h"	9 #include "base/logging.h"

10 #include "base/memory/ref_counted.h"	10 #include "base/memory/ref_counted.h"

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
43 using content::ResourceRequestDetails;	43 using content::ResourceRequestDetails;

44 using content::WebContents;	44 using content::WebContents;

45	45

46 namespace safe_browsing {	46 namespace safe_browsing {

47	47

48 const int ClientSideDetectionHost::kMaxUrlsPerIP = 20;	48 const int ClientSideDetectionHost::kMaxUrlsPerIP = 20;

49 const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200;	49 const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200;

50	50

51 const char kSafeBrowsingMatchKey[] = "safe_browsing_match";	51 const char kSafeBrowsingMatchKey[] = "safe_browsing_match";

52	52

	53 typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;

	54

53 // This class is instantiated each time a new toplevel URL loads, and	55 // This class is instantiated each time a new toplevel URL loads, and

54 // asynchronously checks whether the phishing classifier should run for this	56 // asynchronously checks whether the malware and phishing classifiers should run

55 // URL. If so, it notifies the renderer with a StartPhishingDetection IPC.	57 // for this URL. If so, it notifies the host class by calling the provided

56 // Objects of this class are ref-counted and will be destroyed once nobody	58 // callback from the UI thread. Objects of this class are ref-counted and will

57 // uses it anymore. If \|web_contents\|, \|csd_service\| or \|host\| go away you need	59 // be destroyed once nobody uses it anymore. If \|web_contents\|, \|csd_service\|

58 // to call Cancel(). We keep the \|database_manager\| alive in a ref pointer for	60 // or \|host\| go away you need to call Cancel(). We keep the \|database_manager\|

59 // as long as it takes.	61 // alive in a ref pointer for as long as it takes.

60 class ClientSideDetectionHost::ShouldClassifyUrlRequest	62 class ClientSideDetectionHost::ShouldClassifyUrlRequest

61 : public base::RefCountedThreadSafe<	63 : public base::RefCountedThreadSafe<

62 ClientSideDetectionHost::ShouldClassifyUrlRequest> {	64 ClientSideDetectionHost::ShouldClassifyUrlRequest> {

63 public:	65 public:

64 ShouldClassifyUrlRequest(const content::FrameNavigateParams& params,	66 ShouldClassifyUrlRequest(

65 WebContents* web_contents,	67 const content::FrameNavigateParams& params,

66 ClientSideDetectionService* csd_service,	68 const ShouldClassifyUrlCallback& start_phishing_classification,

67 SafeBrowsingDatabaseManager* database_manager,	69 const ShouldClassifyUrlCallback& start_malware_classification,

68 ClientSideDetectionHost* host)	70 WebContents* web_contents,

69 : canceled_(false),	71 ClientSideDetectionService* csd_service,

70 params_(params),	72 SafeBrowsingDatabaseManager* database_manager,

	73 ClientSideDetectionHost* host)

	74 : params_(params),

71 web_contents_(web_contents),	75 web_contents_(web_contents),

72 csd_service_(csd_service),	76 csd_service_(csd_service),

73 database_manager_(database_manager),	77 database_manager_(database_manager),

74 host_(host) {	78 host_(host),

	79 start_phishing_classification_cb_(start_phishing_classification),

	80 start_malware_classification_cb_(start_malware_classification) {

75 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	81 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

76 DCHECK(web_contents_);	82 DCHECK(web_contents_);

77 DCHECK(csd_service_);	83 DCHECK(csd_service_);

78 DCHECK(database_manager_.get());	84 DCHECK(database_manager_.get());

79 DCHECK(host_);	85 DCHECK(host_);

80 }	86 }

81	87

82 void Start() {	88 void Start() {

83 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	89 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

84	90

85 // We start by doing some simple checks that can run on the UI thread.	91 // We start by doing some simple checks that can run on the UI thread.

86 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);	92 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);

	93 UMA_HISTOGRAM_COUNTS("SBClientMalware.ClassificationStart", 1);
	mattm 2014/02/21 00:35:29 Don't forget to update histograms.xml (it's in the same repo now so you don't need a separate CL) noé 2014/02/21 19:04:16 Done.
87	94

88 // Only classify [X]HTML documents.	95 // Only classify [X]HTML documents.

89 if (params_.contents_mime_type != "text/html" &&	96 if (params_.contents_mime_type != "text/html" &&

90 params_.contents_mime_type != "application/xhtml+xml") {	97 params_.contents_mime_type != "application/xhtml+xml" &&

	98 MaybeClassifyForPhishing()) {
	mattm 2014/02/21 00:35:29 It is confusing that some of the tests like this one check the MaybeClassifyForX() as part of the condition, whereas others just depend on the DontClassifyForX doing the check internally. noé 2014/02/21 19:04:16 That's a leftover. We shouldn't need to check Maybe* anywhere except to know whether we should continue at all. For example, right before we post a new task to the IO thread.
91 VLOG(1) << "Skipping phishing classification for URL: " << params_.url	99 VLOG(1) << "Skipping phishing classification for URL: " << params_.url

92 << " because it has an unsupported MIME type: "	100 << " because it has an unsupported MIME type: "

93 << params_.contents_mime_type;	101 << params_.contents_mime_type;

94 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	102 DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);

95 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,

96 NO_CLASSIFY_MAX);

97 return;

98 }	103 }

99	104

100 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {	105 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {

101 VLOG(1) << "Skipping phishing classification for URL: " << params_.url	106 VLOG(1) << "Skipping phishing classification for URL: " << params_.url

102 << " because of hosting on private IP: "	107 << " because of hosting on private IP: "

103 << params_.socket_address.host();	108 << params_.socket_address.host();

104 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	109 DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);

105 NO_CLASSIFY_PRIVATE_IP,

106 NO_CLASSIFY_MAX);

107 return;

108 }	110 }

109	111

110 // Don't run the phishing classifier if the tab is incognito.	112 // Don't run any classifier if the tab is incognito.

111 if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {	113 if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {

112 VLOG(1) << "Skipping phishing classification for URL: " << params_.url	114 VLOG(1) << "Skipping phishing and malware classification for URL: "

113 << " because we're browsing incognito.";	115 << params_.url << " because we're browsing incognito.";

114 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	116 DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);

115 NO_CLASSIFY_OFF_THE_RECORD,	117 DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);

116 NO_CLASSIFY_MAX);

117

118 return;

119 }	118 }

120	119

121 // We lookup the csd-whitelist before we lookup the cache because	120 // We lookup the csd-whitelist before we lookup the cache because

122 // a URL may have recently been whitelisted. If the URL matches	121 // a URL may have recently been whitelisted. If the URL matches

123 // the csd-whitelist we won't start classification. The	122 // the csd-whitelist we won't start phishing classification. The

124 // csd-whitelist check has to be done on the IO thread because it	123 // csd-whitelist check has to be done on the IO thread because it

125 // uses the SafeBrowsing service class.	124 // uses the SafeBrowsing service class.

126 BrowserThread::PostTask(	125 if (MaybeClassifyForPhishing() \|\| MaybeClassifyForMalware()) {

127 BrowserThread::IO,	126 BrowserThread::PostTask(

128 FROM_HERE,	127 BrowserThread::IO,

129 base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,	128 FROM_HERE,

130 this, params_.url));	129 base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,

	130 this, params_.url));

	131 }

131 }	132 }

132	133

133 void Cancel() {	134 void Cancel() {

134 canceled_ = true;	135 DontClassifyForPhishing(NO_CLASSIFY_CANCEL);

	136 DontClassifyForMalware(NO_CLASSIFY_CANCEL);

135 // Just to make sure we don't do anything stupid we reset all these	137 // Just to make sure we don't do anything stupid we reset all these

136 // pointers except for the safebrowsing service class which may be	138 // pointers except for the safebrowsing service class which may be

137 // accessed by CheckCsdWhitelist().	139 // accessed by CheckCsdWhitelist().

138 web_contents_ = NULL;	140 web_contents_ = NULL;

139 csd_service_ = NULL;	141 csd_service_ = NULL;

140 host_ = NULL;	142 host_ = NULL;

141 }	143 }

142	144

143 private:	145 private:

144 friend class base::RefCountedThreadSafe<	146 friend class base::RefCountedThreadSafe<

145 ClientSideDetectionHost::ShouldClassifyUrlRequest>;	147 ClientSideDetectionHost::ShouldClassifyUrlRequest>;

146	148

147 // Enum used to keep stats about why the pre-classification check failed.	149 // Enum used to keep stats about why the pre-classification check failed.

148 enum PreClassificationCheckFailures {	150 enum PreClassificationCheckFailures {

149 OBSOLETE_NO_CLASSIFY_PROXY_FETCH,	151 OBSOLETE_NO_CLASSIFY_PROXY_FETCH,

150 NO_CLASSIFY_PRIVATE_IP,	152 NO_CLASSIFY_PRIVATE_IP,

151 NO_CLASSIFY_OFF_THE_RECORD,	153 NO_CLASSIFY_OFF_THE_RECORD,

152 NO_CLASSIFY_MATCH_CSD_WHITELIST,	154 NO_CLASSIFY_MATCH_CSD_WHITELIST,

153 NO_CLASSIFY_TOO_MANY_REPORTS,	155 NO_CLASSIFY_TOO_MANY_REPORTS,

154 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,	156 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,

	157 NO_CLASSIFY_NO_DATABASE_MANAGER,

	158 NO_CLASSIFY_KILLSWITCH,

	159 NO_CLASSIFY_CANCEL,

	160 NO_CLASSIFY_RESULT_FROM_CACHE,

155	161

156 NO_CLASSIFY_MAX // Always add new values before this one.	162 NO_CLASSIFY_MAX // Always add new values before this one.

157 };	163 };

158	164

159 // The destructor can be called either from the UI or the IO thread.	165 // The destructor can be called either from the UI or the IO thread.

160 virtual ~ShouldClassifyUrlRequest() { }	166 virtual ~ShouldClassifyUrlRequest() { }

161	167

	168 bool MaybeClassifyForPhishing() const {

	169 return !start_phishing_classification_cb_.is_null();

	170 }

	171

	172 bool MaybeClassifyForMalware() const {

	173 return !start_malware_classification_cb_.is_null();

	174 }

	175

	176 void DontClassifyForPhishing(PreClassificationCheckFailures reason) {

	177 if (MaybeClassifyForPhishing()) {

	178 // Track the first reason why we stopped classifying for phishing.

	179 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",

	180 reason, NO_CLASSIFY_MAX);

	181 start_phishing_classification_cb_.Run(false);

	182 }

	183 start_phishing_classification_cb_.Reset();

	184 }

	185

	186 void DontClassifyForMalware(PreClassificationCheckFailures reason) {

	187 if (MaybeClassifyForMalware()) {

	188 // Track the first reason why we stopped classifying for phishing.
	mattm 2014/02/21 00:35:29 s/phishing/malware/ noé 2014/02/21 19:04:16 Done.
	189 UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",

	190 reason, NO_CLASSIFY_MAX);

	191 start_malware_classification_cb_.Run(false);

	192 }

	193 start_malware_classification_cb_.Reset();

	194 }

	195

162 void CheckCsdWhitelist(const GURL& url) {	196 void CheckCsdWhitelist(const GURL& url) {

163 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));	197 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

164 if (!database_manager_.get() \|\|	198 // We don't want to call the classification callbacks from the IO

165 database_manager_->MatchCsdWhitelistUrl(url)) {	199 // thread so we simply pass the results of this method to CheckCache()

166 // We're done. There is no point in going back to the UI thread.	200 // which is called on the UI thread;

167 VLOG(1) << "Skipping phishing classification for URL: " << url	201 PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;

168 << " because it matches the csd whitelist";	202 PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;

169 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	203 if (!database_manager_.get()) {

170 NO_CLASSIFY_MATCH_CSD_WHITELIST,	204 // We cannot check the Safe Browsing whitelists so we stop here

171 NO_CLASSIFY_MAX);	205 // for safety.

	206 malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;

172 return;	207 return;

	208 } else {

	209 if (database_manager_->MatchCsdWhitelistUrl(url)) {

	210 VLOG(1) << "Skipping phishing classification for URL: " << url

	211 << " because it matches the csd whitelist";

	212 phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;

	213 }

	214 if (database_manager_->IsMalwareKillSwitchOn()) {

	215 malware_reason = NO_CLASSIFY_KILLSWITCH;

	216 }

173 }	217 }

174

175 bool malware_killswitch_on = database_manager_->IsMalwareKillSwitchOn();

176

177 BrowserThread::PostTask(	218 BrowserThread::PostTask(

178 BrowserThread::UI,	219 BrowserThread::UI,

179 FROM_HERE,	220 FROM_HERE,

180 base::Bind(&ShouldClassifyUrlRequest::CheckCache, this,	221 base::Bind(&ShouldClassifyUrlRequest::CheckCache,

181 malware_killswitch_on));	222 this,

	223 phishing_reason,

	224 malware_reason));

182 }	225 }

183	226

184 void CheckCache(bool malware_killswitch_on) {	227 void CheckCache(PreClassificationCheckFailures phishing_reason,

	228 PreClassificationCheckFailures malware_reason) {

185 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	229 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

186 if (canceled_) {	230 if (phishing_reason != NO_CLASSIFY_MAX)

187 return;	231 DontClassifyForPhishing(phishing_reason);

	232 if (malware_reason != NO_CLASSIFY_MAX)

	233 DontClassifyForMalware(malware_reason);

	234 if (!MaybeClassifyForMalware() && !MaybeClassifyForPhishing()) {

	235 return; // No point in doing anything else.

188 }	236 }

189	237 // If result is cached, we don't want to run classification again.

190 host_->SetMalwareKillSwitch(malware_killswitch_on);	238 // In that case we're just trying to show the warning.

191 // If result is cached, we don't want to run classification again

192 bool is_phishing;	239 bool is_phishing;

193 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {	240 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {

194 VLOG(1) << "Satisfying request for " << params_.url << " from cache";	241 VLOG(1) << "Satisfying request for " << params_.url << " from cache";

195 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);	242 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);

196 // Since we are already on the UI thread, this is safe.	243 // Since we are already on the UI thread, this is safe.

197 host_->MaybeShowPhishingWarning(params_.url, is_phishing);	244 host_->MaybeShowPhishingWarning(params_.url, is_phishing);

198 return;	245 DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);

199 }	246 }

200	247

201 // We want to limit the number of requests, though we will ignore the	248 // We want to limit the number of requests, though we will ignore the

202 // limit for urls in the cache. We don't want to start classifying	249 // limit for urls in the cache. We don't want to start classifying

203 // too many pages as phishing, but for those that we already think are	250 // too many pages as phishing, but for those that we already think are

204 // phishing we want to give ourselves a chance to fix false positives.	251 // phishing we want to give ourselves a chance to fix false positives.

205 if (csd_service_->IsInCache(params_.url)) {	252 if (csd_service_->IsInCache(params_.url)) {

206 VLOG(1) << "Reporting limit skipped for " << params_.url	253 VLOG(1) << "Reporting limit skipped for " << params_.url

207 << " as it was in the cache.";	254 << " as it was in the cache.";

208 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);	255 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);

209 } else if (csd_service_->OverPhishingReportLimit()) {	256 } else if (csd_service_->OverPhishingReportLimit()) {

210 VLOG(1) << "Too many report phishing requests sent recently, "	257 VLOG(1) << "Too many report phishing requests sent recently, "

211 << "not running classification for " << params_.url;	258 << "not running classification for " << params_.url;

212 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	259 DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);

213 NO_CLASSIFY_TOO_MANY_REPORTS,	260 }

214 NO_CLASSIFY_MAX);	261 if (csd_service_->OverMalwareReportLimit()) {

215 return;	262 DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);

216 }	263 }

217	264

218 // Everything checks out, so start classification.	265 // Everything checks out, so start classification.

219 // \|web_contents_\| is safe to call as we will be destructed	266 // \|web_contents_\| is safe to call as we will be destructed

220 // before it is.	267 // before it is.

221 VLOG(1) << "Instruct renderer to start phishing detection for URL: "	268 if (MaybeClassifyForPhishing())

222 << params_.url;	269 start_phishing_classification_cb_.Run(true);

223 content::RenderViewHost* rvh = web_contents_->GetRenderViewHost();	270 if (MaybeClassifyForMalware())

224 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(	271 start_malware_classification_cb_.Run(true);

225 rvh->GetRoutingID(), params_.url));

226 }	272 }

227	273

228 // No need to protect \|canceled_\| with a lock because it is only read and

229 // written by the UI thread.

230 bool canceled_;

231 content::FrameNavigateParams params_;	274 content::FrameNavigateParams params_;

232 WebContents* web_contents_;	275 WebContents* web_contents_;

233 ClientSideDetectionService* csd_service_;	276 ClientSideDetectionService* csd_service_;

234 // We keep a ref pointer here just to make sure the safe browsing	277 // We keep a ref pointer here just to make sure the safe browsing

235 // database manager stays alive long enough.	278 // database manager stays alive long enough.

236 scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;	279 scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;

237 ClientSideDetectionHost* host_;	280 ClientSideDetectionHost* host_;

238	281

	282 ShouldClassifyUrlCallback start_phishing_classification_cb_;

	283 ShouldClassifyUrlCallback start_malware_classification_cb_;

	284

239 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);	285 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);

240 };	286 };

241	287

242 // static	288 // static

243 ClientSideDetectionHost* ClientSideDetectionHost::Create(	289 ClientSideDetectionHost* ClientSideDetectionHost::Create(

244 WebContents* tab) {	290 WebContents* tab) {

245 return new ClientSideDetectionHost(tab);	291 return new ClientSideDetectionHost(tab);

246 }	292 }

247	293

248 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)	294 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)

249 : content::WebContentsObserver(tab),	295 : content::WebContentsObserver(tab),

250 csd_service_(NULL),	296 csd_service_(NULL),

	297 should_extract_malware_features_(true),

	298 should_classify_for_malware_(false),

251 weak_factory_(this),	299 weak_factory_(this),

252 unsafe_unique_page_id_(-1),	300 unsafe_unique_page_id_(-1) {

253 malware_killswitch_on_(false),

254 malware_report_enabled_(false) {

255 DCHECK(tab);	301 DCHECK(tab);

256 // Note: csd_service_ and sb_service will be NULL here in testing.	302 // Note: csd_service_ and sb_service will be NULL here in testing.

257 csd_service_ = g_browser_process->safe_browsing_detection_service();	303 csd_service_ = g_browser_process->safe_browsing_detection_service();

258 feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));	304 feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));

259 registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,	305 registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,

260 content::Source<WebContents>(tab));	306 content::Source<WebContents>(tab));

261	307

262 scoped_refptr<SafeBrowsingService> sb_service =	308 scoped_refptr<SafeBrowsingService> sb_service =

263 g_browser_process->safe_browsing_service();	309 g_browser_process->safe_browsing_service();

264 if (sb_service.get()) {	310 if (sb_service.get()) {

265 ui_manager_ = sb_service->ui_manager();	311 ui_manager_ = sb_service->ui_manager();

266 database_manager_ = sb_service->database_manager();	312 database_manager_ = sb_service->database_manager();

267 ui_manager_->AddObserver(this);	313 ui_manager_->AddObserver(this);

268 }	314 }

269

270 // Only enable the malware bad IP matching and report feature for canary

271 // and dev channel.

272 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();

273 malware_report_enabled_ = (

274 channel == chrome::VersionInfo::CHANNEL_DEV \|\|

275 channel == chrome::VersionInfo::CHANNEL_CANARY);

276 }	315 }

277	316

278 ClientSideDetectionHost::~ClientSideDetectionHost() {	317 ClientSideDetectionHost::~ClientSideDetectionHost() {

279 if (ui_manager_.get())	318 if (ui_manager_.get())

280 ui_manager_->RemoveObserver(this);	319 ui_manager_->RemoveObserver(this);

281 }	320 }

282	321

283 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {	322 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {

284 bool handled = true;	323 bool handled = true;

285 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)	324 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
317 if (classification_request_.get()) {	356 if (classification_request_.get()) {

318 classification_request_->Cancel();	357 classification_request_->Cancel();

319 }	358 }

320 browse_info_.reset(new BrowseInfo);	359 browse_info_.reset(new BrowseInfo);

321	360

322 // Store redirect chain information.	361 // Store redirect chain information.

323 if (params.url.host() != cur_host_) {	362 if (params.url.host() != cur_host_) {

324 cur_host_ = params.url.host();	363 cur_host_ = params.url.host();

325 cur_host_redirects_ = params.redirects;	364 cur_host_redirects_ = params.redirects;

326 }	365 }

	366 browse_info_->url = params.url;

327 browse_info_->host_redirects = cur_host_redirects_;	367 browse_info_->host_redirects = cur_host_redirects_;

328 browse_info_->url_redirects = params.redirects;	368 browse_info_->url_redirects = params.redirects;

329 browse_info_->referrer = params.referrer.url;	369 browse_info_->referrer = params.referrer.url;

330 browse_info_->http_status_code = details.http_status_code;	370 browse_info_->http_status_code = details.http_status_code;

	371 browse_info_->page_id = params.page_id;

331	372

332 // Notify the renderer if it should classify this URL.	373 // Check whether we can classify the current URL for phishing or malware.

333 classification_request_ = new ShouldClassifyUrlRequest(	374 classification_request_ = new ShouldClassifyUrlRequest(

334 params, web_contents(), csd_service_, database_manager_.get(), this);	375 params,

	376 base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,

	377 weak_factory_.GetWeakPtr()),

	378 base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,

	379 weak_factory_.GetWeakPtr()),

	380 web_contents(), csd_service_, database_manager_.get(), this);

335 classification_request_->Start();	381 classification_request_->Start();

336 }	382 }

337	383

338 void ClientSideDetectionHost::OnSafeBrowsingHit(	384 void ClientSideDetectionHost::OnSafeBrowsingHit(

339 const SafeBrowsingUIManager::UnsafeResource& resource) {	385 const SafeBrowsingUIManager::UnsafeResource& resource) {

340 if (!web_contents() \|\| !web_contents()->GetController().GetActiveEntry())	386 if (!web_contents() \|\| !web_contents()->GetController().GetActiveEntry())

341 return;	387 return;

342	388

343 // Check that the hit is either malware or phishing.	389 // Check that the hit is either malware or phishing.

344 if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&	390 if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&

(...skipping 62 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
407 void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) {	453 void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) {

408 DCHECK(tab);	454 DCHECK(tab);

409 // Tell any pending classification request that it is being canceled.	455 // Tell any pending classification request that it is being canceled.

410 if (classification_request_.get()) {	456 if (classification_request_.get()) {

411 classification_request_->Cancel();	457 classification_request_->Cancel();

412 }	458 }

413 // Cancel all pending feature extractions.	459 // Cancel all pending feature extractions.

414 feature_extractor_.reset();	460 feature_extractor_.reset();

415 }	461 }

416	462

	463 void ClientSideDetectionHost::OnPhishingPreClassificationDone(

	464 bool should_classify) {

	465 if (browse_info_.get() && should_classify) {

	466 VLOG(1) << "Instruct renderer to start phishing detection for URL: "

	467 << browse_info_->url;

	468 content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();

	469 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(

	470 rvh->GetRoutingID(), browse_info_->url));

	471 }

	472 }

	473

	474 void ClientSideDetectionHost::OnMalwarePreClassificationDone(

	475 bool should_classify) {

	476 // If classification checks failed we should stop extracting malware features.

	477 should_extract_malware_features_ = should_classify;

	478 should_classify_for_malware_ = should_classify;
	mattm 2014/02/21 00:35:29 I think there are going to be some races between this callback, ClientSideDetectionHost::Observe, and DocumentOnLoadCompletedInMainFrame. noé 2014/02/21 19:04:16 Good point. Created a separate method with a member variable that keeps the state around.
	479 }

	480

	481 void ClientSideDetectionHost::DocumentOnLoadCompletedInMainFrame(

	482 int32 page_id) {
	mattm 2014/02/21 00:35:29 Add thread dcheck (probably in more other methods too) noé 2014/02/21 19:04:16 Done.
	483 if (!browse_info_.get()) {

	484 // This only happens if OnPhishingDetectionDone is called before the

	485 // document is done loading. Phishing detection takes longer than

	486 // that except when there is an error.
	mattm 2014/02/21 00:35:29 Not super clear, but it sounds like if there are slow loading resources or the page's onload handler takes an arbitrarily long amount of time, this could happen? Seems like it would be better to just only clear the browse_info when both are done with it. noé 2014/02/21 19:04:16 Removed that part. Is there a help class to do reference counting by hand? I'd rather avoid yet another flag or member variable to track that both feature extractions are done. Any thoughts? Right now, I decided not to delete the browse info. It's a pretty small object and it will be deleted once a new page loads. mattm 2014/02/22 02:49:03 Don't think there's too much like that. Could make it a RefCounted object and keep separate refptrs for the malware and phishing.
I guess there is the RefCountedData wrapper but I'm not sure that is much better than just making it RefCounted (and RefCountedData is thread-safe which isn't necessary here.) noé 2014/03/14 22:21:32 RefCounted isn't a good fit here. The object is a member of the host and should be deleted once both malware and phishing classification is done. Since I can't manually increment the reference counter it's not that helpful. I think the object is small enough to keep it around.
	487 UMA_HISTOGRAM_COUNTS("SBClientMalware.MissingBrowseInfo", 1);

	488 return;

	489 }

	490 if (browse_info_->page_id == page_id &&
	mattm 2014/02/21 00:35:29
	Is there an expected case where the pageids don't match? Should there be a dcheck or uma log?
	noé 2014/02/21 19:04:16
	On 2014/02/21 00:35:29, mattm wrote:
	> Is there an expected case where the pageids don't match? Should there be a
	> dcheck or uma log?
	Added a UMA stat for that instead.
	491 should_classify_for_malware_) {

	492 scoped_ptr<ClientMalwareRequest> malware_verdict(

	493 new ClientMalwareRequest);

	494 // Start browser-side malware feature extraction. Once we're done it will

	495 // send the malware client verdict request.

	496 malware_verdict->set_url(browse_info_->url.spec());

	497 const GURL& referrer = browse_info_->referrer;

	498 if (referrer.SchemeIs("http")) { // Only send http urls.

	499 malware_verdict->set_referrer_url(referrer.spec());

	500 }

	501 // This function doesn't expect browse_info_ to stay around after this

	502 // function returns.

	503 feature_extractor_->ExtractMalwareFeatures(

	504 browse_info_.get(),

	505 malware_verdict.release(),

	506 base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,

	507 weak_factory_.GetWeakPtr()));

	508 }

	509 }

	510

417 void ClientSideDetectionHost::OnPhishingDetectionDone(	511 void ClientSideDetectionHost::OnPhishingDetectionDone(

418 const std::string& verdict_str) {	512 const std::string& verdict_str) {

419 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	513 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

420 // There is something seriously wrong if there is no service class but	514 // There is something seriously wrong if there is no service class but

421 // this method is called. The renderer should not start phishing detection	515 // this method is called. The renderer should not start phishing detection

422 // if there isn't any service class in the browser.	516 // if there isn't any service class in the browser.

423 DCHECK(csd_service_);	517 DCHECK(csd_service_);

424 // There shouldn't be any pending requests because we revoke them everytime	518 // There shouldn't be any pending requests because we revoke them everytime

425 // we navigate away.	519 // we navigate away.

426 DCHECK(!weak_factory_.HasWeakPtrs());	520 DCHECK(!weak_factory_.HasWeakPtrs());

427 DCHECK(browse_info_.get());	521 DCHECK(browse_info_.get());

428	522

429 // We parse the protocol buffer here. If we're unable to parse it we won't	523 // We parse the protocol buffer here. If we're unable to parse it we won't

430 // send the verdict further.	524 // send the verdict further.

431 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);	525 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);

432 if (csd_service_ &&	526 if (csd_service_ &&

433 !weak_factory_.HasWeakPtrs() &&	527 !weak_factory_.HasWeakPtrs() &&

434 browse_info_.get() &&	528 browse_info_.get() &&

435 verdict->ParseFromString(verdict_str) &&	529 verdict->ParseFromString(verdict_str) &&

436 verdict->IsInitialized()) {	530 verdict->IsInitialized()) {

437 // We do the malware IP matching and request sending if the feature

438 // is enabled.

439 if (malware_report_enabled_ && !MalwareKillSwitchIsOn()) {

440 scoped_ptr<ClientMalwareRequest> malware_verdict(

441 new ClientMalwareRequest);

442 // Start browser-side malware feature extraction. Once we're done it will

443 // send the malware client verdict request.

444 malware_verdict->set_url(verdict->url());

445 const GURL& referrer = browse_info_->referrer;

446 if (referrer.SchemeIs("http")) { // Only send http urls.

447 malware_verdict->set_referrer_url(referrer.spec());

448 }

449 // This function doesn't expect browse_info_ to stay around after this

450 // function returns.

451 feature_extractor_->ExtractMalwareFeatures(

452 browse_info_.get(),

453 malware_verdict.release(),

454 base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,

455 weak_factory_.GetWeakPtr()));

456 }

457

458 // We only send phishing verdict to the server if the verdict is phishing or	531 // We only send phishing verdict to the server if the verdict is phishing or

459 // if a SafeBrowsing interstitial was already shown for this site. E.g., a	532 // if a SafeBrowsing interstitial was already shown for this site. E.g., a

460 // malware or phishing interstitial was shown but the user clicked	533 // malware or phishing interstitial was shown but the user clicked

461 // through.	534 // through.

462 if (verdict->is_phishing() \|\| DidShowSBInterstitial()) {	535 if (verdict->is_phishing() \|\| DidShowSBInterstitial()) {

463 if (DidShowSBInterstitial()) {	536 if (DidShowSBInterstitial()) {

464 browse_info_->unsafe_resource.reset(unsafe_resource_.release());	537 browse_info_->unsafe_resource.reset(unsafe_resource_.release());

465 }	538 }

466 // Start browser-side feature extraction. Once we're done it will send	539 // Start browser-side feature extraction. Once we're done it will send

467 // the client verdict request.	540 // the client verdict request.

468 feature_extractor_->ExtractFeatures(	541 feature_extractor_->ExtractFeatures(

469 browse_info_.get(),	542 browse_info_.get(),

470 verdict.release(),	543 verdict.release(),

471 base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,	544 base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,

472 weak_factory_.GetWeakPtr()));	545 weak_factory_.GetWeakPtr()));

473 }	546 }

474 }	547 }

	548 // It's safe to delete the browse info here because we're not going to further

	549 // need it. This method is typically called after

	550 // DocumentOnLoadCompletedInMainFrame which is the last moment the malware

	551 // classifier needs access to the browse info.

475 browse_info_.reset();	552 browse_info_.reset();

476 }	553 }

477	554

478 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,	555 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,

479 bool is_phishing) {	556 bool is_phishing) {

480 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	557 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

481 VLOG(2) << "Received server phishing verdict for URL:" << phishing_url	558 VLOG(2) << "Received server phishing verdict for URL:" << phishing_url

482 << " is_phishing:" << is_phishing;	559 << " is_phishing:" << is_phishing;

483 if (is_phishing) {	560 if (is_phishing) {

484 DCHECK(web_contents());	561 DCHECK(web_contents());

(...skipping 109 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
594 }	671 }

595	672

596 void ClientSideDetectionHost::Observe(	673 void ClientSideDetectionHost::Observe(

597 int type,	674 int type,

598 const content::NotificationSource& source,	675 const content::NotificationSource& source,

599 const content::NotificationDetails& details) {	676 const content::NotificationDetails& details) {

600 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	677 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

601 DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);	678 DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);

602 const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(	679 const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(

603 details).ptr();	680 details).ptr();

604 if (req && browse_info_.get() && malware_report_enabled_ &&	681 if (req && browse_info_.get() &&

605 !MalwareKillSwitchIsOn()) {	682 should_extract_malware_features_ && req->url.is_valid()) {

606 if (req->url.is_valid()) {	683 UpdateIPUrlMap(req->socket_address.host() /* ip */,

607 UpdateIPUrlMap(req->socket_address.host() /* ip */,	684 req->url.spec() /* url */,

608 req->url.spec() /* url */,	685 req->method,

609 req->method,	686 req->referrer,

610 req->referrer,	687 req->resource_type);

611 req->resource_type);

612 }

613 }	688 }

614 }	689 }

615	690

616 bool ClientSideDetectionHost::DidShowSBInterstitial() const {	691 bool ClientSideDetectionHost::DidShowSBInterstitial() const {

617 if (unsafe_unique_page_id_ <= 0 \|\| !web_contents()) {	692 if (unsafe_unique_page_id_ <= 0 \|\| !web_contents()) {

618 return false;	693 return false;

619 }	694 }

620 const NavigationEntry* nav_entry =	695 const NavigationEntry* nav_entry =

621 web_contents()->GetController().GetActiveEntry();	696 web_contents()->GetController().GetActiveEntry();

622 return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);	697 return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);

(...skipping 10 matching lines...) Expand all Loading...
633 if (ui_manager_.get())	708 if (ui_manager_.get())

634 ui_manager_->RemoveObserver(this);	709 ui_manager_->RemoveObserver(this);

635	710

636 ui_manager_ = ui_manager;	711 ui_manager_ = ui_manager;

637 if (ui_manager)	712 if (ui_manager)

638 ui_manager_->AddObserver(this);	713 ui_manager_->AddObserver(this);

639	714

640 database_manager_ = database_manager;	715 database_manager_ = database_manager;

641 }	716 }

642	717

643 bool ClientSideDetectionHost::MalwareKillSwitchIsOn() {

644 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

645 return malware_killswitch_on_;

646 }

647

648 void ClientSideDetectionHost::SetMalwareKillSwitch(bool killswitch_on) {

649 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

650 malware_killswitch_on_ = killswitch_on;

651 }

652

653 } // namespace safe_browsing	718 } // namespace safe_browsing

OLD	NEW