chrome/browser/safe_browsing/client_side_detection_host.cc - Issue 173133004: Separate pre-classification checks for client-side malware and phishing

Side by Side Diff: chrome/browser/safe_browsing/client_side_detection_host.cc

Issue 173133004: Separate pre-classification checks for client-side malware and phishing (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Remove done() Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « chrome/browser/safe_browsing/client_side_detection_host.h ('k') | chrome/browser/safe_browsing/client_side_detection_service.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"	5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"

6	6

7 #include <vector>	7 #include <vector>

8	8

9 #include "base/logging.h"	9 #include "base/logging.h"

10 #include "base/memory/ref_counted.h"	10 #include "base/memory/ref_counted.h"

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
43 using content::ResourceRequestDetails;	43 using content::ResourceRequestDetails;

44 using content::WebContents;	44 using content::WebContents;

45	45

46 namespace safe_browsing {	46 namespace safe_browsing {

47	47

48 const int ClientSideDetectionHost::kMaxUrlsPerIP = 20;	48 const int ClientSideDetectionHost::kMaxUrlsPerIP = 20;

49 const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200;	49 const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200;

50	50

51 const char kSafeBrowsingMatchKey[] = "safe_browsing_match";	51 const char kSafeBrowsingMatchKey[] = "safe_browsing_match";

52	52

	53 typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;

	54

53 // This class is instantiated each time a new toplevel URL loads, and	55 // This class is instantiated each time a new toplevel URL loads, and

54 // asynchronously checks whether the phishing classifier should run for this	56 // asynchronously checks whether the malware and phishing classifiers should run

55 // URL. If so, it notifies the renderer with a StartPhishingDetection IPC.	57 // for this URL. If so, it notifies the host class by calling the provided

56 // Objects of this class are ref-counted and will be destroyed once nobody	58 // callback from the UI thread. Objects of this class are ref-counted and will

57 // uses it anymore. If \|web_contents\|, \|csd_service\| or \|host\| go away you need	59 // be destroyed once nobody uses it anymore. If \|web_contents\|, \|csd_service\|

58 // to call Cancel(). We keep the \|database_manager\| alive in a ref pointer for	60 // or \|host\| go away you need to call Cancel(). We keep the \|database_manager\|

59 // as long as it takes.	61 // alive in a ref pointer for as long as it takes.

60 class ClientSideDetectionHost::ShouldClassifyUrlRequest	62 class ClientSideDetectionHost::ShouldClassifyUrlRequest

61 : public base::RefCountedThreadSafe<	63 : public base::RefCountedThreadSafe<

62 ClientSideDetectionHost::ShouldClassifyUrlRequest> {	64 ClientSideDetectionHost::ShouldClassifyUrlRequest> {

63 public:	65 public:

64 ShouldClassifyUrlRequest(const content::FrameNavigateParams& params,	66 ShouldClassifyUrlRequest(

65 WebContents* web_contents,	67 const content::FrameNavigateParams& params,

66 ClientSideDetectionService* csd_service,	68 const ShouldClassifyUrlCallback& start_phishing_classification,

67 SafeBrowsingDatabaseManager* database_manager,	69 const ShouldClassifyUrlCallback& start_malware_classification,

68 ClientSideDetectionHost* host)	70 WebContents* web_contents,

69 : canceled_(false),	71 ClientSideDetectionService* csd_service,

70 params_(params),	72 SafeBrowsingDatabaseManager* database_manager,

	73 ClientSideDetectionHost* host)

	74 : params_(params),

71 web_contents_(web_contents),	75 web_contents_(web_contents),

72 csd_service_(csd_service),	76 csd_service_(csd_service),

73 database_manager_(database_manager),	77 database_manager_(database_manager),

74 host_(host) {	78 host_(host),

	79 start_phishing_classification_cb_(start_phishing_classification),

	80 start_malware_classification_cb_(start_malware_classification) {

75 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	81 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

76 DCHECK(web_contents_);	82 DCHECK(web_contents_);

77 DCHECK(csd_service_);	83 DCHECK(csd_service_);

78 DCHECK(database_manager_.get());	84 DCHECK(database_manager_.get());

79 DCHECK(host_);	85 DCHECK(host_);

80 }	86 }

81	87

82 void Start() {	88 void Start() {

83 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	89 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

84	90

85 // We start by doing some simple checks that can run on the UI thread.	91 // We start by doing some simple checks that can run on the UI thread.

86 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);	92 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);

	93 UMA_HISTOGRAM_COUNTS("SBClientMalware.ClassificationStart", 1);

87	94

88 // Only classify [X]HTML documents.	95 // Only classify [X]HTML documents.

89 if (params_.contents_mime_type != "text/html" &&	96 if (params_.contents_mime_type != "text/html" &&

90 params_.contents_mime_type != "application/xhtml+xml") {	97 params_.contents_mime_type != "application/xhtml+xml") {

91 VLOG(1) << "Skipping phishing classification for URL: " << params_.url	98 VLOG(1) << "Skipping phishing classification for URL: " << params_.url

92 << " because it has an unsupported MIME type: "	99 << " because it has an unsupported MIME type: "

93 << params_.contents_mime_type;	100 << params_.contents_mime_type;

94 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	101 DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);

95 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,

96 NO_CLASSIFY_MAX);

97 return;

98 }	102 }

99	103

100 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {	104 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {

101 VLOG(1) << "Skipping phishing classification for URL: " << params_.url	105 VLOG(1) << "Skipping phishing classification for URL: " << params_.url

102 << " because of hosting on private IP: "	106 << " because of hosting on private IP: "

103 << params_.socket_address.host();	107 << params_.socket_address.host();

104 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	108 DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);

105 NO_CLASSIFY_PRIVATE_IP,

106 NO_CLASSIFY_MAX);

107 return;

108 }	109 }

109	110

110 // Don't run the phishing classifier if the tab is incognito.	111 // Don't run any classifier if the tab is incognito.

111 if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {	112 if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {

112 VLOG(1) << "Skipping phishing classification for URL: " << params_.url	113 VLOG(1) << "Skipping phishing and malware classification for URL: "

113 << " because we're browsing incognito.";	114 << params_.url << " because we're browsing incognito.";

114 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	115 DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);

115 NO_CLASSIFY_OFF_THE_RECORD,	116 DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);

116 NO_CLASSIFY_MAX);

117

118 return;

119 }	117 }

120	118

121 // We lookup the csd-whitelist before we lookup the cache because	119 // We lookup the csd-whitelist before we lookup the cache because

122 // a URL may have recently been whitelisted. If the URL matches	120 // a URL may have recently been whitelisted. If the URL matches

123 // the csd-whitelist we won't start classification. The	121 // the csd-whitelist we won't start phishing classification. The

124 // csd-whitelist check has to be done on the IO thread because it	122 // csd-whitelist check has to be done on the IO thread because it

125 // uses the SafeBrowsing service class.	123 // uses the SafeBrowsing service class.

126 BrowserThread::PostTask(	124 if (MaybeClassifyForPhishing() \|\| MaybeClassifyForMalware()) {

127 BrowserThread::IO,	125 BrowserThread::PostTask(

128 FROM_HERE,	126 BrowserThread::IO,

129 base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,	127 FROM_HERE,

130 this, params_.url));	128 base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,

	129 this, params_.url));

	130 }

131 }	131 }

132	132

133 void Cancel() {	133 void Cancel() {

134 canceled_ = true;	134 DontClassifyForPhishing(NO_CLASSIFY_CANCEL);

	135 DontClassifyForMalware(NO_CLASSIFY_CANCEL);

135 // Just to make sure we don't do anything stupid we reset all these	136 // Just to make sure we don't do anything stupid we reset all these

136 // pointers except for the safebrowsing service class which may be	137 // pointers except for the safebrowsing service class which may be

137 // accessed by CheckCsdWhitelist().	138 // accessed by CheckCsdWhitelist().

138 web_contents_ = NULL;	139 web_contents_ = NULL;

139 csd_service_ = NULL;	140 csd_service_ = NULL;

140 host_ = NULL;	141 host_ = NULL;

141 }	142 }

142	143

143 private:	144 private:

144 friend class base::RefCountedThreadSafe<	145 friend class base::RefCountedThreadSafe<

145 ClientSideDetectionHost::ShouldClassifyUrlRequest>;	146 ClientSideDetectionHost::ShouldClassifyUrlRequest>;

146	147

147 // Enum used to keep stats about why the pre-classification check failed.	148 // Enum used to keep stats about why the pre-classification check failed.

148 enum PreClassificationCheckFailures {	149 enum PreClassificationCheckFailures {

149 OBSOLETE_NO_CLASSIFY_PROXY_FETCH,	150 OBSOLETE_NO_CLASSIFY_PROXY_FETCH,

150 NO_CLASSIFY_PRIVATE_IP,	151 NO_CLASSIFY_PRIVATE_IP,

151 NO_CLASSIFY_OFF_THE_RECORD,	152 NO_CLASSIFY_OFF_THE_RECORD,

152 NO_CLASSIFY_MATCH_CSD_WHITELIST,	153 NO_CLASSIFY_MATCH_CSD_WHITELIST,

153 NO_CLASSIFY_TOO_MANY_REPORTS,	154 NO_CLASSIFY_TOO_MANY_REPORTS,

154 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,	155 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,

	156 NO_CLASSIFY_NO_DATABASE_MANAGER,

	157 NO_CLASSIFY_KILLSWITCH,

	158 NO_CLASSIFY_CANCEL,

	159 NO_CLASSIFY_RESULT_FROM_CACHE,

155	160

156 NO_CLASSIFY_MAX // Always add new values before this one.	161 NO_CLASSIFY_MAX // Always add new values before this one.

157 };	162 };

158	163

159 // The destructor can be called either from the UI or the IO thread.	164 // The destructor can be called either from the UI or the IO thread.

160 virtual ~ShouldClassifyUrlRequest() { }	165 virtual ~ShouldClassifyUrlRequest() { }

161	166

	167 bool MaybeClassifyForPhishing() const {

	168 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	169 return !start_phishing_classification_cb_.is_null();

	170 }

	171

	172 bool MaybeClassifyForMalware() const {

	173 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	174 return !start_malware_classification_cb_.is_null();

	175 }

	176

	177 void DontClassifyForPhishing(PreClassificationCheckFailures reason) {

	178 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	179 if (MaybeClassifyForPhishing()) {

	180 // Track the first reason why we stopped classifying for phishing.

	181 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",

	182 reason, NO_CLASSIFY_MAX);

	183 start_phishing_classification_cb_.Run(false);

	184 }

	185 start_phishing_classification_cb_.Reset();

	186 }

	187

	188 void DontClassifyForMalware(PreClassificationCheckFailures reason) {

	189 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	190 if (MaybeClassifyForMalware()) {

	191 // Track the first reason why we stopped classifying for malware.

	192 UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",

	193 reason, NO_CLASSIFY_MAX);

	194 start_malware_classification_cb_.Run(false);

	195 }

	196 start_malware_classification_cb_.Reset();

	197 }

	198

162 void CheckCsdWhitelist(const GURL& url) {	199 void CheckCsdWhitelist(const GURL& url) {

163 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));	200 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

164 if (!database_manager_.get() \|\|	201 // We don't want to call the classification callbacks from the IO

165 database_manager_->MatchCsdWhitelistUrl(url)) {	202 // thread so we simply pass the results of this method to CheckCache()

166 // We're done. There is no point in going back to the UI thread.	203 // which is called on the UI thread;

167 VLOG(1) << "Skipping phishing classification for URL: " << url	204 PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;

168 << " because it matches the csd whitelist";	205 PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;

169 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	206 if (!database_manager_.get()) {

170 NO_CLASSIFY_MATCH_CSD_WHITELIST,	207 // We cannot check the Safe Browsing whitelists so we stop here

171 NO_CLASSIFY_MAX);	208 // for safety.

172 return;	209 malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;

	210 } else {

	211 if (database_manager_->MatchCsdWhitelistUrl(url)) {

	212 VLOG(1) << "Skipping phishing classification for URL: " << url

	213 << " because it matches the csd whitelist";

	214 phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;

	215 }

	216 if (database_manager_->IsMalwareKillSwitchOn()) {

	217 malware_reason = NO_CLASSIFY_KILLSWITCH;

	218 }

173 }	219 }

174

175 bool malware_killswitch_on = database_manager_->IsMalwareKillSwitchOn();

176

177 BrowserThread::PostTask(	220 BrowserThread::PostTask(

178 BrowserThread::UI,	221 BrowserThread::UI,

179 FROM_HERE,	222 FROM_HERE,

180 base::Bind(&ShouldClassifyUrlRequest::CheckCache, this,	223 base::Bind(&ShouldClassifyUrlRequest::CheckCache,

181 malware_killswitch_on));	224 this,

	225 phishing_reason,

	226 malware_reason));

182 }	227 }

183	228

184 void CheckCache(bool malware_killswitch_on) {	229 void CheckCache(PreClassificationCheckFailures phishing_reason,

	230 PreClassificationCheckFailures malware_reason) {

185 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	231 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

186 if (canceled_) {	232 if (phishing_reason != NO_CLASSIFY_MAX)

187 return;	233 DontClassifyForPhishing(phishing_reason);

	234 if (malware_reason != NO_CLASSIFY_MAX)

	235 DontClassifyForMalware(malware_reason);

	236 if (!MaybeClassifyForMalware() && !MaybeClassifyForPhishing()) {

	237 return; // No point in doing anything else.

188 }	238 }

189	239 // If result is cached, we don't want to run classification again.

190 host_->SetMalwareKillSwitch(malware_killswitch_on);	240 // In that case we're just trying to show the warning.

191 // If result is cached, we don't want to run classification again

192 bool is_phishing;	241 bool is_phishing;

193 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {	242 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {

194 VLOG(1) << "Satisfying request for " << params_.url << " from cache";	243 VLOG(1) << "Satisfying request for " << params_.url << " from cache";

195 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);	244 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);

196 // Since we are already on the UI thread, this is safe.	245 // Since we are already on the UI thread, this is safe.

197 host_->MaybeShowPhishingWarning(params_.url, is_phishing);	246 host_->MaybeShowPhishingWarning(params_.url, is_phishing);

198 return;	247 DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);

199 }	248 }

200	249

201 // We want to limit the number of requests, though we will ignore the	250 // We want to limit the number of requests, though we will ignore the

202 // limit for urls in the cache. We don't want to start classifying	251 // limit for urls in the cache. We don't want to start classifying

203 // too many pages as phishing, but for those that we already think are	252 // too many pages as phishing, but for those that we already think are

204 // phishing we want to give ourselves a chance to fix false positives.	253 // phishing we want to give ourselves a chance to fix false positives.
	mattm 2014/02/22 02:49:03: Guess it's not really related to this CL, but this comment seems confusing. Maybe I'm not clear on what "fix false positives" means. Seems like it's really something more like "if we had a cached result but it's too old, always re-check it". Another interesting thing is if the classifier doesn't think it's phishing, the entry in the cache won't actually be updated, so this would keep checking until the cache entry gets expired. (If you agree with any of that, you don't need to fix in this CL.)
	noé 2014/03/14 22:21:32: On 2014/02/22 02:49:03, mattm wrote:
	> Guess it's not really related to this CL, but this comment seems confusing.
	> Maybe I'm not clear on what "fix false positives" means.
	>
	> Seems like it's really something more like "if we had a cached result but it's
	> too old, always re-check it".
	If the cached value is phishing we want to re-send a server ping to make sure it's still phishing. That's how we fix false positives. Changed the comment slightly.
	> Another interesting thing is if the classifier doesn't think it's phishing, the
	> entry in the cache won't actually be updated, so this would keep checking until
	> the cache entry gets expired.
	>
	> (If you agree with any of that, you don't need to fix in this CL.)
	That's a good observation. I don't think we want to cache any URL that isn't classified as phishing on the client side. Otherwise, the cache will blow up quickly. The client verdict is unlikely to change from phishing to non-phishing unless the model changes so I think we're OK here.
205 if (csd_service_->IsInCache(params_.url)) {	254 if (csd_service_->IsInCache(params_.url)) {

206 VLOG(1) << "Reporting limit skipped for " << params_.url	255 VLOG(1) << "Reporting limit skipped for " << params_.url

207 << " as it was in the cache.";	256 << " as it was in the cache.";

208 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);	257 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);

209 } else if (csd_service_->OverPhishingReportLimit()) {	258 } else if (csd_service_->OverPhishingReportLimit()) {

210 VLOG(1) << "Too many report phishing requests sent recently, "	259 VLOG(1) << "Too many report phishing requests sent recently, "

211 << "not running classification for " << params_.url;	260 << "not running classification for " << params_.url;

212 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",	261 DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);

213 NO_CLASSIFY_TOO_MANY_REPORTS,	262 }

214 NO_CLASSIFY_MAX);	263 if (csd_service_->OverMalwareReportLimit()) {

215 return;	264 DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);

216 }	265 }

217	266

218 // Everything checks out, so start classification.	267 // Everything checks out, so start classification.

219 // \|web_contents_\| is safe to call as we will be destructed	268 // \|web_contents_\| is safe to call as we will be destructed

220 // before it is.	269 // before it is.

221 VLOG(1) << "Instruct renderer to start phishing detection for URL: "	270 if (MaybeClassifyForPhishing())

222 << params_.url;	271 start_phishing_classification_cb_.Run(true);

223 content::RenderViewHost* rvh = web_contents_->GetRenderViewHost();	272 if (MaybeClassifyForMalware())

224 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(	273 start_malware_classification_cb_.Run(true);

225 rvh->GetRoutingID(), params_.url));

226 }	274 }

227	275

228 // No need to protect \|canceled_\| with a lock because it is only read and

229 // written by the UI thread.

230 bool canceled_;

231 content::FrameNavigateParams params_;	276 content::FrameNavigateParams params_;

232 WebContents* web_contents_;	277 WebContents* web_contents_;

233 ClientSideDetectionService* csd_service_;	278 ClientSideDetectionService* csd_service_;

234 // We keep a ref pointer here just to make sure the safe browsing	279 // We keep a ref pointer here just to make sure the safe browsing

235 // database manager stays alive long enough.	280 // database manager stays alive long enough.

236 scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;	281 scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;

237 ClientSideDetectionHost* host_;	282 ClientSideDetectionHost* host_;

238	283

	284 ShouldClassifyUrlCallback start_phishing_classification_cb_;

	285 ShouldClassifyUrlCallback start_malware_classification_cb_;

	286

239 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);	287 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);

240 };	288 };

241	289

242 // static	290 // static

243 ClientSideDetectionHost* ClientSideDetectionHost::Create(	291 ClientSideDetectionHost* ClientSideDetectionHost::Create(

244 WebContents* tab) {	292 WebContents* tab) {

245 return new ClientSideDetectionHost(tab);	293 return new ClientSideDetectionHost(tab);

246 }	294 }

247	295

248 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)	296 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)

249 : content::WebContentsObserver(tab),	297 : content::WebContentsObserver(tab),

250 csd_service_(NULL),	298 csd_service_(NULL),

	299 classification_request_(NULL),

	300 should_extract_malware_features_(true),

	301 onload_complete_(false),

251 weak_factory_(this),	302 weak_factory_(this),

252 unsafe_unique_page_id_(-1),	303 unsafe_unique_page_id_(-1) {

253 malware_killswitch_on_(false),

254 malware_report_enabled_(false) {

255 DCHECK(tab);	304 DCHECK(tab);

256 // Note: csd_service_ and sb_service will be NULL here in testing.	305 // Note: csd_service_ and sb_service will be NULL here in testing.

257 csd_service_ = g_browser_process->safe_browsing_detection_service();	306 csd_service_ = g_browser_process->safe_browsing_detection_service();

258 feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));	307 feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));

259 registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,	308 registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,

260 content::Source<WebContents>(tab));	309 content::Source<WebContents>(tab));

261	310

262 scoped_refptr<SafeBrowsingService> sb_service =	311 scoped_refptr<SafeBrowsingService> sb_service =

263 g_browser_process->safe_browsing_service();	312 g_browser_process->safe_browsing_service();

264 if (sb_service.get()) {	313 if (sb_service.get()) {

265 ui_manager_ = sb_service->ui_manager();	314 ui_manager_ = sb_service->ui_manager();

266 database_manager_ = sb_service->database_manager();	315 database_manager_ = sb_service->database_manager();

267 ui_manager_->AddObserver(this);	316 ui_manager_->AddObserver(this);

268 }	317 }

269

270 // Only enable the malware bad IP matching and report feature for canary

271 // and dev channel.

272 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();

273 malware_report_enabled_ = (

274 channel == chrome::VersionInfo::CHANNEL_DEV \|\|

275 channel == chrome::VersionInfo::CHANNEL_CANARY);

276 }	318 }

277	319

278 ClientSideDetectionHost::~ClientSideDetectionHost() {	320 ClientSideDetectionHost::~ClientSideDetectionHost() {

279 if (ui_manager_.get())	321 if (ui_manager_.get())

280 ui_manager_->RemoveObserver(this);	322 ui_manager_->RemoveObserver(this);

281 }	323 }

282	324

283 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {	325 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {

284 bool handled = true;	326 bool handled = true;

285 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)	327 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)

(...skipping 14 matching lines...) Expand all Loading...
300 // If the navigation is within the same page, the user isn't really	342 // If the navigation is within the same page, the user isn't really

301 // navigating away. We don't need to cancel a pending callback or	343 // navigating away. We don't need to cancel a pending callback or

302 // begin a new classification.	344 // begin a new classification.

303 return;	345 return;

304 }	346 }

305 // If we navigate away and there currently is a pending phishing	347 // If we navigate away and there currently is a pending phishing

306 // report request we have to cancel it to make sure we don't display	348 // report request we have to cancel it to make sure we don't display

307 // an interstitial for the wrong page. Note that this won't cancel	349 // an interstitial for the wrong page. Note that this won't cancel

308 // the server ping back but only cancel the showing of the	350 // the server ping back but only cancel the showing of the

309 // interstial.	351 // interstial.

310 weak_factory_.InvalidateWeakPtrs();	352 weak_factory_.InvalidateWeakPtrs();
	mattm 2014/02/22 02:49:03: I can't remember, does invalidating weak pointers make the callback.is_null() return true? If so you might need to move the classification_request_->Cancel(); above this.
	noé 2014/03/14 22:21:32: On 2014/02/22 02:49:03, mattm wrote:
	> I can't remember, does invalidating weak pointers make the callback.is_null()
	> return true? If so you might need to move the classification_request_->Cancel();
	> above this.
	Done.
311	353

312 if (!csd_service_) {	354 if (!csd_service_) {

313 return;	355 return;

314 }	356 }

315	357

316 // Cancel any pending classification request.	358 // Cancel any pending classification request.

317 if (classification_request_.get()) {	359 if (classification_request_.get()) {

318 classification_request_->Cancel();	360 classification_request_->Cancel();

319 }	361 }

320 browse_info_.reset(new BrowseInfo);	362 browse_info_.reset(new BrowseInfo);

321	363

322 // Store redirect chain information.	364 // Store redirect chain information.

323 if (params.url.host() != cur_host_) {	365 if (params.url.host() != cur_host_) {

324 cur_host_ = params.url.host();	366 cur_host_ = params.url.host();

325 cur_host_redirects_ = params.redirects;	367 cur_host_redirects_ = params.redirects;

326 }	368 }

	369 browse_info_->url = params.url;

327 browse_info_->host_redirects = cur_host_redirects_;	370 browse_info_->host_redirects = cur_host_redirects_;

328 browse_info_->url_redirects = params.redirects;	371 browse_info_->url_redirects = params.redirects;

329 browse_info_->referrer = params.referrer.url;	372 browse_info_->referrer = params.referrer.url;

330 browse_info_->http_status_code = details.http_status_code;	373 browse_info_->http_status_code = details.http_status_code;

	374 browse_info_->page_id = params.page_id;

331	375

332 // Notify the renderer if it should classify this URL.	376 should_extract_malware_features_ = true;

	377 should_classify_for_malware_.reset();
	mattm 2014/02/22 02:49:03: Seems like a plain bool initialized to false here would achieve the same effect.
	noé 2014/03/14 22:21:32: On 2014/02/22 02:49:03, mattm wrote:
	> Seems like a plain bool initialized to false here would achieve the same effect.
	MaybeStartMalwareFeatureExtraction() needs to know whether or not OnMalwarePreClassificationDone() was called already. It does so by checking whether that flag is NULL or not before checking its actual value.
	mattm 2014/03/18 02:19:06: On 2014/03/14 22:21:32, noé wrote:
	> On 2014/02/22 02:49:03, mattm wrote:
	> > Seems like a plain bool initialized to false here would achieve the same
	> > effect.
	>
	> MaybeStartMalwareFeatureExtraction() needs to know whether or not
	> OnMalwarePreClassificationDone() was called already. It does so by checking
	> whether that flag is NULL or not before checking its actual value.
	But the only thing it actually checks is "should_classify_for_malware_.get() && *should_classify_for_malware_"
	The possible states are
	1. false && n/a = false (OnMalwarePreClassificationDone not called yet)
	2. true && false = false (OnMalwarePreClassificationDone called and shouldn't classify)
	3. true && true = true (OnMalwarePreClassificationDone called and should classify)
	These states could all be represented by a single bool.
	noé* 2014/03/20 17:01:45: On 2014/03/18 02:19:06, mattm wrote:
	> On 2014/03/14 22:21:32, noé wrote:
	> > On 2014/02/22 02:49:03, mattm wrote:
	> > > Seems like a plain bool initialized to false here would achieve the same
	> > > effect.
	> >
	> > MaybeStartMalwareFeatureExtraction() needs to know whether or not
	> > OnMalwarePreClassificationDone() was called already. It does so by checking
	> > whether that flag is NULL or not before checking its actual value.
	>
	> But the only thing it actually checks is "should_classify_for_malware_.get() &&
	> *should_classify_for_malware_"
	>
	> The possible states are
	> 1. false && n/a = false (OnMalwarePreClassificationDone not called yet)
	> 2. true && false = false (OnMalwarePreClassificationDone called and shouldn't
	> classify)
	> 3. true && true = true (OnMalwarePreClassificationDone called and should
	> classify)
	>
	> These states could all be represented by a single bool.
	You are absolutely right. So sorry about the unnecessary back and forth.
	378 onload_complete_ = false;

	379

	380 // Check whether we can classify the current URL for phishing or malware.

333 classification_request_ = new ShouldClassifyUrlRequest(	381 classification_request_ = new ShouldClassifyUrlRequest(

334 params, web_contents(), csd_service_, database_manager_.get(), this);	382 params,

	383 base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,

	384 weak_factory_.GetWeakPtr()),

	385 base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,

	386 weak_factory_.GetWeakPtr()),

	387 web_contents(), csd_service_, database_manager_.get(), this);

335 classification_request_->Start();	388 classification_request_->Start();

336 }	389 }

337	390

338 void ClientSideDetectionHost::OnSafeBrowsingHit(	391 void ClientSideDetectionHost::OnSafeBrowsingHit(

339 const SafeBrowsingUIManager::UnsafeResource& resource) {	392 const SafeBrowsingUIManager::UnsafeResource& resource) {

340 if (!web_contents() \|\| !web_contents()->GetController().GetActiveEntry())	393 if (!web_contents() \|\| !web_contents()->GetController().GetActiveEntry())

341 return;	394 return;

342	395

343 // Check that the hit is either malware or phishing.	396 // Check that the hit is either malware or phishing.

344 if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&	397 if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&

(...skipping 62 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
407 void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) {	460 void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) {

408 DCHECK(tab);	461 DCHECK(tab);

409 // Tell any pending classification request that it is being canceled.	462 // Tell any pending classification request that it is being canceled.

410 if (classification_request_.get()) {	463 if (classification_request_.get()) {

411 classification_request_->Cancel();	464 classification_request_->Cancel();

412 }	465 }

413 // Cancel all pending feature extractions.	466 // Cancel all pending feature extractions.

414 feature_extractor_.reset();	467 feature_extractor_.reset();

415 }	468 }

416	469

	470 void ClientSideDetectionHost::OnPhishingPreClassificationDone(

	471 bool should_classify) {

	472 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	473 if (browse_info_.get() && should_classify) {

	474 VLOG(1) << "Instruct renderer to start phishing detection for URL: "

	475 << browse_info_->url;

	476 content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();

	477 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(

	478 rvh->GetRoutingID(), browse_info_->url));

	479 }

	480 }

	481

	482 void ClientSideDetectionHost::OnMalwarePreClassificationDone(

	483 bool should_classify) {

	484 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	485 // If classification checks failed we should stop extracting malware features.

	486 should_extract_malware_features_ = should_classify;

	487 should_classify_for_malware_.reset(new bool(should_classify));

	488 MaybeStartMalwareFeatureExtraction();

	489 }

	490

	491 void ClientSideDetectionHost::DocumentOnLoadCompletedInMainFrame(

	492 int32 page_id) {

	493 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	494 DCHECK(browse_info_.get());

	495 if (browse_info_->page_id != page_id) {

	496 // Something weird is happening here. The BrowseInfo page ID

	497 // should always be the same as the most recent load.

	498 UMA_HISTOGRAM_COUNTS("SBClientMalware.UnexpectedPageId", 1);

	499 return;

	500 }

	501 onload_complete_ = true;

	502 MaybeStartMalwareFeatureExtraction();

	503 }

	504

	505 void ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {

	506 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

	507 if (should_classify_for_malware_.get() &&

	508 *should_classify_for_malware_ &&

	509 onload_complete_) {

	510 scoped_ptr<ClientMalwareRequest> malware_verdict(

	511 new ClientMalwareRequest);

	512 // Start browser-side malware feature extraction. Once we're done it will

	513 // send the malware client verdict request.

	514 malware_verdict->set_url(browse_info_->url.spec());

	515 const GURL& referrer = browse_info_->referrer;

	516 if (referrer.SchemeIs("http")) { // Only send http urls.

	517 malware_verdict->set_referrer_url(referrer.spec());

	518 }

	519 // This function doesn't expect browse_info_ to stay around after this

	520 // function returns.

	521 feature_extractor_->ExtractMalwareFeatures(

	522 browse_info_.get(),

	523 malware_verdict.release(),

	524 base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,

	525 weak_factory_.GetWeakPtr()));

	526 }

	527 }

	528

417 void ClientSideDetectionHost::OnPhishingDetectionDone(	529 void ClientSideDetectionHost::OnPhishingDetectionDone(

418 const std::string& verdict_str) {	530 const std::string& verdict_str) {

419 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	531 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

420 // There is something seriously wrong if there is no service class but	532 // There is something seriously wrong if there is no service class but

421 // this method is called. The renderer should not start phishing detection	533 // this method is called. The renderer should not start phishing detection

422 // if there isn't any service class in the browser.	534 // if there isn't any service class in the browser.

423 DCHECK(csd_service_);	535 DCHECK(csd_service_);

424 // There shouldn't be any pending requests because we revoke them every time	536 // There shouldn't be any pending requests because we revoke them every time

425 // we navigate away.	537 // we navigate away.

426 DCHECK(!weak_factory_.HasWeakPtrs());	538 DCHECK(!weak_factory_.HasWeakPtrs());

427 DCHECK(browse_info_.get());	539 DCHECK(browse_info_.get());

428	540

429 // We parse the protocol buffer here. If we're unable to parse it we won't	541 // We parse the protocol buffer here. If we're unable to parse it we won't

430 // send the verdict further.	542 // send the verdict further.

431 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);	543 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);

432 if (csd_service_ &&	544 if (csd_service_ &&

433 !weak_factory_.HasWeakPtrs() &&	545 !weak_factory_.HasWeakPtrs() &&

434 browse_info_.get() &&	546 browse_info_.get() &&

435 verdict->ParseFromString(verdict_str) &&	547 verdict->ParseFromString(verdict_str) &&

436 verdict->IsInitialized()) {	548 verdict->IsInitialized()) {

437 // We do the malware IP matching and request sending if the feature

438 // is enabled.

439 if (malware_report_enabled_ && !MalwareKillSwitchIsOn()) {

440 scoped_ptr<ClientMalwareRequest> malware_verdict(

441 new ClientMalwareRequest);

442 // Start browser-side malware feature extraction. Once we're done it will

443 // send the malware client verdict request.

444 malware_verdict->set_url(verdict->url());

445 const GURL& referrer = browse_info_->referrer;

446 if (referrer.SchemeIs("http")) { // Only send http urls.

447 malware_verdict->set_referrer_url(referrer.spec());

448 }

449 // This function doesn't expect browse_info_ to stay around after this

450 // function returns.

451 feature_extractor_->ExtractMalwareFeatures(

452 browse_info_.get(),

453 malware_verdict.release(),

454 base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,

455 weak_factory_.GetWeakPtr()));

456 }

457

458 // We only send phishing verdict to the server if the verdict is phishing or	549 // We only send phishing verdict to the server if the verdict is phishing or

459 // if a SafeBrowsing interstitial was already shown for this site. E.g., a	550 // if a SafeBrowsing interstitial was already shown for this site. E.g., a

460 // malware or phishing interstitial was shown but the user clicked	551 // malware or phishing interstitial was shown but the user clicked

461 // through.	552 // through.

462 if (verdict->is_phishing() \|\| DidShowSBInterstitial()) {	553 if (verdict->is_phishing() \|\| DidShowSBInterstitial()) {

463 if (DidShowSBInterstitial()) {	554 if (DidShowSBInterstitial()) {

464 browse_info_->unsafe_resource.reset(unsafe_resource_.release());	555 browse_info_->unsafe_resource.reset(unsafe_resource_.release());

465 }	556 }

466 // Start browser-side feature extraction. Once we're done it will send	557 // Start browser-side feature extraction. Once we're done it will send

467 // the client verdict request.	558 // the client verdict request.

468 feature_extractor_->ExtractFeatures(	559 feature_extractor_->ExtractFeatures(

469 browse_info_.get(),	560 browse_info_.get(),

470 verdict.release(),	561 verdict.release(),

471 base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,	562 base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,

472 weak_factory_.GetWeakPtr()));	563 weak_factory_.GetWeakPtr()));

473 }	564 }

474 }	565 }

475 browse_info_.reset();

476 }	566 }

477	567

478 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,	568 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,

479 bool is_phishing) {	569 bool is_phishing) {

480 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	570 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

481 VLOG(2) << "Received server phishing verdict for URL:" << phishing_url	571 VLOG(2) << "Received server phishing verdict for URL:" << phishing_url

482 << " is_phishing:" << is_phishing;	572 << " is_phishing:" << is_phishing;

483 if (is_phishing) {	573 if (is_phishing) {

484 DCHECK(web_contents());	574 DCHECK(web_contents());

485 if (ui_manager_.get()) {	575 if (ui_manager_.get()) {

(...skipping 108 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
594 }	684 }

595	685

596 void ClientSideDetectionHost::Observe(	686 void ClientSideDetectionHost::Observe(

597 int type,	687 int type,

598 const content::NotificationSource& source,	688 const content::NotificationSource& source,

599 const content::NotificationDetails& details) {	689 const content::NotificationDetails& details) {

600 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));	690 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

601 DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);	691 DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);

602 const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(	692 const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(

603 details).ptr();	693 details).ptr();

604 if (req && browse_info_.get() && malware_report_enabled_ &&	694 if (req && browse_info_.get() &&

605 !MalwareKillSwitchIsOn()) {	695 should_extract_malware_features_ && req->url.is_valid()) {

606 if (req->url.is_valid()) {	696 UpdateIPUrlMap(req->socket_address.host() /* ip */,

607 UpdateIPUrlMap(req->socket_address.host() /* ip */,	697 req->url.spec() /* url */,

608 req->url.spec() /* url */,	698 req->method,

609 req->method,	699 req->referrer,

610 req->referrer,	700 req->resource_type);

611 req->resource_type);

612 }

613 }	701 }

614 }	702 }

615	703

616 bool ClientSideDetectionHost::DidShowSBInterstitial() const {	704 bool ClientSideDetectionHost::DidShowSBInterstitial() const {

617 if (unsafe_unique_page_id_ <= 0 \|\| !web_contents()) {	705 if (unsafe_unique_page_id_ <= 0 \|\| !web_contents()) {

618 return false;	706 return false;

619 }	707 }

620 const NavigationEntry* nav_entry =	708 const NavigationEntry* nav_entry =

621 web_contents()->GetController().GetActiveEntry();	709 web_contents()->GetController().GetActiveEntry();

622 return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);	710 return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);

(...skipping 10 matching lines...) Expand all Loading...
633 if (ui_manager_.get())	721 if (ui_manager_.get())

634 ui_manager_->RemoveObserver(this);	722 ui_manager_->RemoveObserver(this);

635	723

636 ui_manager_ = ui_manager;	724 ui_manager_ = ui_manager;

637 if (ui_manager)	725 if (ui_manager)

638 ui_manager_->AddObserver(this);	726 ui_manager_->AddObserver(this);

639	727

640 database_manager_ = database_manager;	728 database_manager_ = database_manager;

641 }	729 }

642	730

643 bool ClientSideDetectionHost::MalwareKillSwitchIsOn() {

644 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

645 return malware_killswitch_on_;

646 }

647

648 void ClientSideDetectionHost::SetMalwareKillSwitch(bool killswitch_on) {

649 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

650 malware_killswitch_on_ = killswitch_on;

651 }

652

653 } // namespace safe_browsing	731 } // namespace safe_browsing

OLD	NEW