Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(96)

Side by Side Diff: chrome/browser/safe_browsing/browser_feature_extractor.cc

Issue 7119003: Create a browser feature extractor that runs after the renderer has (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Second try. Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
6
7 #include <map>
8 #include <utility>
9
10 #include "base/stl_util-inl.h"
11 #include "base/task.h"
12 #include "base/time.h"
13 #include "chrome/common/safe_browsing/csd.pb.h"
14 #include "chrome/browser/history/history.h"
15 #include "chrome/browser/history/history_types.h"
16 #include "chrome/browser/profiles/profile.h"
17 #include "content/common/notification_details.h"
mattm 2011/06/15 22:34:12 Could the include of notification_details be omitted?
noelutz 2011/06/15 22:39:24 I actually moved the notification to the host.
18 #include "content/common/notification_service.h"
mattm 2011/06/15 22:34:12 this too?
noelutz 2011/06/15 22:39:24 same as above.
19 #include "content/common/notification_source.h"
20 #include "content/common/notification_type.h"
21 #include "content/common/page_transition_types.h"
22 #include "content/browser/browser_thread.h"
23 #include "content/browser/cancelable_request.h"
24 #include "content/browser/tab_contents/navigation_controller.h"
25 #include "content/browser/tab_contents/tab_contents.h"
26 #include "googleurl/src/gurl.h"
27
28 namespace safe_browsing {
// Names under which the browser-side features are recorded in the
// non-model feature map of a ClientPhishingRequest.
namespace features {

// Features computed from the history entry of the exact URL.
const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount";
const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount";
const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount";
const char kUrlHistoryVisitCountMoreThan24hAgo[] =
    "UrlHistoryVisitCountMoreThan24hAgo";

// Features computed from the visits to the URL's host, reported separately
// for the HTTP and the HTTPS lookup.
const char kHttpHostVisitCount[] = "HttpHostVisitCount";
const char kHttpsHostVisitCount[] = "HttpsHostVisitCount";
const char kFirstHttpHostVisitMoreThan24hAgo[] =
    "FirstHttpHostVisitMoreThan24hAgo";
const char kFirstHttpsHostVisitMoreThan24hAgo[] =
    "FirstHttpsHostVisitMoreThan24hAgo";

}  // namespace features
42
43 static void AddFeature(const std::string& feature_name,
44 double feature_value,
45 ClientPhishingRequest* request) {
46 DCHECK(request);
47 ClientPhishingRequest::Feature* feature =
48 request->add_non_model_feature_map();
49 feature->set_name(feature_name);
50 feature->set_value(feature_value);
51 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
52 }
53
54 BrowserFeatureExtractor::BrowserFeatureExtractor(TabContents* tab)
55 : tab_(tab),
56 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)),
57 cancelled_(false) {
58 DCHECK(tab);
59 if (tab) {
60 registrar_.Add(this, NotificationType::TAB_CLOSED,
61 Source<NavigationController>(&tab->controller()));
62 }
63 }
64
65 BrowserFeatureExtractor::~BrowserFeatureExtractor() {
66 Cancel();
67 }
68
69 void BrowserFeatureExtractor::ExtractFeatures(ClientPhishingRequest* request,
70 DoneCallback* callback) {
71 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
72 DCHECK(request);
73 DCHECK_EQ(0U, request->url().find("http:"));
74 DCHECK(callback);
75 if (!callback) {
76 DLOG(ERROR) << "ExtractFeatures called without a callback object";
77 return;
78 }
79 pending_extractions_.insert(std::make_pair(request, callback));
80 MessageLoop::current()->PostTask(
81 FROM_HERE,
82 method_factory_.NewRunnableMethod(
83 &BrowserFeatureExtractor::StartExtractFeatures,
84 request, callback));
85 }
86
87 void BrowserFeatureExtractor::StartExtractFeatures(
88 ClientPhishingRequest* request,
89 DoneCallback* callback) {
90 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
91 ExtractionData extraction = std::make_pair(request, callback);
92 size_t removed = pending_extractions_.erase(extraction);
93 DCHECK_EQ(1U, removed);
94 HistoryService* history;
95 if (!request || !request->IsInitialized() || !GetHistoryService(&history)) {
96 callback->Run(false, request);
97 delete callback;
98 return;
99 }
100 CancelableRequestProvider::Handle handle = history->QueryURL(
101 GURL(request->url()),
102 true /* wants_visits */,
103 &request_consumer_,
104 NewCallback(this,
105 &BrowserFeatureExtractor::QueryUrlHistoryDone));
106
107 StorePendingQuery(handle, request, callback);
108 }
109
110 void BrowserFeatureExtractor::QueryUrlHistoryDone(
111 CancelableRequestProvider::Handle handle,
112 bool success,
113 const history::URLRow* row,
114 history::VisitVector* visits) {
115 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
116 ClientPhishingRequest* request;
117 DoneCallback* callback;
118 if (!GetPendingQuery(handle, &request, &callback)) {
119 DLOG(FATAL) << "No pending history query found";
120 return;
121 }
122 DCHECK(request);
123 DCHECK(callback);
124 if (!success) {
125 // URL is not found in the history. In practice this should not
126 // happen (unless there is a real error) because we just visited
127 // that URL.
128 callback->Run(false, request);
129 delete callback;
130 return;
131 }
132 AddFeature(features::kUrlHistoryVisitCount,
133 static_cast<double>(row->visit_count()),
134 request);
135
136 base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1);
137 int num_visits_24h_ago = 0;
138 int num_visits_typed = 0;
139 int num_visits_link = 0;
140 for (history::VisitVector::const_iterator it = visits->begin();
141 it != visits->end(); ++it) {
142 if (!PageTransition::IsMainFrame(it->transition)) {
143 continue;
144 }
145 if (it->visit_time < threshold) {
146 ++num_visits_24h_ago;
147 }
148 PageTransition::Type transition = PageTransition::StripQualifier(
149 it->transition);
150 if (transition == PageTransition::TYPED) {
151 ++num_visits_typed;
152 } else if (transition == PageTransition::LINK) {
153 ++num_visits_link;
154 }
155 }
156 AddFeature(features::kUrlHistoryVisitCountMoreThan24hAgo,
157 static_cast<double>(num_visits_24h_ago),
158 request);
159 AddFeature(features::kUrlHistoryTypedCount,
160 static_cast<double>(num_visits_typed),
161 request);
162 AddFeature(features::kUrlHistoryLinkCount,
163 static_cast<double>(num_visits_link),
164 request);
165
166 // Issue next history lookup for host visits.
167 HistoryService* history;
168 if (!GetHistoryService(&history)) {
169 callback->Run(false, request);
170 delete callback;
171 return;
172 }
173 CancelableRequestProvider::Handle next_handle =
174 history->GetVisibleVisitCountToHost(
175 GURL(request->url()),
176 &request_consumer_,
177 NewCallback(this, &BrowserFeatureExtractor::QueryHttpHostVisitsDone));
178 StorePendingQuery(next_handle, request, callback);
179 }
180
181 void BrowserFeatureExtractor::QueryHttpHostVisitsDone(
182 CancelableRequestProvider::Handle handle,
183 bool success,
184 int num_visits,
185 base::Time first_visit) {
186 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
187 ClientPhishingRequest* request;
188 DoneCallback* callback;
189 if (!GetPendingQuery(handle, &request, &callback)) {
190 DLOG(FATAL) << "No pending history query found";
191 return;
192 }
193 DCHECK(request);
194 DCHECK(callback);
195 if (!success) {
196 callback->Run(false, request);
197 delete callback;
198 return;
199 }
200 SetHostVisitsFeatures(num_visits, first_visit, true, request);
201
202 // Same lookup but for the HTTPS URL.
203 HistoryService* history;
204 if (!GetHistoryService(&history)) {
205 callback->Run(false, request);
206 delete callback;
207 return;
208 }
209 std::string https_url = request->url();
210 CancelableRequestProvider::Handle next_handle =
211 history->GetVisibleVisitCountToHost(
212 GURL(https_url.replace(0, 5, "https:")),
213 &request_consumer_,
214 NewCallback(this,
215 &BrowserFeatureExtractor::QueryHttpsHostVisitsDone));
216 StorePendingQuery(next_handle, request, callback);
217 }
218
219 void BrowserFeatureExtractor::QueryHttpsHostVisitsDone(
220 CancelableRequestProvider::Handle handle,
221 bool success,
222 int num_visits,
223 base::Time first_visit) {
224 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
225 ClientPhishingRequest* request;
226 DoneCallback* callback;
227 if (!GetPendingQuery(handle, &request, &callback)) {
228 DLOG(FATAL) << "No pending history query found";
229 return;
230 }
231 DCHECK(request);
232 DCHECK(callback);
233 if (!success) {
234 callback->Run(false, request);
mattm 2011/06/15 22:34:12 this one doesn't delete callback?
noelutz 2011/06/15 22:39:24 Nice catch.
235 return;
236 }
237 SetHostVisitsFeatures(num_visits, first_visit, false, request);
238 callback->Run(true, request); // We're done with all the history lookups.
239 delete callback;
240 }
241
242 void BrowserFeatureExtractor::SetHostVisitsFeatures(
243 int num_visits,
244 base::Time first_visit,
245 bool is_http_query,
246 ClientPhishingRequest* request) {
247 DCHECK(request);
248 AddFeature(is_http_query ?
249 features::kHttpHostVisitCount : features::kHttpsHostVisitCount,
250 static_cast<double>(num_visits),
251 request);
252 AddFeature(
253 is_http_query ?
254 features::kFirstHttpHostVisitMoreThan24hAgo :
255 features::kFirstHttpsHostVisitMoreThan24hAgo,
256 (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) ?
257 1.0 : 0.0,
258 request);
259 }
260
261 void BrowserFeatureExtractor::StorePendingQuery(
262 CancelableRequestProvider::Handle handle,
263 ClientPhishingRequest* request,
264 DoneCallback* callback) {
265 DCHECK_EQ(0U, pending_queries_.count(handle));
266 pending_queries_[handle] = std::make_pair(request, callback);
267 }
268
269 bool BrowserFeatureExtractor::GetPendingQuery(
270 CancelableRequestProvider::Handle handle,
271 ClientPhishingRequest** request,
272 DoneCallback** callback) {
273 PendingQueriesMap::iterator it = pending_queries_.find(handle);
274 DCHECK(it != pending_queries_.end());
275 if (it != pending_queries_.end()) {
276 *request = it->second.first;
277 *callback = it->second.second;
278 pending_queries_.erase(it);
279 return true;
280 }
281 return false;
282 }
283
284 bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) {
285 *history = NULL;
286 if (tab_ && tab_->profile()) {
287 *history = tab_->profile()->GetHistoryService(Profile::EXPLICIT_ACCESS);
288 if (*history) {
289 return true;
290 }
291 }
292 VLOG(2) << "Unable to query history. No history service available.";
293 return false;
294 }
295
296 void BrowserFeatureExtractor::Observe(NotificationType type,
297 const NotificationSource& source,
298 const NotificationDetails& details) {
299 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
300 Cancel();
301 }
302
303 void BrowserFeatureExtractor::Cancel() {
304 if (cancelled_) {
305 return;
306 }
307 cancelled_ = true;
308 method_factory_.RevokeAll();
309 // Delete all the pending extractions (delete callback and request objects).
310 STLDeleteContainerPairPointers(pending_extractions_.begin(),
311 pending_extractions_.end());
312 // Also cancel all the pending history service queries.
313 HistoryService* history;
314 bool success = GetHistoryService(&history);
315 DCHECK(success || pending_queries_.size() == 0);
316 // Cancel all the pending history lookups and cleanup the memory.
317 for (PendingQueriesMap::iterator it = pending_queries_.begin();
318 it != pending_queries_.end(); ++it) {
319 if (history) {
320 history->CancelRequest(it->first);
321 }
322 ExtractionData& extraction = it->second;
323 delete extraction.first; // delete request
324 delete extraction.second; // delete callback
325 }
326 pending_queries_.clear();
327 }
328 }; // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698