Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(27)

Side by Side Diff: chrome/browser/safe_browsing/browser_feature_extractor.cc

Issue 7119003: Create a browser feature extractor that runs after the renderer has (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Address Matt's and Pawel's comments Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
6
7 #include <map>
8 #include <utility>
9
10 #include "base/stl_util-inl.h"
11 #include "base/task.h"
12 #include "base/time.h"
13 #include "chrome/common/safe_browsing/csd.pb.h"
14 #include "chrome/browser/history/history.h"
15 #include "chrome/browser/history/history_types.h"
16 #include "chrome/browser/profiles/profile.h"
17 #include "content/common/page_transition_types.h"
18 #include "content/browser/browser_thread.h"
19 #include "content/browser/cancelable_request.h"
20 #include "content/browser/tab_contents/tab_contents.h"
21 #include "googleurl/src/gurl.h"
22
23 namespace safe_browsing {
namespace features {
// Names under which BrowserFeatureExtractor records its features in a
// ClientPhishingRequest's feature map.

// Features computed from the history entry of the request URL itself.
const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount";
const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount";
const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount";
const char kUrlHistoryVisitCountMoreThan24hAgo[] =
    "UrlHistoryVisitCountMoreThan24hAgo";

// Features computed from the visits to the request URL's host, looked up
// once through the HTTP URL and once through the HTTPS URL.
const char kHttpHostVisitCount[] = "HttpHostVisitCount";
const char kHttpsHostVisitCount[] = "HttpsHostVisitCount";
const char kFirstHttpHostVisitMoreThan24hAgo[] =
    "FirstHttpHostVisitMoreThan24hAgo";
const char kFirstHttpsHostVisitMoreThan24hAgo[] =
    "FirstHttpsHostVisitMoreThan24hAgo";
}  // namespace features
37
38 BrowserFeatureExtractor::BrowserFeatureExtractor(TabContents* tab)
39 : tab_(tab),
40 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) {
41 DCHECK(tab);
42 }
43
44 BrowserFeatureExtractor::~BrowserFeatureExtractor() {
45 method_factory_.RevokeAll();
46 // Delete all the pending extractions (delete callback and request objects).
47 STLDeleteContainerPairPointers(pending_extractions_.begin(),
48 pending_extractions_.end());
49 // Also cancel all the pending history service queries.
50 HistoryService* history;
51 DCHECK(GetHistoryService(&history) || pending_queries_.size() == 0);
52 if (history) {
53 // Cancel all the pending history lookups and cleanup the memory.
54 for (PendingQueriesMap::iterator it = pending_queries_.begin();
55 it != pending_queries_.end(); ++it) {
56 history->CancelRequest(it->first);
57 }
58 }
59 // Once we cancelled all the pending queries to the history service we also
60 // need to cleanup the request and callback objects.
61 for (PendingQueriesMap::iterator it = pending_queries_.begin();
gcasto (DO NOT USE) 2011/06/09 21:39:55 I think that it would be cleaner for this to be me
noelutz 2011/06/09 22:52:09 Done.
62 it != pending_queries_.end(); ++it) {
63 ExtractionData& extraction = it->second;
64 delete extraction.first; // delete request
65 delete extraction.second; // delete callback
66 }
67 }
68
69 void BrowserFeatureExtractor::ExtractFeatures(ClientPhishingRequest* request,
70 DoneCallback* callback) {
71 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
72 DCHECK(request);
73 DCHECK(request->url().find("http:") == 0);
74 DCHECK(callback);
75 if (!callback) {
76 DLOG(ERROR) << "ExtractFeatures called without a callback object";
77 return;
78 }
79 pending_extractions_.insert(std::make_pair(request, callback));
80 MessageLoop::current()->PostTask(
81 FROM_HERE,
82 method_factory_.NewRunnableMethod(
83 &BrowserFeatureExtractor::StartExtractFeatures,
84 request, callback));
85 }
86
87 void BrowserFeatureExtractor::StartExtractFeatures(
88 ClientPhishingRequest* request,
89 DoneCallback* callback) {
90 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
91 ExtractionData extraction = std::make_pair(request, callback);
92 DCHECK_EQ(1U, pending_extractions_.erase(extraction));
93 HistoryService* history;
94 if (!request || !request->IsInitialized() || !GetHistoryService(&history)) {
95 callback->Run(false, request);
96 return;
97 }
98 CancelableRequestProvider::Handle handle = history->QueryURL(
99 GURL(request->url()),
100 true /* wants_visits */,
101 &request_consumer_,
102 NewCallback(this,
103 &BrowserFeatureExtractor::QueryUrlHistoryDone));
104
105 StorePendingQuery(handle, request, callback);
106 }
107
108 void BrowserFeatureExtractor::QueryUrlHistoryDone(
109 CancelableRequestProvider::Handle handle,
110 bool success,
111 const history::URLRow* row,
112 history::VisitVector* visits) {
113 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
114 ClientPhishingRequest* request;
115 DoneCallback* callback;
116 if (!GetPendingQuery(handle, &request, &callback)) {
117 DLOG(FATAL) << "No pending history query found";
118 return;
119 }
120 DCHECK(request);
121 DCHECK(callback);
122 if (!success) {
123 // URL is not found in the history. In practice this should not
124 // happen (unless there is a real error) because we just visited
125 // that URL.
gcasto (DO NOT USE) 2011/06/09 21:39:55 Just to make sure, is the store that the browser d
noelutz 2011/06/09 22:52:09 It looks like adding to the history service is asy
126 callback->Run(false, request);
127 return;
128 }
129 ClientPhishingRequest::Feature* feature = request->add_feature_map();
gcasto (DO NOT USE) 2011/06/09 21:39:55 Might be worth making these 4 lines a function, Ad
noelutz 2011/06/09 22:52:09 Done.
130 feature->set_name(features::kUrlHistoryVisitCount);
131 feature->set_value(static_cast<double>(row->visit_count()));
132 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
133
134 base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1);
135 int num_visits_24h_ago = 0;
136 int num_visits_typed = 0;
137 int num_visits_link = 0;
138 for (history::VisitVector::const_iterator it = visits->begin();
139 it != visits->end(); ++it) {
140 if (!PageTransition::IsMainFrame(it->transition)) {
141 continue;
142 }
143 if (it->visit_time < threshold) {
144 ++num_visits_24h_ago;
145 }
146 PageTransition::Type transition = PageTransition::StripQualifier(
147 it->transition);
148 if (transition == PageTransition::TYPED) {
149 ++num_visits_typed;
150 } else if (transition == PageTransition::LINK) {
151 ++num_visits_link;
152 }
153 }
154 feature = request->add_feature_map();
155 feature->set_name(features::kUrlHistoryVisitCountMoreThan24hAgo);
156 feature->set_value(static_cast<double>(num_visits_24h_ago));
157 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
158
159 feature = request->add_feature_map();
160 feature->set_name(features::kUrlHistoryTypedCount);
161 feature->set_value(static_cast<double>(num_visits_typed));
162 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
163
164 feature = request->add_feature_map();
165 feature->set_name(features::kUrlHistoryLinkCount);
166 feature->set_value(static_cast<double>(num_visits_link));
167 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
168
169 // Issue next history lookup for hist visits.
gcasto (DO NOT USE) 2011/06/09 21:39:55 hist -> host
noelutz 2011/06/09 22:52:09 Done.
170 HistoryService* history;
171 if (!GetHistoryService(&history)) {
172 callback->Run(false, request);
173 return;
174 }
175 CancelableRequestProvider::Handle next_handle =
176 history->GetVisibleVisitCountToHost(
177 GURL(request->url()),
178 &request_consumer_,
179 NewCallback(this, &BrowserFeatureExtractor::QueryHttpHostVisitsDone));
180 StorePendingQuery(next_handle, request, callback);
181 }
182
183 void BrowserFeatureExtractor::QueryHttpHostVisitsDone(
184 CancelableRequestProvider::Handle handle,
185 bool success,
186 int num_visits,
187 base::Time first_visit) {
188 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
189 ClientPhishingRequest* request;
190 DoneCallback* callback;
191 if (!GetPendingQuery(handle, &request, &callback)) {
192 DLOG(FATAL) << "No pending history query found";
193 return;
194 }
195 DCHECK(request);
196 DCHECK(callback);
197 if (!success) {
198 callback->Run(false, request);
199 return;
200 }
201 SetHostVisitsFeatures(num_visits, first_visit, true, request);
202
203 // Same lookup but for the HTTPS URL.
204 HistoryService* history;
205 if (!GetHistoryService(&history)) {
206 callback->Run(false, request);
207 return;
208 }
209 std::string https_url = request->url();
210 CancelableRequestProvider::Handle next_handle =
211 history->GetVisibleVisitCountToHost(
212 GURL(https_url.replace(0, 5, "https:")),
213 &request_consumer_,
214 NewCallback(this,
215 &BrowserFeatureExtractor::QueryHttpsHostVisitsDone));
216 StorePendingQuery(next_handle, request, callback);
217 }
218
219 void BrowserFeatureExtractor::QueryHttpsHostVisitsDone(
220 CancelableRequestProvider::Handle handle,
221 bool success,
222 int num_visits,
223 base::Time first_visit) {
224 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
225 ClientPhishingRequest* request;
226 DoneCallback* callback;
227 if (!GetPendingQuery(handle, &request, &callback)) {
228 DLOG(FATAL) << "No pending history query found";
229 return;
230 }
231 DCHECK(request);
232 DCHECK(callback);
233 if (!success) {
234 callback->Run(false, request);
235 return;
236 }
237 SetHostVisitsFeatures(num_visits, first_visit, false, request);
238 callback->Run(true, request); // We're done with all the history lookups.
239 }
240
241 void BrowserFeatureExtractor::SetHostVisitsFeatures(
242 int num_visits,
243 base::Time first_visit,
244 bool is_http_query,
245 ClientPhishingRequest* request) {
246 DCHECK(request);
247 ClientPhishingRequest::Feature* feature = request->add_feature_map();
248 feature->set_name(is_http_query ?
249 features::kHttpHostVisitCount :
250 features::kHttpsHostVisitCount);
251 feature->set_value(static_cast<double>(num_visits));
252 VLOG(2) << "Browser feature: " << feature->name() << " "
253 << feature->value();
254
255 feature = request->add_feature_map();
256 feature->set_name(is_http_query ?
257 features::kFirstHttpHostVisitMoreThan24hAgo :
258 features::kFirstHttpsHostVisitMoreThan24hAgo);
259 if (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) {
260 feature->set_value(1.0);
261 } else {
262 feature->set_value(0.0);
263 }
264 VLOG(2) << "Browser feature: " << feature->name() << " "
265 << feature->value();
266 }
267
268 void BrowserFeatureExtractor::StorePendingQuery(
269 CancelableRequestProvider::Handle handle,
270 ClientPhishingRequest* request,
271 DoneCallback* callback) {
272 DCHECK(0 == pending_queries_.count(handle));
273 pending_queries_[handle] = std::make_pair(request, callback);
274 }
275
276 bool BrowserFeatureExtractor::GetPendingQuery(
277 CancelableRequestProvider::Handle handle,
278 ClientPhishingRequest** request,
279 DoneCallback** callback) {
280 PendingQueriesMap::iterator it = pending_queries_.find(handle);
281 DCHECK(it != pending_queries_.end());
282 if (it != pending_queries_.end()) {
283 *request = it->second.first;
284 *callback = it->second.second;
285 pending_queries_.erase(it);
286 return true;
287 }
288 return false;
289 }
290 bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) {
291 *history = NULL;
292 if (tab_ && tab_->profile()) {
293 *history = tab_->profile()->GetHistoryService(Profile::EXPLICIT_ACCESS);
294 if (*history) {
295 return true;
296 }
297 }
298 VLOG(2) << "Unable to query history. No history service available.";
299 return false;
300 }
301 }; // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698