Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(82)

Side by Side Diff: chrome/browser/safe_browsing/browser_feature_extractor.cc

Issue 7119003: Create a browser feature extractor that runs after the renderer has (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Address Pawel's comments Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
6
7 #include <map>
8 #include <utility>
9
10 #include "base/stl_util-inl.h"
11 #include "base/task.h"
12 #include "base/time.h"
13 #include "chrome/common/safe_browsing/csd.pb.h"
14 #include "chrome/browser/history/history.h"
15 #include "chrome/browser/history/history_types.h"
16 #include "chrome/browser/profiles/profile.h"
17 #include "content/common/page_transition_types.h"
18 #include "content/browser/browser_thread.h"
19 #include "content/browser/cancelable_request.h"
20 #include "content/browser/tab_contents/tab_contents.h"
21 #include "googleurl/src/gurl.h"
22
23 namespace safe_browsing {
// Names of the browser-side features that get added to the feature map of a
// ClientPhishingRequest.  A name is the bare feature identifier; the value is
// stored separately in the feature, so no name carries a trailing '='
// separator.  All eight names follow the same notation.
namespace features {
// Features computed from the history entry of the exact URL.
const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount";
const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount";
const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount";
const char kUrlHistoryVisitCount24hAgo[] = "UrlHistoryVisitCount24hAgo";
// Features computed from the visits to the URL's host, over HTTP and HTTPS.
const char kHttpHostVisitCount[] = "HttpHostVisitCount";
const char kHttpsHostVisitCount[] = "HttpsHostVisitCount";
const char kFirstHttpHostVisitMoreThan24hAgo[] =
    "FirstHttpHostVisitMoreThan24hAgo";
const char kFirstHttpsHostVisitMoreThan24hAgo[] =
    "FirstHttpsHostVisitMoreThan24hAgo";
}  // namespace features
36
37 BrowserFeatureExtractor::BrowserFeatureExtractor(TabContents* tab)
38 : tab_(tab),
39 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) {
40 DCHECK(tab);
41 }
42
43 BrowserFeatureExtractor::~BrowserFeatureExtractor() {
44 method_factory_.RevokeAll();
45 // Delete all the pending extractions (delete callback and request objects).
46 STLDeleteContainerPairPointers(pending_extractions_.begin(),
47 pending_extractions_.end());
48 // Also cancel all the pending history service queries.
49 HistoryService* history;
50 DCHECK(GetHistoryService(&history) || pending_queries_.size() == 0);
51 if (history) {
52 // Cancel all the pending history lookups and cleanup the memory.
53 for (PendingQueriesMap::iterator it = pending_queries_.begin();
54 it != pending_queries_.end(); ++it) {
55 history->CancelRequest(it->first);
56 }
57 }
58 // Once we cancelled all the pending queries to the history service we also
59 // need to cleanup the request and callback objects.
60 for (PendingQueriesMap::iterator it = pending_queries_.begin();
61 it != pending_queries_.end(); ++it) {
62 ExtractionData& extraction = it->second;
63 delete extraction.first; // delete request
64 delete extraction.second; // delete callback
65 }
66 }
67
68 void BrowserFeatureExtractor::ExtractFeatures(ClientPhishingRequest* request,
69 DoneCallback* callback) {
70 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
71 DCHECK(request);
72 DCHECK(request->url().find("http:") == 0);
73 DCHECK(callback);
74 if (!callback) {
75 DLOG(ERROR) << "ExtractFeatures called without a callback object";
76 return;
77 }
78 pending_extractions_.insert(std::make_pair(request, callback));
79 MessageLoop::current()->PostTask(
80 FROM_HERE,
81 method_factory_.NewRunnableMethod(
82 &BrowserFeatureExtractor::StartExtractFeatures,
83 request, callback));
84 }
85
86 void BrowserFeatureExtractor::StartExtractFeatures(
87 ClientPhishingRequest* request,
88 DoneCallback* callback) {
89 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
90 ExtractionData extraction = std::make_pair(request, callback);
91 DCHECK(1 == pending_extractions_.erase(extraction));
mattm 2011/06/08 23:18:18 DCHECK_EQ
noelutz 2011/06/09 19:21:39 Done.
92 HistoryService* history;
93 if (!request || !request->IsInitialized() || !GetHistoryService(&history)) {
94 callback->Run(false, request);
95 return;
96 }
97 CancelableRequestProvider::Handle handle = history->QueryURL(
98 GURL(request->url()),
99 true /* wants_visits */,
100 &request_consumer_,
101 NewCallback(this,
102 &BrowserFeatureExtractor::QueryUrlHistoryDone));
103
104 StorePendingQuery(handle, request, callback);
105 }
106
107 void BrowserFeatureExtractor::QueryUrlHistoryDone(
108 CancelableRequestProvider::Handle handle,
109 bool success,
110 const history::URLRow* row,
111 history::VisitVector* visits) {
112 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
113 ClientPhishingRequest* request;
114 DoneCallback* callback;
115 if (!GetPendingQuery(handle, &request, &callback)) {
116 DLOG(FATAL) << "No pending history query found";
117 return;
118 }
119 DCHECK(request);
120 DCHECK(callback);
121 if (!success) {
122 // URL is not found in the history. In practice this should not
123 // happen (unless there is a real error) because we just visited
124 // that URL.
125 callback->Run(false, request);
126 return;
127 }
128 ClientPhishingRequest::Feature* feature = request->add_feature_map();
129 feature->set_name(features::kUrlHistoryVisitCount);
130 feature->set_value(static_cast<double>(row->visit_count()));
131 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
132
133 base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1);
134 int num_visits_24h_ago = 0;
135 int num_visits_typed = 0;
136 int num_visits_link = 0;
137 for (history::VisitVector::const_iterator it = visits->begin();
138 it != visits->end(); ++it) {
139 if (!PageTransition::IsMainFrame(it->transition)) {
140 continue;
141 }
142 if (it->visit_time < threshold) {
143 ++num_visits_24h_ago;
144 }
145 PageTransition::Type transition = PageTransition::StripQualifier(
146 it->transition);
147 if (transition == PageTransition::TYPED) {
148 ++num_visits_typed;
149 } else if (transition == PageTransition::LINK) {
150 ++num_visits_link;
151 }
152 }
153 feature = request->add_feature_map();
154 feature->set_name(features::kUrlHistoryVisitCount24hAgo);
155 feature->set_value(static_cast<double>(num_visits_24h_ago));
156 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
157
158 feature = request->add_feature_map();
159 feature->set_name(features::kUrlHistoryTypedCount);
160 feature->set_value(static_cast<double>(num_visits_typed));
161 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
162
163 feature = request->add_feature_map();
164 feature->set_name(features::kUrlHistoryLinkCount);
165 feature->set_value(static_cast<double>(num_visits_link));
166 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
167
168 // Issue next history lookup for hist visits.
169 HistoryService* history;
170 if (!GetHistoryService(&history)) {
171 callback->Run(false, request);
172 return;
173 }
174 CancelableRequestProvider::Handle next_handle =
175 history->GetVisibleVisitCountToHost(
176 GURL(request->url()),
177 &request_consumer_,
178 NewCallback(this, &BrowserFeatureExtractor::QueryHttpHostVisitsDone));
179 StorePendingQuery(next_handle, request, callback);
180 }
181
182 void BrowserFeatureExtractor::QueryHttpHostVisitsDone(
183 CancelableRequestProvider::Handle handle,
184 bool success,
185 int num_visits,
186 base::Time first_visit) {
187 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
188 ClientPhishingRequest* request;
189 DoneCallback* callback;
190 if (!GetPendingQuery(handle, &request, &callback)) {
191 DLOG(FATAL) << "No pending history query found";
192 return;
193 }
194 DCHECK(request);
195 DCHECK(callback);
196 if (!success) {
197 callback->Run(false, request);
198 return;
199 }
200 SetHostVisitsFeatures(num_visits, first_visit, true, request);
201
202 // Same lookup but for the HTTPS URL.
203 HistoryService* history;
204 if (!GetHistoryService(&history)) {
205 callback->Run(false, request);
206 return;
207 }
208 std::string https_url = request->url();
209 CancelableRequestProvider::Handle next_handle =
210 history->GetVisibleVisitCountToHost(
211 GURL(https_url.replace(0, 5, "https:")),
212 &request_consumer_,
213 NewCallback(this,
214 &BrowserFeatureExtractor::QueryHttpsHostVisitsDone));
215 StorePendingQuery(next_handle, request, callback);
216 }
217
218 void BrowserFeatureExtractor::QueryHttpsHostVisitsDone(
219 CancelableRequestProvider::Handle handle,
220 bool success,
221 int num_visits,
222 base::Time first_visit) {
223 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
224 ClientPhishingRequest* request;
225 DoneCallback* callback;
226 if (!GetPendingQuery(handle, &request, &callback)) {
227 DLOG(FATAL) << "No pending history query found";
228 return;
229 }
230 DCHECK(request);
231 DCHECK(callback);
232 if (!success) {
233 callback->Run(false, request);
234 return;
235 }
236 SetHostVisitsFeatures(num_visits, first_visit, false, request);
237 callback->Run(true, request); // We're done with all the history lookups.
238 }
239
240 void BrowserFeatureExtractor::SetHostVisitsFeatures(
241 int num_visits,
242 base::Time first_visit,
243 bool is_http_query,
244 ClientPhishingRequest* request) {
245 DCHECK(request);
246 ClientPhishingRequest::Feature* feature = request->add_feature_map();
247 feature->set_name(is_http_query ?
248 features::kHttpHostVisitCount :
249 features::kHttpsHostVisitCount);
250 feature->set_value(static_cast<double>(num_visits));
251 VLOG(2) << "Browser feature: " << feature->name() << " "
252 << feature->value();
253
254 feature = request->add_feature_map();
255 feature->set_name(is_http_query ?
256 features::kFirstHttpHostVisitMoreThan24hAgo :
257 features::kFirstHttpsHostVisitMoreThan24hAgo);
258 if (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) {
259 feature->set_value(1.0);
260 } else {
261 feature->set_value(0.0);
262 }
263 VLOG(2) << "Browser feature: " << feature->name() << " "
264 << feature->value();
265 }
266
267 void BrowserFeatureExtractor::StorePendingQuery(
268 CancelableRequestProvider::Handle handle,
269 ClientPhishingRequest* request,
270 DoneCallback* callback) {
271 DCHECK(0 == pending_queries_.count(handle));
272 pending_queries_[handle] = std::make_pair(request, callback);
273 }
274
275 bool BrowserFeatureExtractor::GetPendingQuery(
276 CancelableRequestProvider::Handle handle,
277 ClientPhishingRequest** request,
278 DoneCallback** callback) {
279 PendingQueriesMap::iterator it = pending_queries_.find(handle);
280 DCHECK(it != pending_queries_.end());
281 if (it != pending_queries_.end()) {
282 *request = it->second.first;
283 *callback = it->second.second;
284 pending_queries_.erase(it);
285 return true;
286 }
287 return false;
288 }
289 bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) {
290 *history = NULL;
291 if (tab_ && tab_->profile()) {
292 *history = tab_->profile()->GetHistoryService(Profile::EXPLICIT_ACCESS);
293 if (*history) {
294 return true;
295 }
296 }
297 VLOG(2) << "Unable to query history. No history service available.";
298 return false;
299 }
300 }; // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698