Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(600)

Side by Side Diff: chrome/browser/safe_browsing/browser_feature_extractor.cc

Issue 7119003: Create a browser feature extractor that runs after the renderer has (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Add new files Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
6
7 #include <map>
8 #include <utility>
9
10 #include "base/stl_util-inl.h"
11 #include "base/task.h"
12 #include "base/time.h"
13 #include "chrome/common/safe_browsing/csd.pb.h"
14 #include "chrome/browser/history/history.h"
15 #include "chrome/browser/history/history_types.h"
16 #include "chrome/browser/profiles/profile.h"
17 #include "content/common/page_transition_types.h"
18 #include "content/browser/browser_thread.h"
19 #include "content/browser/cancelable_request.h"
20 #include "content/browser/tab_contents/tab_contents.h"
21 #include "googleurl/src/gurl.h"
22
23 namespace safe_browsing {
namespace features {
// Names under which the browser-side features computed in this file are
// recorded in a ClientPhishingRequest.

// Derived from the history entry (and its visits) of the exact request URL.
const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount";
const char kUrlHistoryVisitCountMoreThan24hAgo[] =
    "UrlHistoryVisitCountMoreThan24hAgo";
const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount";
const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount";

// Derived from the visible-visit count lookup done with the http: URL.
const char kHttpHostVisitCount[] = "HttpHostVisitCount";
const char kFirstHttpHostVisitMoreThan24hAgo[] =
    "FirstHttpHostVisitMoreThan24hAgo";

// Derived from the same lookup repeated with the scheme switched to https:.
const char kHttpsHostVisitCount[] = "HttpsHostVisitCount";
const char kFirstHttpsHostVisitMoreThan24hAgo[] =
    "FirstHttpsHostVisitMoreThan24hAgo";
}  // namespace features
37
38 static void AddFeature(const std::string& feature_name,
39 double feature_value,
40 ClientPhishingRequest* request) {
41 DCHECK(request);
42 ClientPhishingRequest::Feature* feature =
43 request->add_non_model_feature_map();
44 feature->set_name(feature_name);
45 feature->set_value(feature_value);
46 VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value();
47 }
48
49 BrowserFeatureExtractor::BrowserFeatureExtractor(TabContents* tab)
50 : tab_(tab),
51 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) {
52 DCHECK(tab);
53 }
54
55 BrowserFeatureExtractor::~BrowserFeatureExtractor() {
56 method_factory_.RevokeAll();
57 // Delete all the pending extractions (delete callback and request objects).
58 STLDeleteContainerPairPointers(pending_extractions_.begin(),
59 pending_extractions_.end());
60 // Also cancel all the pending history service queries.
61 HistoryService* history;
62 bool success = GetHistoryService(&history);
63 DCHECK(success || pending_queries_.size() == 0);
64 // Cancel all the pending history lookups and cleanup the memory.
65 for (PendingQueriesMap::iterator it = pending_queries_.begin();
66 it != pending_queries_.end(); ++it) {
67 if (history) {
68 history->CancelRequest(it->first);
69 }
70 ExtractionData& extraction = it->second;
71 delete extraction.first; // delete request
72 delete extraction.second; // delete callback
73 }
74 pending_queries_.clear();
75 }
76
77 void BrowserFeatureExtractor::ExtractFeatures(ClientPhishingRequest* request,
78 DoneCallback* callback) {
79 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
80 DCHECK(request);
81 DCHECK_EQ(0U, request->url().find("http:"));
82 DCHECK(callback);
83 if (!callback) {
84 DLOG(ERROR) << "ExtractFeatures called without a callback object";
85 return;
86 }
87 pending_extractions_.insert(std::make_pair(request, callback));
88 MessageLoop::current()->PostTask(
89 FROM_HERE,
90 method_factory_.NewRunnableMethod(
91 &BrowserFeatureExtractor::StartExtractFeatures,
92 request, callback));
93 }
94
95 void BrowserFeatureExtractor::StartExtractFeatures(
96 ClientPhishingRequest* request,
97 DoneCallback* callback) {
98 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
99 ExtractionData extraction = std::make_pair(request, callback);
100 size_t removed = pending_extractions_.erase(extraction);
101 DCHECK_EQ(1U, removed);
102 HistoryService* history;
103 if (!request || !request->IsInitialized() || !GetHistoryService(&history)) {
104 callback->Run(false, request);
105 delete callback;
106 return;
107 }
108 CancelableRequestProvider::Handle handle = history->QueryURL(
109 GURL(request->url()),
110 true /* wants_visits */,
111 &request_consumer_,
112 NewCallback(this,
113 &BrowserFeatureExtractor::QueryUrlHistoryDone));
114
115 StorePendingQuery(handle, request, callback);
116 }
117
118 void BrowserFeatureExtractor::QueryUrlHistoryDone(
119 CancelableRequestProvider::Handle handle,
120 bool success,
121 const history::URLRow* row,
122 history::VisitVector* visits) {
123 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
124 ClientPhishingRequest* request;
125 DoneCallback* callback;
126 if (!GetPendingQuery(handle, &request, &callback)) {
127 DLOG(FATAL) << "No pending history query found";
128 return;
129 }
130 DCHECK(request);
131 DCHECK(callback);
132 if (!success) {
133 // URL is not found in the history. In practice this should not
134 // happen (unless there is a real error) because we just visited
135 // that URL.
136 callback->Run(false, request);
137 delete callback;
138 return;
139 }
140 AddFeature(features::kUrlHistoryVisitCount,
141 static_cast<double>(row->visit_count()),
142 request);
143
144 base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1);
145 int num_visits_24h_ago = 0;
146 int num_visits_typed = 0;
147 int num_visits_link = 0;
148 for (history::VisitVector::const_iterator it = visits->begin();
149 it != visits->end(); ++it) {
150 if (!PageTransition::IsMainFrame(it->transition)) {
151 continue;
152 }
153 if (it->visit_time < threshold) {
154 ++num_visits_24h_ago;
155 }
156 PageTransition::Type transition = PageTransition::StripQualifier(
157 it->transition);
158 if (transition == PageTransition::TYPED) {
159 ++num_visits_typed;
160 } else if (transition == PageTransition::LINK) {
161 ++num_visits_link;
162 }
163 }
164 AddFeature(features::kUrlHistoryVisitCountMoreThan24hAgo,
165 static_cast<double>(num_visits_24h_ago),
166 request);
167 AddFeature(features::kUrlHistoryTypedCount,
168 static_cast<double>(num_visits_typed),
169 request);
170 AddFeature(features::kUrlHistoryLinkCount,
171 static_cast<double>(num_visits_link),
172 request);
173
174 // Issue next history lookup for host visits.
175 HistoryService* history;
176 if (!GetHistoryService(&history)) {
177 callback->Run(false, request);
178 delete callback;
179 return;
180 }
181 CancelableRequestProvider::Handle next_handle =
182 history->GetVisibleVisitCountToHost(
183 GURL(request->url()),
184 &request_consumer_,
185 NewCallback(this, &BrowserFeatureExtractor::QueryHttpHostVisitsDone));
186 StorePendingQuery(next_handle, request, callback);
187 }
188
189 void BrowserFeatureExtractor::QueryHttpHostVisitsDone(
190 CancelableRequestProvider::Handle handle,
191 bool success,
192 int num_visits,
193 base::Time first_visit) {
194 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
195 ClientPhishingRequest* request;
196 DoneCallback* callback;
197 if (!GetPendingQuery(handle, &request, &callback)) {
198 DLOG(FATAL) << "No pending history query found";
199 return;
200 }
201 DCHECK(request);
202 DCHECK(callback);
203 if (!success) {
204 callback->Run(false, request);
205 delete callback;
206 return;
207 }
208 SetHostVisitsFeatures(num_visits, first_visit, true, request);
209
210 // Same lookup but for the HTTPS URL.
211 HistoryService* history;
212 if (!GetHistoryService(&history)) {
213 callback->Run(false, request);
214 delete callback;
215 return;
216 }
217 std::string https_url = request->url();
218 CancelableRequestProvider::Handle next_handle =
219 history->GetVisibleVisitCountToHost(
220 GURL(https_url.replace(0, 5, "https:")),
221 &request_consumer_,
222 NewCallback(this,
223 &BrowserFeatureExtractor::QueryHttpsHostVisitsDone));
224 StorePendingQuery(next_handle, request, callback);
225 }
226
227 void BrowserFeatureExtractor::QueryHttpsHostVisitsDone(
228 CancelableRequestProvider::Handle handle,
229 bool success,
230 int num_visits,
231 base::Time first_visit) {
232 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
233 ClientPhishingRequest* request;
234 DoneCallback* callback;
235 if (!GetPendingQuery(handle, &request, &callback)) {
236 DLOG(FATAL) << "No pending history query found";
237 return;
238 }
239 DCHECK(request);
240 DCHECK(callback);
241 if (!success) {
242 callback->Run(false, request);
243 delete callback;
244 return;
245 }
246 SetHostVisitsFeatures(num_visits, first_visit, false, request);
247 callback->Run(true, request); // We're done with all the history lookups.
248 delete callback;
249 }
250
251 void BrowserFeatureExtractor::SetHostVisitsFeatures(
252 int num_visits,
253 base::Time first_visit,
254 bool is_http_query,
255 ClientPhishingRequest* request) {
256 DCHECK(request);
257 AddFeature(is_http_query ?
258 features::kHttpHostVisitCount : features::kHttpsHostVisitCount,
259 static_cast<double>(num_visits),
260 request);
261 AddFeature(
262 is_http_query ?
263 features::kFirstHttpHostVisitMoreThan24hAgo :
264 features::kFirstHttpsHostVisitMoreThan24hAgo,
265 (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) ?
266 1.0 : 0.0,
267 request);
268 }
269
270 void BrowserFeatureExtractor::StorePendingQuery(
271 CancelableRequestProvider::Handle handle,
272 ClientPhishingRequest* request,
273 DoneCallback* callback) {
274 DCHECK_EQ(0U, pending_queries_.count(handle));
275 pending_queries_[handle] = std::make_pair(request, callback);
276 }
277
278 bool BrowserFeatureExtractor::GetPendingQuery(
279 CancelableRequestProvider::Handle handle,
280 ClientPhishingRequest** request,
281 DoneCallback** callback) {
282 PendingQueriesMap::iterator it = pending_queries_.find(handle);
283 DCHECK(it != pending_queries_.end());
284 if (it != pending_queries_.end()) {
285 *request = it->second.first;
286 *callback = it->second.second;
287 pending_queries_.erase(it);
288 return true;
289 }
290 return false;
291 }
292
293 bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) {
294 *history = NULL;
295 if (tab_ && tab_->profile()) {
296 *history = tab_->profile()->GetHistoryService(Profile::EXPLICIT_ACCESS);
297 if (*history) {
298 return true;
299 }
300 }
301 VLOG(2) << "Unable to query history. No history service available.";
302 return false;
303 }
304 }; // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698